1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

Merge pull request #1508 from muskuloes/master

Flush output buffer for expand/unexpand commands
This commit is contained in:
Sylvestre Ledru 2020-05-09 21:17:58 +02:00 committed by GitHub
commit a83fe2f098
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 46 deletions

View file

@ -247,6 +247,7 @@ fn expand(options: Options) {
byte += nbytes; // advance the pointer byte += nbytes; // advance the pointer
} }
safe_unwrap!(output.flush());
buf.truncate(0); // clear the buffer buf.truncate(0); // clear the buffer
} }
} }

View file

@ -209,9 +209,51 @@ enum CharType {
Other, Other,
} }
fn unexpand(options: Options) { fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usize) {
use self::CharType::*; let (ctype, cwidth, nbytes) = if uflag {
let nbytes = char::from(buf[byte]).len_utf8();
if byte + nbytes > buf.len() {
// make sure we don't overrun the buffer because of invalid UTF-8
(CharType::Other, 1, 1)
} else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
// Now that we think it's UTF-8, figure out what kind of char it is
match t.chars().next() {
Some(' ') => (CharType::Space, 0, 1),
Some('\t') => (CharType::Tab, 0, 1),
Some('\x08') => (CharType::Backspace, 0, 1),
Some(c) => (
CharType::Other,
UnicodeWidthChar::width(c).unwrap_or(0),
nbytes,
),
None => {
// invalid char snuck past the utf8_validation_iterator somehow???
(CharType::Other, 1, 1)
}
}
} else {
// otherwise, it's not valid
(CharType::Other, 1, 1) // implicit assumption: non-UTF8 char has display width 1
}
} else {
(
match buf[byte] {
// always take exactly 1 byte in strict ASCII mode
0x20 => CharType::Space,
0x09 => CharType::Tab,
0x08 => CharType::Backspace,
_ => CharType::Other,
},
1,
1,
)
};
(ctype, cwidth, nbytes)
}
fn unexpand(options: Options) {
let mut output = BufWriter::new(stdout()); let mut output = BufWriter::new(stdout());
let ts = &options.tabstops[..]; let ts = &options.tabstops[..];
let mut buf = Vec::new(); let mut buf = Vec::new();
@ -228,60 +270,34 @@ fn unexpand(options: Options) {
let mut col = 0; // the current column let mut col = 0; // the current column
let mut scol = 0; // the start col for the current span, i.e., the already-printed width let mut scol = 0; // the start col for the current span, i.e., the already-printed width
let mut init = true; // are we at the start of the line? let mut init = true; // are we at the start of the line?
let mut pctype = Other; let mut pctype = CharType::Other;
while byte < buf.len() { while byte < buf.len() {
// when we have a finite number of columns, never convert past the last column // when we have a finite number of columns, never convert past the last column
if lastcol > 0 && col >= lastcol { if lastcol > 0 && col >= lastcol {
write_tabs(&mut output, ts, scol, col, pctype == Tab, init, true); write_tabs(
&mut output,
ts,
scol,
col,
pctype == CharType::Tab,
init,
true,
);
safe_unwrap!(output.write_all(&buf[byte..])); safe_unwrap!(output.write_all(&buf[byte..]));
scol = col; scol = col;
break; break;
} }
let (ctype, cwidth, nbytes) = if options.uflag { // figure out how big the next char is, if it's UTF-8
let nbytes = char::from(buf[byte]).len_utf8(); let (ctype, cwidth, nbytes) = next_char_info(options.uflag, &buf, byte);
// figure out how big the next char is, if it's UTF-8
if byte + nbytes > buf.len() {
// make sure we don't overrun the buffer because of invalid UTF-8
(Other, 1, 1)
} else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
// Now that we think it's UTF-8, figure out what kind of char it is
match t.chars().next() {
Some(' ') => (Space, 0, 1),
Some('\t') => (Tab, 0, 1),
Some('\x08') => (Backspace, 0, 1),
Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
None => {
// invalid char snuck past the utf8_validation_iterator somehow???
(Other, 1, 1)
}
}
} else {
// otherwise, it's not valid
(Other, 1, 1) // implicit assumption: non-UTF8 char has display width 1
}
} else {
(
match buf[byte] {
// always take exactly 1 byte in strict ASCII mode
0x20 => Space,
0x09 => Tab,
0x08 => Backspace,
_ => Other,
},
1,
1,
)
};
// now figure out how many columns this char takes up, and maybe print it // now figure out how many columns this char takes up, and maybe print it
let tabs_buffered = init || options.aflag; let tabs_buffered = init || options.aflag;
match ctype { match ctype {
Space | Tab => { CharType::Space | CharType::Tab => {
// compute next col, but only write space or tab chars if not buffering // compute next col, but only write space or tab chars if not buffering
col += if ctype == Space { col += if ctype == CharType::Space {
1 1
} else { } else {
next_tabstop(ts, col).unwrap_or(1) next_tabstop(ts, col).unwrap_or(1)
@ -292,19 +308,19 @@ fn unexpand(options: Options) {
scol = col; // now printed up to this column scol = col; // now printed up to this column
} }
} }
Other | Backspace => { CharType::Other | CharType::Backspace => {
// always // always
write_tabs( write_tabs(
&mut output, &mut output,
ts, ts,
scol, scol,
col, col,
pctype == Tab, pctype == CharType::Tab,
init, init,
options.aflag, options.aflag,
); );
init = false; // no longer at the start of a line init = false; // no longer at the start of a line
col = if ctype == Other { col = if ctype == CharType::Other {
// use computed width // use computed width
col + cwidth col + cwidth
} else if col > 0 { } else if col > 0 {
@ -323,7 +339,16 @@ fn unexpand(options: Options) {
} }
// write out anything remaining // write out anything remaining
write_tabs(&mut output, ts, scol, col, pctype == Tab, init, true); write_tabs(
&mut output,
ts,
scol,
col,
pctype == CharType::Tab,
init,
true,
);
safe_unwrap!(output.flush());
buf.truncate(0); // clear out the buffer buf.truncate(0); // clear out the buffer
} }
} }