mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
fold: improve newline handling and test coverage
- refactor implementation for readability
- correct handling of files with no trailing newline and/or blank lines
This commit is contained in:
parent
20d071a482
commit
bad1df9c1b
2 changed files with 433 additions and 107 deletions
|
@ -79,7 +79,6 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
|
|||
(args.to_vec(), None)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fold(filenames: Vec<String>, bytes: bool, spaces: bool, width: usize) {
|
||||
for filename in &filenames {
|
||||
let filename: &str = &filename;
|
||||
|
@ -92,123 +91,173 @@ fn fold(filenames: Vec<String>, bytes: bool, spaces: bool, width: usize) {
|
|||
file_buf = safe_unwrap!(File::open(Path::new(filename)));
|
||||
&mut file_buf as &mut dyn Read
|
||||
});
|
||||
fold_file(buffer, bytes, spaces, width);
|
||||
|
||||
if bytes {
|
||||
fold_file_bytewise(buffer, spaces, width);
|
||||
} else {
|
||||
fold_file(buffer, spaces, width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fold_file<T: Read>(file: BufReader<T>, bytes: bool, spaces: bool, width: usize) {
|
||||
for line_result in file.lines() {
|
||||
let mut line = safe_unwrap!(line_result);
|
||||
/// Fold `file` to fit `width` (number of columns), counting all characters as
|
||||
/// one column.
|
||||
///
|
||||
/// This function handles folding for the `-b`/`--bytes` option, counting
|
||||
/// tab, backspace, and carriage return as occupying one column, identically
|
||||
/// to all other characters in the stream.
|
||||
///
|
||||
/// If `spaces` is `true`, attempt to break lines at whitespace boundaries.
|
||||
fn fold_file_bytewise<T: Read>(mut file: BufReader<T>, spaces: bool, width: usize) {
|
||||
let mut line = String::new();
|
||||
|
||||
if line.is_empty() {
|
||||
loop {
|
||||
if let Ok(0) = file.read_line(&mut line) {
|
||||
break;
|
||||
}
|
||||
|
||||
if line == "\n" {
|
||||
println!();
|
||||
} else if bytes {
|
||||
let len = line.len();
|
||||
let mut i = 0;
|
||||
while i < len {
|
||||
let width = if len - i >= width { width } else { len - i };
|
||||
let slice = {
|
||||
let slice = &line[i..i + width];
|
||||
if spaces && i + width < len {
|
||||
match slice.rfind(char::is_whitespace) {
|
||||
Some(m) => &slice[..=m],
|
||||
None => slice,
|
||||
}
|
||||
} else {
|
||||
slice
|
||||
line.truncate(0);
|
||||
continue;
|
||||
}
|
||||
|
||||
let len = line.len();
|
||||
let mut i = 0;
|
||||
|
||||
while i < len {
|
||||
let width = if len - i >= width { width } else { len - i };
|
||||
let slice = {
|
||||
let slice = &line[i..i + width];
|
||||
if spaces && i + width < len {
|
||||
match slice.rfind(char::is_whitespace) {
|
||||
Some(m) => &slice[..=m],
|
||||
None => slice,
|
||||
}
|
||||
};
|
||||
print!("{}", slice);
|
||||
i += slice.len();
|
||||
} else {
|
||||
slice
|
||||
}
|
||||
};
|
||||
|
||||
// Don't duplicate trailing newlines: if the slice is "\n", the
|
||||
// previous iteration folded just before the end of the line and
|
||||
// has already printed this newline.
|
||||
if slice == "\n" {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
let mut len = line.chars().count();
|
||||
let newline = line.ends_with('\n');
|
||||
if newline {
|
||||
if len == 1 {
|
||||
println!();
|
||||
|
||||
i += slice.len();
|
||||
|
||||
let at_eol = i >= len;
|
||||
|
||||
if at_eol {
|
||||
print!("{}", slice);
|
||||
} else {
|
||||
println!("{}", slice);
|
||||
}
|
||||
}
|
||||
|
||||
line.truncate(0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Fold `file` to fit `width` (number of columns).
|
||||
///
|
||||
/// By default `fold` treats tab, backspace, and carriage return specially:
|
||||
/// tab characters count as 8 columns, backspace decreases the
|
||||
/// column count, and carriage return resets the column count to 0.
|
||||
///
|
||||
/// If `spaces` is `true`, attempt to break lines at whitespace boundaries.
|
||||
#[allow(unused_assignments)]
|
||||
fn fold_file<T: Read>(mut file: BufReader<T>, spaces: bool, width: usize) {
|
||||
let mut line = String::new();
|
||||
let mut output = String::new();
|
||||
let mut col_count = 0;
|
||||
let mut char_count = 0;
|
||||
let mut last_space = None;
|
||||
|
||||
/// Print the output line, resetting the column and character counts.
|
||||
///
|
||||
/// If `spaces` is `true`, print the output line up to the last
|
||||
/// encountered whitespace character (inclusive) and set the remaining
|
||||
/// characters as the start of the next line.
|
||||
macro_rules! emit_output {
|
||||
() => {
|
||||
let consume = match last_space {
|
||||
Some(i) => i + 1,
|
||||
None => output.len(),
|
||||
};
|
||||
|
||||
println!("{}", &output[..consume]);
|
||||
output.replace_range(..consume, "");
|
||||
char_count = output.len();
|
||||
|
||||
// we know there are no tabs left in output, so each char counts
|
||||
// as 1 column
|
||||
col_count = char_count;
|
||||
|
||||
last_space = None;
|
||||
};
|
||||
}
|
||||
|
||||
loop {
|
||||
if let Ok(0) = file.read_line(&mut line) {
|
||||
break;
|
||||
}
|
||||
|
||||
for ch in line.chars() {
|
||||
if ch == '\n' {
|
||||
// make sure to _not_ split output at whitespace, since we
|
||||
// know the entire output will fit
|
||||
last_space = None;
|
||||
emit_output!();
|
||||
break;
|
||||
}
|
||||
|
||||
if col_count >= width {
|
||||
emit_output!();
|
||||
}
|
||||
|
||||
match ch {
|
||||
'\t' => {
|
||||
if col_count + 8 > width && !output.is_empty() {
|
||||
emit_output!();
|
||||
}
|
||||
col_count += 8;
|
||||
last_space = Some(char_count);
|
||||
}
|
||||
'\x08' => {
|
||||
// FIXME: does not match GNU's handling of backspace
|
||||
if col_count > 0 {
|
||||
col_count -= 1;
|
||||
char_count -= 1;
|
||||
output.truncate(char_count);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
len -= 1;
|
||||
line.truncate(len);
|
||||
}
|
||||
let mut output = String::new();
|
||||
let mut count = 0;
|
||||
for (i, ch) in line.chars().enumerate() {
|
||||
if count >= width {
|
||||
let (val, ncount) = {
|
||||
let slice = &output[..];
|
||||
let (out, val, ncount) = if spaces && i + 1 < len {
|
||||
match rfind_whitespace(slice) {
|
||||
Some(m) => {
|
||||
let routput = &slice[m + 1..slice.chars().count()];
|
||||
let ncount = routput.chars().fold(0, |out, ch: char| {
|
||||
out + match ch {
|
||||
'\t' => 8,
|
||||
'\x08' => {
|
||||
if out > 0 {
|
||||
!0
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
'\r' => return 0,
|
||||
_ => 1,
|
||||
}
|
||||
});
|
||||
(&slice[0..=m], routput, ncount)
|
||||
}
|
||||
None => (slice, "", 0),
|
||||
}
|
||||
} else {
|
||||
(slice, "", 0)
|
||||
};
|
||||
println!("{}", out);
|
||||
(val.to_owned(), ncount)
|
||||
};
|
||||
output = val;
|
||||
count = ncount;
|
||||
'\r' => {
|
||||
// FIXME: does not match GNU's handling of carriage return
|
||||
output.truncate(0);
|
||||
col_count = 0;
|
||||
char_count = 0;
|
||||
continue;
|
||||
}
|
||||
match ch {
|
||||
'\t' => {
|
||||
count += 8;
|
||||
if count > width {
|
||||
println!("{}", output);
|
||||
output.truncate(0);
|
||||
count = 8;
|
||||
}
|
||||
}
|
||||
'\x08' => {
|
||||
if count > 0 {
|
||||
count -= 1;
|
||||
let len = output.len() - 1;
|
||||
output.truncate(len);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
'\r' => {
|
||||
output.truncate(0);
|
||||
count = 0;
|
||||
continue;
|
||||
}
|
||||
_ => count += 1,
|
||||
};
|
||||
output.push(ch);
|
||||
}
|
||||
if count > 0 {
|
||||
println!("{}", output);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ if spaces && ch.is_whitespace() => {
|
||||
last_space = Some(char_count);
|
||||
col_count += 1
|
||||
}
|
||||
_ => col_count += 1,
|
||||
};
|
||||
|
||||
#[inline]
|
||||
fn rfind_whitespace(slice: &str) -> Option<usize> {
|
||||
for (i, ch) in slice.chars().rev().enumerate() {
|
||||
if ch.is_whitespace() {
|
||||
return Some(slice.chars().count() - (i + 1));
|
||||
output.push(ch);
|
||||
char_count += 1;
|
||||
}
|
||||
|
||||
if col_count > 0 {
|
||||
print!("{}", output);
|
||||
output.truncate(0);
|
||||
}
|
||||
|
||||
line.truncate(0);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
|
|
@ -32,6 +32,24 @@ fn test_default_wrap_with_newlines() {
|
|||
.stdout_is_fixture("lorem_ipsum_new_line_80_column.expected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_preserve_empty_line_without_final_newline() {
|
||||
new_ucmd!()
|
||||
.arg("-w2")
|
||||
.pipe_in("12\n\n34")
|
||||
.succeeds()
|
||||
.stdout_is("12\n\n34");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_preserve_empty_line_and_final_newline() {
|
||||
new_ucmd!()
|
||||
.arg("-w2")
|
||||
.pipe_in("12\n\n34\n")
|
||||
.succeeds()
|
||||
.stdout_is("12\n\n34\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_preserve_empty_lines() {
|
||||
new_ucmd!().pipe_in("\n").succeeds().stdout_is("\n");
|
||||
|
@ -57,3 +75,262 @@ fn test_word_boundary_split_should_preserve_empty_lines() {
|
|||
.succeeds()
|
||||
.stdout_is("0\n1\n\n2\n\n\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_not_add_newline_when_line_less_than_fold() {
|
||||
new_ucmd!().pipe_in("1234").succeeds().stdout_is("1234");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_not_add_newline_when_line_longer_than_fold() {
|
||||
new_ucmd!()
|
||||
.arg("-w2")
|
||||
.pipe_in("1234")
|
||||
.succeeds()
|
||||
.stdout_is("12\n34");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_not_add_newline_when_line_equal_to_fold() {
|
||||
new_ucmd!()
|
||||
.arg("-w1")
|
||||
.pipe_in(" ")
|
||||
.succeeds()
|
||||
.stdout_is(" ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_preserve_final_newline_when_line_less_than_fold() {
|
||||
new_ucmd!().pipe_in("1234\n").succeeds().stdout_is("1234\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_preserve_final_newline_when_line_longer_than_fold() {
|
||||
new_ucmd!()
|
||||
.arg("-w2")
|
||||
.pipe_in("1234\n")
|
||||
.succeeds()
|
||||
.stdout_is("12\n34\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_preserve_final_newline_when_line_equal_to_fold() {
|
||||
new_ucmd!()
|
||||
.arg("-w2")
|
||||
.pipe_in("1\n")
|
||||
.succeeds()
|
||||
.stdout_is("1\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_tab_should_not_add_extra_newline() {
|
||||
new_ucmd!()
|
||||
.arg("-w1")
|
||||
.pipe_in("\t")
|
||||
.succeeds()
|
||||
.stdout_is("\t");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tab_counts_as_8_columns() {
|
||||
new_ucmd!()
|
||||
.arg("-w8")
|
||||
.pipe_in("\t1")
|
||||
.succeeds()
|
||||
.stdout_is("\t\n1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fold_at_word_boundary() {
|
||||
new_ucmd!()
|
||||
.args(&["-w4", "-s"])
|
||||
.pipe_in("one two")
|
||||
.succeeds()
|
||||
.stdout_is("one \ntwo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fold_at_leading_word_boundary() {
|
||||
new_ucmd!()
|
||||
.args(&["-w3", "-s"])
|
||||
.pipe_in(" aaa")
|
||||
.succeeds()
|
||||
.stdout_is(" \naaa");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fold_at_word_boundary_preserve_final_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["-w4", "-s"])
|
||||
.pipe_in("one two\n")
|
||||
.succeeds()
|
||||
.stdout_is("one \ntwo\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fold_at_tab_as_word_boundary() {
|
||||
new_ucmd!()
|
||||
.args(&["-w10", "-s"])
|
||||
.pipe_in("a\tbbb\n")
|
||||
.succeeds()
|
||||
.stdout_is("a\t\nbbb\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fold_at_word_boundary_only_whitespace() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-s"])
|
||||
.pipe_in(" ")
|
||||
.succeeds()
|
||||
.stdout_is(" \n ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fold_at_word_boundary_only_whitespace_preserve_final_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-s"])
|
||||
.pipe_in(" \n")
|
||||
.succeeds()
|
||||
.stdout_is(" \n \n");
|
||||
}
|
||||
|
||||
//
|
||||
// bytewise tests
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_preserve_empty_line_without_final_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-b"])
|
||||
.pipe_in("123\n\n45")
|
||||
.succeeds()
|
||||
.stdout_is("12\n3\n\n45");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_preserve_empty_line_and_final_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-b"])
|
||||
.pipe_in("12\n\n34\n")
|
||||
.succeeds()
|
||||
.stdout_is("12\n\n34\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_preserve_empty_lines() {
|
||||
new_ucmd!()
|
||||
.arg("-b")
|
||||
.pipe_in("\n")
|
||||
.succeeds()
|
||||
.stdout_is("\n");
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["-w1", "-b"])
|
||||
.pipe_in("0\n1\n\n2\n\n\n")
|
||||
.succeeds()
|
||||
.stdout_is("0\n1\n\n2\n\n\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_word_boundary_split_should_preserve_empty_lines() {
|
||||
new_ucmd!()
|
||||
.args(&["-s", "-b"])
|
||||
.pipe_in("\n")
|
||||
.succeeds()
|
||||
.stdout_is("\n");
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["-w1", "-s", "-b"])
|
||||
.pipe_in("0\n1\n\n2\n\n\n")
|
||||
.succeeds()
|
||||
.stdout_is("0\n1\n\n2\n\n\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_not_add_newline_when_line_less_than_fold() {
|
||||
new_ucmd!()
|
||||
.arg("-b")
|
||||
.pipe_in("1234")
|
||||
.succeeds()
|
||||
.stdout_is("1234");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_not_add_newline_when_line_longer_than_fold() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-b"])
|
||||
.pipe_in("1234")
|
||||
.succeeds()
|
||||
.stdout_is("12\n34");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_not_add_newline_when_line_equal_to_fold() {
|
||||
new_ucmd!()
|
||||
.args(&["-w1", "-b"])
|
||||
.pipe_in(" ")
|
||||
.succeeds()
|
||||
.stdout_is(" ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_preserve_final_newline_when_line_less_than_fold() {
|
||||
new_ucmd!()
|
||||
.arg("-b")
|
||||
.pipe_in("1234\n")
|
||||
.succeeds()
|
||||
.stdout_is("1234\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_preserve_final_newline_when_line_longer_than_fold() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-b"])
|
||||
.pipe_in("1234\n")
|
||||
.succeeds()
|
||||
.stdout_is("12\n34\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_should_preserve_final_newline_when_line_equal_to_fold() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-b"])
|
||||
.pipe_in("1\n")
|
||||
.succeeds()
|
||||
.stdout_is("1\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_single_tab_should_not_add_extra_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["-w1", "-b"])
|
||||
.pipe_in("\t")
|
||||
.succeeds()
|
||||
.stdout_is("\t");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tab_counts_as_one_byte() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-b"])
|
||||
.pipe_in("1\t2\n")
|
||||
.succeeds()
|
||||
.stdout_is("1\t\n2\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_fold_at_word_boundary_only_whitespace() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-s", "-b"])
|
||||
.pipe_in(" ")
|
||||
.succeeds()
|
||||
.stdout_is(" \n ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytewise_fold_at_word_boundary_only_whitespace_preserve_final_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["-w2", "-s", "-b"])
|
||||
.pipe_in(" \n")
|
||||
.succeeds()
|
||||
.stdout_is(" \n \n");
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue