diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index 27ab319d0..8cf95b82a 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -79,7 +79,6 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { (args.to_vec(), None) } -#[inline] fn fold(filenames: Vec, bytes: bool, spaces: bool, width: usize) { for filename in &filenames { let filename: &str = &filename; @@ -92,123 +91,173 @@ fn fold(filenames: Vec, bytes: bool, spaces: bool, width: usize) { file_buf = safe_unwrap!(File::open(Path::new(filename))); &mut file_buf as &mut dyn Read }); - fold_file(buffer, bytes, spaces, width); + + if bytes { + fold_file_bytewise(buffer, spaces, width); + } else { + fold_file(buffer, spaces, width); + } } } -#[inline] -fn fold_file(file: BufReader, bytes: bool, spaces: bool, width: usize) { - for line_result in file.lines() { - let mut line = safe_unwrap!(line_result); +/// Fold `file` to fit `width` (number of columns), counting all characters as +/// one column. +/// +/// This function handles folding for the `-b`/`--bytes` option, counting +/// tab, backspace, and carriage return as occupying one column, identically +/// to all other characters in the stream. +/// +/// If `spaces` is `true`, attempt to break lines at whitespace boundaries. 
/// Fold `file` to fit `width`, counting every byte as one column.
///
/// This implements the `-b`/`--bytes` mode: tab, backspace and carriage
/// return receive no special treatment and occupy one column each, exactly
/// like every other byte in the stream. Because the unit is the byte, a fold
/// may fall inside a multi-byte UTF-8 sequence, and input that is not valid
/// UTF-8 is passed through unchanged.
///
/// If `spaces` is `true`, a line that has to be folded is broken after the
/// last ASCII whitespace byte that fits within `width`, when there is one.
///
/// The folded text is written to standard output. A read or write failure is
/// reported on standard error and ends processing of `file`.
fn fold_file_bytewise<T: Read>(mut file: BufReader<T>, spaces: bool, width: usize) {
    let stdout = std::io::stdout();
    // Lock once instead of once per written fragment.
    let mut out = stdout.lock();

    if let Err(err) = fold_bytes_to_writer(&mut file, &mut out, spaces, width) {
        eprintln!("fold: {}", err);
    }
}

/// Read `input` line by line and write its bytewise folding to `out`.
///
/// Returns the first read or write error encountered.
fn fold_bytes_to_writer<R: BufRead, W: std::io::Write>(
    input: &mut R,
    out: &mut W,
    spaces: bool,
    width: usize,
) -> std::io::Result<()> {
    // A width of zero could never consume any input; fold after every byte
    // instead of looping forever.
    let width = width.max(1);
    let mut line = Vec::new();

    loop {
        line.clear();
        if input.read_until(b'\n', &mut line)? == 0 {
            break;
        }

        // The line terminator is not content: it does not count toward
        // `width` and is written back exactly once, after the last piece.
        let (mut rest, terminated) = match line.split_last() {
            Some((&b'\n', body)) => (body, true),
            _ => (&line[..], false),
        };

        // Only fold while more than `width` bytes remain, so a line that
        // fits exactly is never split.
        while rest.len() > width {
            let cut = if spaces {
                // Break just after the last whitespace byte in the window
                // (0x0b is vertical tab, which `is_ascii_whitespace` omits).
                rest[..width]
                    .iter()
                    .rposition(|b| b.is_ascii_whitespace() || *b == 0x0b)
                    .map_or(width, |pos| pos + 1)
            } else {
                width
            };

            let (head, tail) = rest.split_at(cut);
            out.write_all(head)?;
            out.write_all(b"\n")?;
            rest = tail;
        }

        out.write_all(rest)?;
        if terminated {
            out.write_all(b"\n")?;
        }
    }

    out.flush()
}
/// Fold `file` to fit `width` (number of columns).
///
/// Column positions follow the POSIX rules for `fold`: a tab advances to the
/// next multiple of eight columns, a backspace moves one column back (never
/// before the start of the line), a carriage return moves back to column
/// zero, and every other character advances one column. All of these
/// characters are written to the output unchanged.
///
/// If `spaces` is `true`, a line that has to be folded is broken after the
/// last whitespace character that fits within `width`, when there is one.
///
/// The folded text is written to standard output. A read failure (including
/// input that is not valid UTF-8) or a write failure is reported on standard
/// error and ends processing of `file`.
fn fold_file<T: Read>(mut file: BufReader<T>, spaces: bool, width: usize) {
    let stdout = std::io::stdout();
    // Lock once instead of once per written fragment.
    let mut out = stdout.lock();

    if let Err(err) = fold_columns_to_writer(&mut file, &mut out, spaces, width) {
        eprintln!("fold: {}", err);
    }
}

/// Column reached after `ch` has been written at column `col`.
fn column_after(col: usize, ch: char) -> usize {
    const TAB_STOP: usize = 8;

    match ch {
        '\x08' => col.saturating_sub(1),
        '\r' => 0,
        '\t' => col + TAB_STOP - col % TAB_STOP,
        _ => col + 1,
    }
}

/// Read `input` line by line and write its column-wise folding to `out`.
///
/// Returns the first read or write error encountered.
fn fold_columns_to_writer<R: BufRead, W: std::io::Write>(
    input: &mut R,
    out: &mut W,
    spaces: bool,
    width: usize,
) -> std::io::Result<()> {
    let mut line = String::new();
    // Characters of the current output line that have not been written yet.
    let mut pending = String::new();
    // Column occupied after the last character of `pending`.
    let mut col = 0;
    // Byte length of the prefix of `pending` that ends just after the most
    // recent whitespace character; only recorded when `spaces` is set. It is
    // always taken from `pending.len()`, so it is a valid char boundary.
    let mut break_at: Option<usize> = None;

    loop {
        line.clear();
        if input.read_line(&mut line)? == 0 {
            break;
        }

        for ch in line.chars() {
            if ch == '\n' {
                // Everything still pending fits, so never split it at a
                // whitespace boundary here.
                out.write_all(pending.as_bytes())?;
                out.write_all(b"\n")?;
                pending.clear();
                col = 0;
                break_at = None;
                break;
            }

            let mut next_col = column_after(col, ch);

            // Fold before `ch` while it would not fit. A character wider
            // than `width` is still accepted once `pending` is empty, which
            // guarantees progress.
            while next_col > width && !pending.is_empty() {
                let cut = break_at.take().unwrap_or(pending.len());
                out.write_all(pending[..cut].as_bytes())?;
                out.write_all(b"\n")?;
                pending.replace_range(..cut, "");

                // Re-measure what was carried over to the new line.
                col = pending.chars().fold(0, column_after);
                next_col = column_after(col, ch);
            }

            pending.push(ch);
            col = next_col;

            if spaces && ch.is_whitespace() {
                break_at = Some(pending.len());
            }
        }

        // Input ended without a newline: write what is left as-is.
        if !pending.is_empty() {
            out.write_all(pending.as_bytes())?;
            pending.clear();
            break_at = None;
        }
    }

    out.flush()
}
// Tests for `fold`, from `tests/by-util/test_fold.rs`. Each case pipes a
// small input through `new_ucmd!()` and asserts on the exact standard output.
// NOTE(review): `new_ucmd!` is defined elsewhere in the project; presumably it
// builds an invocation of the utility under test — confirm.

//
// column-wise tests

#[test]
fn test_should_preserve_empty_line_without_final_newline() {
    new_ucmd!()
        .arg("-w2")
        .pipe_in("12\n\n34")
        .succeeds()
        .stdout_is("12\n\n34");
}

#[test]
fn test_should_preserve_empty_line_and_final_newline() {
    new_ucmd!()
        .arg("-w2")
        .pipe_in("12\n\n34\n")
        .succeeds()
        .stdout_is("12\n\n34\n");
}

#[test]
fn test_should_not_add_newline_when_line_less_than_fold() {
    new_ucmd!().pipe_in("1234").succeeds().stdout_is("1234");
}

#[test]
fn test_should_not_add_newline_when_line_longer_than_fold() {
    new_ucmd!()
        .arg("-w2")
        .pipe_in("1234")
        .succeeds()
        .stdout_is("12\n34");
}

#[test]
fn test_should_not_add_newline_when_line_equal_to_fold() {
    new_ucmd!()
        .arg("-w1")
        .pipe_in(" ")
        .succeeds()
        .stdout_is(" ");
}

#[test]
fn test_should_preserve_final_newline_when_line_less_than_fold() {
    new_ucmd!().pipe_in("1234\n").succeeds().stdout_is("1234\n");
}

#[test]
fn test_should_preserve_final_newline_when_line_longer_than_fold() {
    new_ucmd!()
        .arg("-w2")
        .pipe_in("1234\n")
        .succeeds()
        .stdout_is("12\n34\n");
}

#[test]
fn test_should_preserve_final_newline_when_line_equal_to_fold() {
    new_ucmd!()
        .arg("-w2")
        .pipe_in("1\n")
        .succeeds()
        .stdout_is("1\n");
}

// A tab is wider than the requested width, but there is nothing before it
// that could be moved to a previous line.
#[test]
fn test_single_tab_should_not_add_extra_newline() {
    new_ucmd!()
        .arg("-w1")
        .pipe_in("\t")
        .succeeds()
        .stdout_is("\t");
}

#[test]
fn test_tab_counts_as_8_columns() {
    new_ucmd!()
        .arg("-w8")
        .pipe_in("\t1")
        .succeeds()
        .stdout_is("\t\n1");
}

#[test]
fn test_fold_at_word_boundary() {
    new_ucmd!()
        .args(&["-w4", "-s"])
        .pipe_in("one two")
        .succeeds()
        .stdout_is("one \ntwo");
}

#[test]
fn test_fold_at_leading_word_boundary() {
    new_ucmd!()
        .args(&["-w3", "-s"])
        .pipe_in(" aaa")
        .succeeds()
        .stdout_is(" \naaa");
}

#[test]
fn test_fold_at_word_boundary_preserve_final_newline() {
    new_ucmd!()
        .args(&["-w4", "-s"])
        .pipe_in("one two\n")
        .succeeds()
        .stdout_is("one \ntwo\n");
}

#[test]
fn test_fold_at_tab_as_word_boundary() {
    new_ucmd!()
        .args(&["-w10", "-s"])
        .pipe_in("a\tbbb\n")
        .succeeds()
        .stdout_is("a\t\nbbb\n");
}

// NOTE(review): the runs of spaces in the four "only whitespace" cases were
// collapsed in the text this was recovered from (one space in, two spaces
// out at width 2 is impossible). They are reconstructed as four spaces of
// input folded into two per line — confirm against the upstream test file.
#[test]
fn test_fold_at_word_boundary_only_whitespace() {
    new_ucmd!()
        .args(&["-w2", "-s"])
        .pipe_in("    ")
        .succeeds()
        .stdout_is("  \n  ");
}

#[test]
fn test_fold_at_word_boundary_only_whitespace_preserve_final_newline() {
    new_ucmd!()
        .args(&["-w2", "-s"])
        .pipe_in("    \n")
        .succeeds()
        .stdout_is("  \n  \n");
}

//
// bytewise tests

#[test]
fn test_bytewise_should_preserve_empty_line_without_final_newline() {
    new_ucmd!()
        .args(&["-w2", "-b"])
        .pipe_in("123\n\n45")
        .succeeds()
        .stdout_is("12\n3\n\n45");
}

#[test]
fn test_bytewise_should_preserve_empty_line_and_final_newline() {
    new_ucmd!()
        .args(&["-w2", "-b"])
        .pipe_in("12\n\n34\n")
        .succeeds()
        .stdout_is("12\n\n34\n");
}

#[test]
fn test_bytewise_should_preserve_empty_lines() {
    new_ucmd!()
        .arg("-b")
        .pipe_in("\n")
        .succeeds()
        .stdout_is("\n");

    new_ucmd!()
        .args(&["-w1", "-b"])
        .pipe_in("0\n1\n\n2\n\n\n")
        .succeeds()
        .stdout_is("0\n1\n\n2\n\n\n");
}

#[test]
fn test_bytewise_word_boundary_split_should_preserve_empty_lines() {
    new_ucmd!()
        .args(&["-s", "-b"])
        .pipe_in("\n")
        .succeeds()
        .stdout_is("\n");

    new_ucmd!()
        .args(&["-w1", "-s", "-b"])
        .pipe_in("0\n1\n\n2\n\n\n")
        .succeeds()
        .stdout_is("0\n1\n\n2\n\n\n");
}

#[test]
fn test_bytewise_should_not_add_newline_when_line_less_than_fold() {
    new_ucmd!()
        .arg("-b")
        .pipe_in("1234")
        .succeeds()
        .stdout_is("1234");
}

#[test]
fn test_bytewise_should_not_add_newline_when_line_longer_than_fold() {
    new_ucmd!()
        .args(&["-w2", "-b"])
        .pipe_in("1234")
        .succeeds()
        .stdout_is("12\n34");
}

#[test]
fn test_bytewise_should_not_add_newline_when_line_equal_to_fold() {
    new_ucmd!()
        .args(&["-w1", "-b"])
        .pipe_in(" ")
        .succeeds()
        .stdout_is(" ");
}

#[test]
fn test_bytewise_should_preserve_final_newline_when_line_less_than_fold() {
    new_ucmd!()
        .arg("-b")
        .pipe_in("1234\n")
        .succeeds()
        .stdout_is("1234\n");
}

#[test]
fn test_bytewise_should_preserve_final_newline_when_line_longer_than_fold() {
    new_ucmd!()
        .args(&["-w2", "-b"])
        .pipe_in("1234\n")
        .succeeds()
        .stdout_is("12\n34\n");
}

#[test]
fn test_bytewise_should_preserve_final_newline_when_line_equal_to_fold() {
    new_ucmd!()
        .args(&["-w2", "-b"])
        .pipe_in("1\n")
        .succeeds()
        .stdout_is("1\n");
}

#[test]
fn test_bytewise_single_tab_should_not_add_extra_newline() {
    new_ucmd!()
        .args(&["-w1", "-b"])
        .pipe_in("\t")
        .succeeds()
        .stdout_is("\t");
}

// With `-b` a tab occupies a single column, so "1\t" fills a width of two.
#[test]
fn test_tab_counts_as_one_byte() {
    new_ucmd!()
        .args(&["-w2", "-b"])
        .pipe_in("1\t2\n")
        .succeeds()
        .stdout_is("1\t\n2\n");
}

#[test]
fn test_bytewise_fold_at_word_boundary_only_whitespace() {
    new_ucmd!()
        .args(&["-w2", "-s", "-b"])
        .pipe_in("    ")
        .succeeds()
        .stdout_is("  \n  ");
}

#[test]
fn test_bytewise_fold_at_word_boundary_only_whitespace_preserve_final_newline() {
    new_ucmd!()
        .args(&["-w2", "-s", "-b"])
        .pipe_in("    \n")
        .succeeds()
        .stdout_is("  \n  \n");
}