From e9131e2b7f758ad8d3411f1453d796de27574074 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Wed, 23 Mar 2022 19:51:27 +0800 Subject: [PATCH] wc: compute number widths using total file sizes Previously, individual file sizes were used to compute the number width, which would cause misalignment when the total has a greater number of digits, and is different from the behavior of GNU wc ``` $ ./target/debug/wc -w -l -m -c -L deny.toml GNUmakefile 95 422 3110 3110 85 deny.toml 349 865 6996 6996 196 GNUmakefile 444 1287 10106 10106 196 total $ wc -w -l -m -c -L deny.toml GNUmakefile 95 422 3110 3110 85 deny.toml 349 865 6996 6996 196 GNUmakefile 444 1287 10106 10106 196 total ``` --- src/uu/wc/src/wc.rs | 163 +++++++++++---------------------------- tests/by-util/test_wc.rs | 24 +++++- 2 files changed, 69 insertions(+), 118 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 61eda6828..968830c48 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -399,46 +399,6 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult { } } -/// Compute the number of digits needed to represent any count for this input. -/// -/// If `input` is [`Input::Stdin`], then this function returns -/// [`MINIMUM_WIDTH`]. Otherwise, if metadata could not be read from -/// `input` then this function returns 1. -/// -/// # Errors -/// -/// This function will return an error if `input` is a [`Input::Path`] -/// and there is a problem accessing the metadata of the given `input`. -/// -/// # Examples -/// -/// A [`Input::Stdin`] gets a default minimum width: -/// -/// ```rust,ignore -/// let input = Input::Stdin(StdinKind::Explicit); -/// assert_eq!(7, digit_width(input)); -/// ``` -fn digit_width(input: &Input) -> io::Result { - match input { - Input::Stdin(_) => Ok(MINIMUM_WIDTH), - Input::Path(filename) => { - let path = Path::new(filename); - let metadata = fs::metadata(path)?; - if metadata.is_file() { - // TODO We are now computing the number of bytes in a file - // twice: once here and once in `WordCount::from_line()` (or - // in `count_bytes_fast()` if that function is called - // instead). See GitHub issue #2201. - let num_bytes = metadata.len(); - let num_digits = num_bytes.to_string().len(); - Ok(num_digits) - } else { - Ok(MINIMUM_WIDTH) - } - } - } -} - /// Compute the number of digits needed to represent all counts in all inputs. /// /// `inputs` may include zero or more [`Input::Stdin`] entries, each of @@ -446,52 +406,45 @@ fn digit_width(input: &Input) -> io::Result { /// entry causes this function to return a width that is at least /// [`MINIMUM_WIDTH`]. /// -/// If `input` is empty, then this function returns 1. If file metadata -/// could not be read from any of the [`Input::Path`] inputs and there -/// are no [`Input::Stdin`] inputs, then this function returns 1. +/// If `input` is empty, or if only one number needs to be printed (for just +/// one file) then this function is optimized to return 1 without making any +/// calls to get file metadata. /// -/// If there is a problem accessing the metadata, this function will -/// silently ignore the error and assume that the number of digits -/// needed to display the counts for that file is 1. +/// If file metadata could not be read from any of the [`Input::Path`] input, +/// that input does not affect number width computation /// -/// # Examples -/// -/// An empty slice implies a width of 1: -/// -/// ```rust,ignore -/// assert_eq!(1, max_width(&vec![])); -/// ``` -/// -/// The presence of [`Input::Stdin`] implies a minimum width: -/// -/// ```rust,ignore -/// let inputs = vec![Input::Stdin(StdinKind::Explicit)]; -/// assert_eq!(7, max_width(&inputs)); -/// ``` -fn max_width(inputs: &[Input]) -> usize { - let mut result = 1; +/// Otherwise, the file sizes in the file metadata are summed and the number of +/// digits in that total size is returned as the number width +fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize { + if inputs.is_empty() || (inputs.len() == 1 && settings.number_enabled() == 1) { + return 1; + } + + let mut minimum_width = 1; + let mut total = 0; + for input in inputs { - if let Ok(n) = digit_width(input) { - result = result.max(n); + match input { + Input::Stdin(_) => { + minimum_width = MINIMUM_WIDTH; + } + Input::Path(path) => { + if let Ok(meta) = fs::metadata(path) { + if meta.is_file() { + total += meta.len(); + } else { + minimum_width = MINIMUM_WIDTH; + } + } + } } } - result + + max(minimum_width, total.to_string().len()) } fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { - // Compute the width, in digits, to use when formatting counts. - // - // The width is the number of digits needed to print the number of - // bytes in the largest file. This is true regardless of whether - // the `settings` indicate that the bytes will be displayed. - // - // If we only need to display a single number, set this to 0 to - // prevent leading spaces. - let max_width = if settings.number_enabled() <= 1 { - 0 - } else { - max_width(inputs) - }; + let number_width = compute_number_width(inputs, settings); let mut total_word_count = WordCount::default(); @@ -517,7 +470,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { }; total_word_count += word_count; let result = word_count.with_title(input.to_title()); - if let Err(err) = print_stats(settings, &result, max_width) { + if let Err(err) = print_stats(settings, &result, number_width) { show!(USimpleError::new( 1, format!( @@ -534,7 +487,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { if num_inputs > 1 { let total_result = total_word_count.with_title(Some("total".as_ref())); - if let Err(err) = print_stats(settings, &total_result, max_width) { + if let Err(err) = print_stats(settings, &total_result, number_width) { show!(USimpleError::new( 1, format!("failed to print total: {}", err) @@ -547,56 +500,32 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { Ok(()) } -fn print_stats(settings: &Settings, result: &TitledWordCount, min_width: usize) -> io::Result<()> { - let stdout = io::stdout(); - let mut stdout_lock = stdout.lock(); - - let mut is_first: bool = true; +fn print_stats( + settings: &Settings, + result: &TitledWordCount, + number_width: usize, +) -> io::Result<()> { + let mut columns = Vec::new(); if settings.show_lines { - if !is_first { - write!(stdout_lock, " ")?; - } - write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; - is_first = false; + columns.push(format!("{:1$}", result.count.lines, number_width)); } if settings.show_words { - if !is_first { - write!(stdout_lock, " ")?; - } - write!(stdout_lock, "{:1$}", result.count.words, min_width)?; - is_first = false; + columns.push(format!("{:1$}", result.count.words, number_width)); } if settings.show_chars { - if !is_first { - write!(stdout_lock, " ")?; - } - write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; - is_first = false; + columns.push(format!("{:1$}", result.count.chars, number_width)); } if settings.show_bytes { - if !is_first { - write!(stdout_lock, " ")?; - } - write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; - is_first = false; + columns.push(format!("{:1$}", result.count.bytes, number_width)); } if settings.show_max_line_length { - if !is_first { - write!(stdout_lock, " ")?; - } - write!( - stdout_lock, - "{:1$}", - result.count.max_line_length, min_width - )?; + columns.push(format!("{:1$}", result.count.max_line_length, number_width)); } if let Some(title) = result.title { - writeln!(stdout_lock, " {}", title.maybe_quote())?; - } else { - writeln!(stdout_lock)?; + columns.push(title.maybe_quote().to_string()); } - Ok(()) + writeln!(io::stdout().lock(), "{}", columns.join(" ")) } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 39689afc9..1efbe81a7 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -181,7 +181,8 @@ fn test_file_one_long_word() { .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); } -/// Test that the number of bytes in the file dictate the display width. +/// Test that the total size of all the files in the input dictates +/// the display width. /// /// The width in digits of any count is the width in digits of the /// number of bytes in the file, regardless of whether the number of @@ -203,6 +204,27 @@ fn test_file_bytes_dictate_width() { .args(&["-lw", "emptyfile.txt"]) .run() .stdout_is("0 0 emptyfile.txt\n"); + + // lorem_ipsum.txt contains 772 bytes, and alice_in_wonderland.txt contains + // 302 bytes. The total is 1074 bytes, which has a width of 4 + new_ucmd!() + .args(&["-lwc", "alice_in_wonderland.txt", "lorem_ipsum.txt"]) + .run() + .stdout_is( + " 5 57 302 alice_in_wonderland.txt\n 13 109 772 \ + lorem_ipsum.txt\n 18 166 1074 total\n", + ); + + // . is a directory, so minimum_width should get set to 7 + #[cfg(not(windows))] + const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 \ + .\n 0 0 0 total\n"; + #[cfg(windows)] + const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 total\n"; + new_ucmd!() + .args(&["-lwc", "emptyfile.txt", "."]) + .run() + .stdout_is(STDOUT); } /// Test that getting counts from a directory is an error.