wc: compute min width to format counts up front

Fix two issues with the string formatting width for counts displayed by `wc`.

First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example,

    $ cat alice_in_wonderland.txt | wc
          5      57     302

Second, if at least 10^7 bytes were read from `stdin` *after* reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example,

    $ printf "%.0s0" {1..10000000} | wc emptyfile.txt -
          0       0       0 emptyfile.txt
           0        1 10000000
           0        1 10000000 total

Fixes #2186.
2025-07-28 03:27:44 +00:00 · 2021-05-07 15:07:17 -04:00 · 2021-05-07 15:07:17 -04:00 · 97a49c7c95
commit 97a49c7c95
parent 620a5a5df6
2 changed files with 128 additions and 16 deletions
--- a/tests/by-util/test_wc.rs
+++ b/tests/by-util/test_wc.rs
@ -33,7 +33,7 @@ fn test_stdin_default() {
    new_ucmd!()
        .pipe_in_fixture("lorem_ipsum.txt")
        .run()
-        .stdout_is(" 13 109 772\n");
+        .stdout_is("     13     109     772\n");
 }

 #[test]
@ -42,7 +42,7 @@ fn test_stdin_explicit() {
        .pipe_in_fixture("lorem_ipsum.txt")
        .arg("-")
        .run()
-        .stdout_is(" 13 109 772 -\n");
+        .stdout_is("     13     109     772 -\n");
 }

 #[test]
@ -51,9 +51,11 @@ fn test_utf8() {
        .args(&["-lwmcL"])
        .pipe_in_fixture("UTF_8_test.txt")
        .run()
-        .stdout_is("  300  4969 22781 22213    79\n");
-    // GNU returns "  300  2086 22219 22781    79"
-    // TODO: we should fix that to match GNU's behavior
+        .stdout_is("    300    4969   22781   22213      79\n");
+    // GNU returns "    300    2086   22219   22781      79"
+    //
+    // TODO: we should fix the word, character, and byte count to
+    // match the behavior of GNU wc
 }

 #[test]
@ -80,7 +82,7 @@ fn test_stdin_all_counts() {
        .args(&["-c", "-m", "-l", "-L", "-w"])
        .pipe_in_fixture("alice_in_wonderland.txt")
        .run()
-        .stdout_is("  5  57 302 302  66\n");
+        .stdout_is("      5      57     302     302      66\n");
 }

 #[test]
@ -169,6 +171,30 @@ fn test_file_one_long_word() {
        .stdout_is("    1     1 10001 10001 10000 onelongword.txt\n");
 }

+/// Test that the number of bytes in the file dictates the display width.
+///
+/// Every count is padded to the number of digits in the file's byte
+/// count, regardless of whether the byte count itself is displayed
+/// (here only `-lw`, lines and words, are requested).
+#[test]
+fn test_file_bytes_dictate_width() {
+    // This file has 10,001 bytes. Five digits are required to
+    // represent that. Even though the number of lines is 1 and the
+    // number of words is 0, each of those counts is formatted with
+    // five characters, left-padded with whitespace.
+    new_ucmd!()
+        .args(&["-lw", "onelongemptyline.txt"])
+        .run()
+        .stdout_is("    1     0 onelongemptyline.txt\n");
+
+    // This file has zero bytes. Only one digit is required to
+    // represent that, so the counts are printed without padding.
+    new_ucmd!()
+        .args(&["-lw", "emptyfile.txt"])
+        .run()
+        .stdout_is("0 0 emptyfile.txt\n");
+}
+
 /// Test that getting counts from a directory is an error.
 #[test]
 fn test_read_from_directory_error() {