1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

wc: compute min width to format counts up front

Fix two issues with the string formatting width for counts displayed
by `wc`.

First, the output was previously not using the default minimum width
(seven characters) when reading from `stdin`. This commit corrects
this behavior to match GNU `wc`. For example,

    $ cat alice_in_wonderland.txt | wc
          5      57     302

Second, if at least 10^7 bytes were read from `stdin` *after* reading
from a smaller regular file, then every output row would have width
8. This disagrees with GNU `wc`, in which only the `stdin` row and the
total row would have width 8. This commit corrects this behavior to
match GNU `wc`. For example,

    $ printf "%.0s0" {1..10000000} | wc emptyfile.txt -
          0       0       0 emptyfile.txt
          0       1 10000000
          0       1 10000000 total

Fixes #2186.
This commit is contained in:
Jeffrey Finkelstein 2021-05-07 15:07:17 -04:00
parent 620a5a5df6
commit 97a49c7c95
2 changed files with 128 additions and 16 deletions

View file

@ -33,7 +33,7 @@ fn test_stdin_default() {
new_ucmd!()
.pipe_in_fixture("lorem_ipsum.txt")
.run()
.stdout_is(" 13 109 772\n");
.stdout_is(" 13 109 772\n");
}
#[test]
@ -42,7 +42,7 @@ fn test_stdin_explicit() {
.pipe_in_fixture("lorem_ipsum.txt")
.arg("-")
.run()
.stdout_is(" 13 109 772 -\n");
.stdout_is(" 13 109 772 -\n");
}
#[test]
@ -51,9 +51,11 @@ fn test_utf8() {
.args(&["-lwmcL"])
.pipe_in_fixture("UTF_8_test.txt")
.run()
.stdout_is(" 300 4969 22781 22213 79\n");
// GNU returns " 300 2086 22219 22781 79"
// TODO: we should fix that to match GNU's behavior
.stdout_is(" 300 4969 22781 22213 79\n");
// GNU returns " 300 2086 22219 22781 79"
//
// TODO: we should fix the word, character, and byte count to
// match the behavior of GNU wc
}
#[test]
@ -80,7 +82,7 @@ fn test_stdin_all_counts() {
.args(&["-c", "-m", "-l", "-L", "-w"])
.pipe_in_fixture("alice_in_wonderland.txt")
.run()
.stdout_is(" 5 57 302 302 66\n");
.stdout_is(" 5 57 302 302 66\n");
}
#[test]
@ -169,6 +171,30 @@ fn test_file_one_long_word() {
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
}
/// Test that the number of bytes in the file dictates the display width.
///
/// The width in digits of any count is the width in digits of the
/// number of bytes in the file, regardless of whether the number of
/// bytes is displayed.
#[test]
fn test_file_bytes_dictate_width() {
// This file has 10,001 bytes. Five digits are required to
// represent that. Even though the number of lines is 1 and the
// number of words is 0, each of those counts is formatted with
// five characters, padded with whitespace.
//
// Only `-lw` is passed, so the byte count itself is not requested
// here; it is expected to influence the column width regardless.
new_ucmd!()
.args(&["-lw", "onelongemptyline.txt"])
.run()
.stdout_is(" 1 0 onelongemptyline.txt\n");
// This file has zero bytes. Only one digit is required to
// represent that, so the counts are expected without any padding.
new_ucmd!()
.args(&["-lw", "emptyfile.txt"])
.run()
.stdout_is("0 0 emptyfile.txt\n");
}
/// Test that getting counts from a directory is an error.
#[test]
fn test_read_from_directory_error() {