1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

wc: Do a chunked read with proper UTF-8 handling

This brings the results mostly in line with GNU wc and solves nasty
behavior with long lines.
This commit is contained in:
Jan Verbeek 2021-08-25 13:26:44 +02:00 committed by Michael Debertol
parent 48437fc49d
commit 6f7d740592
8 changed files with 105 additions and 138 deletions

View file

@ -53,11 +53,16 @@ fn test_utf8() {
.args(&["-lwmcL"])
.pipe_in_fixture("UTF_8_test.txt")
.run()
.stdout_is(" 300 4969 22781 22213 79\n");
// GNU returns " 300 2086 22219 22781 79"
//
// TODO: we should fix the word, character, and byte count to
// match the behavior of GNU wc
.stdout_is(" 303 2119 23025 22457 79\n");
}
#[test]
fn test_utf8_extra() {
    // Runs the utility with the combined `-lwmcL` flags, feeding the
    // `UTF_8_weirdchars.txt` fixture on stdin, and checks the exact summary
    // line that is printed.
    let expected_stdout = " 25 87 513 442 48\n";

    let outcome = new_ucmd!()
        .arg("-lwmcL")
        .pipe_in_fixture("UTF_8_weirdchars.txt")
        .run();

    outcome.stdout_is(expected_stdout);
}
#[test]