mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
wc: Do a chunked read with proper UTF-8 handling
This brings the results mostly in line with GNU wc and solves nasty behavior with long lines.
This commit is contained in:
parent
48437fc49d
commit
6f7d740592
8 changed files with 105 additions and 138 deletions
|
@@ -53,11 +53,16 @@ fn test_utf8() {
|
|||
.args(&["-lwmcL"])
|
||||
.pipe_in_fixture("UTF_8_test.txt")
|
||||
.run()
|
||||
.stdout_is(" 300 4969 22781 22213 79\n");
|
||||
// GNU returns " 300 2086 22219 22781 79"
|
||||
//
|
||||
// TODO: we should fix the word, character, and byte count to
|
||||
// match the behavior of GNU wc
|
||||
.stdout_is(" 303 2119 23025 22457 79\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_utf8_extra() {
|
||||
new_ucmd!()
|
||||
.arg("-lwmcL")
|
||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||
.run()
|
||||
.stdout_is(" 25 87 513 442 48\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue