mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
wc: Do not decode UTF8 when only counting bytes/newlines
This commit is contained in:
parent
78dc71630c
commit
e3bd6b6848
1 changed file with 22 additions and 16 deletions
|
@ -146,6 +146,10 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
let mut max_width: usize = 0;
|
let mut max_width: usize = 0;
|
||||||
|
|
||||||
|
// we do not need to decode the byte stream if we're only counting bytes/newlines
|
||||||
|
let decode_chars = settings.show_chars
|
||||||
|
|| settings.show_words || settings.show_max_line_length;
|
||||||
|
|
||||||
for path in &files {
|
for path in &files {
|
||||||
let mut reader = open(&path[..])?;
|
let mut reader = open(&path[..])?;
|
||||||
|
|
||||||
|
@ -173,24 +177,26 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {
|
||||||
|
|
||||||
byte_count += raw_line.len();
|
byte_count += raw_line.len();
|
||||||
|
|
||||||
// try and convert the bytes to UTF-8 first
|
if decode_chars {
|
||||||
let current_char_count;
|
// try and convert the bytes to UTF-8 first
|
||||||
match from_utf8(&raw_line[..]) {
|
let current_char_count;
|
||||||
Ok(line) => {
|
match from_utf8(&raw_line[..]) {
|
||||||
word_count += line.split_whitespace().count();
|
Ok(line) => {
|
||||||
current_char_count = line.chars().count();
|
word_count += line.split_whitespace().count();
|
||||||
|
current_char_count = line.chars().count();
|
||||||
|
}
|
||||||
|
Err(..) => {
|
||||||
|
word_count += raw_line.split(|&x| is_word_seperator(x)).count();
|
||||||
|
current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(..) => {
|
char_count += current_char_count;
|
||||||
word_count += raw_line.split(|&x| is_word_seperator(x)).count();
|
|
||||||
current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
char_count += current_char_count;
|
|
||||||
|
|
||||||
if current_char_count > longest_line_length {
|
if current_char_count > longest_line_length {
|
||||||
// we subtract one here because `line.len()` includes the LF
|
// we subtract one here because `line.len()` includes the LF
|
||||||
// matches GNU 'wc' behaviour
|
// matches GNU 'wc' behaviour
|
||||||
longest_line_length = current_char_count - 1;
|
longest_line_length = current_char_count - 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
raw_line.truncate(0);
|
raw_line.truncate(0);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue