Merge pull request #1495 from orottier/wc

wc: Do not decode UTF8 when only counting bytes/newlines
2025-09-15 03:26:18 +00:00 · 2020-05-04 10:54:18 +02:00 · 2020-05-04 10:54:18 +02:00 · 06fe387c76
commit 06fe387c76
parent 30c14f1025 dab1b9ba1a
1 changed files with 21 additions and 16 deletions
--- a/src/uu/wc/src/wc.rs
+++ b/src/uu/wc/src/wc.rs
@@ -146,6 +146,9 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {
    let mut results = vec![];
    let mut max_width: usize = 0;
    // we do not need to decode the byte stream if we're only counting bytes/newlines
    let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length;
    for path in &files {
        let mut reader = open(&path[..])?;
@@ -173,24 +176,26 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {
            byte_count += raw_line.len();
-            // try and convert the bytes to UTF-8 first
+            if decode_chars {
-            let current_char_count;
+                // try and convert the bytes to UTF-8 first
-            match from_utf8(&raw_line[..]) {
+                let current_char_count;
-                Ok(line) => {
+                match from_utf8(&raw_line[..]) {
-                    word_count += line.split_whitespace().count();
+                    Ok(line) => {
-                    current_char_count = line.chars().count();
+                        word_count += line.split_whitespace().count();
                        current_char_count = line.chars().count();
                    }
                    Err(..) => {
                        word_count += raw_line.split(|&x| is_word_seperator(x)).count();
                        current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
                    }
                }
-                Err(..) => {
+                char_count += current_char_count;
                    word_count += raw_line.split(|&x| is_word_seperator(x)).count();
                    current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
                }
            }
            char_count += current_char_count;
-            if current_char_count > longest_line_length {
+                if current_char_count > longest_line_length {
-                // we subtract one here because `line.len()` includes the LF
+                    // we subtract one here because `line.len()` includes the LF
-                // matches GNU 'wc' behaviour
+                    // matches GNU 'wc' behaviour
-                longest_line_length = current_char_count - 1;
+                    longest_line_length = current_char_count - 1;
                }
            }
            raw_line.truncate(0);