Merge pull request #1495 from orottier/wc

wc: Do not decode UTF8 when only counting bytes/newlines
2025-09-15 03:26:18 +00:00 · 2020-05-04 10:54:18 +02:00 · 2020-05-04 10:54:18 +02:00 · 06fe387c76
commit 06fe387c76
parent 30c14f1025 dab1b9ba1a
1 changed files with 21 additions and 16 deletions
--- a/src/uu/wc/src/wc.rs
+++ b/src/uu/wc/src/wc.rs
@@ -146,6 +146,9 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {
    let mut results = vec![];
    let mut max_width: usize = 0;

+    // we do not need to decode the byte stream if we're only counting bytes/newlines
+    let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length;
+
    for path in &files {
        let mut reader = open(&path[..])?;

@@ -173,6 +176,7 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {

            byte_count += raw_line.len();

+            if decode_chars {
                // try and convert the bytes to UTF-8 first
                let current_char_count;
                match from_utf8(&raw_line[..]) {
@@ -192,6 +196,7 @@ fn wc(files: Vec<String>, settings: &Settings) -> StdResult<(), i32> {
                    // matches GNU 'wc' behaviour
                    longest_line_length = current_char_count - 1;
                }
+            }

            raw_line.truncate(0);
        }