diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs index 5c8d97afd..d151c9c90 100644 --- a/src/uu/wc/src/count_fast.rs +++ b/src/uu/wc/src/count_fast.rs @@ -117,32 +117,21 @@ pub(crate) fn count_bytes_fast(handle: &mut T) -> (usize, Opti } } -pub(crate) fn count_bytes_and_lines_fast( - handle: &mut R, -) -> (WordCount, Option) { - let mut total = WordCount::default(); - let mut buf = [0; BUF_SIZE]; - loop { - match handle.read(&mut buf) { - Ok(0) => return (total, None), - Ok(n) => { - total.bytes += n; - total.lines += bytecount::count(&buf[..n], b'\n'); - } - Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, - Err(e) => return (total, Some(e)), - } - } -} - -/// Returns a WordCount that counts the number of Unicode characters encoded in UTF-8 read via a Reader. +/// Returns a WordCount that counts the number of bytes, lines, and/or the number of Unicode characters encoded in UTF-8 read via a Reader. /// -/// This corresponds to the `-m` command line flag to wc. +/// This corresponds to the `-c`, `-l` and `-m` command line flags to wc. /// /// # Arguments /// /// * `R` - A Reader from which the UTF-8 stream will be read. -pub(crate) fn count_chars_fast(handle: &mut R) -> (WordCount, Option) { +pub(crate) fn count_bytes_chars_and_lines_fast< + R: Read, + const COUNT_BYTES: bool, + const COUNT_CHARS: bool, + const COUNT_LINES: bool, +>( + handle: &mut R, +) -> (WordCount, Option) { /// Mask of the value bits of a continuation byte const CONT_MASK: u8 = 0b0011_1111u8; /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte @@ -154,10 +143,18 @@ pub(crate) fn count_chars_fast(handle: &mut R) -> (WordCount, Option return (total, None), Ok(n) => { - total.chars += buf[..n] - .iter() - .filter(|&&byte| (byte & !CONT_MASK) != TAG_CONT_U8) - .count(); + if COUNT_BYTES { + total.bytes += n; + } + if COUNT_CHARS { + total.chars += buf[..n] + .iter() + .filter(|&&byte| (byte & !CONT_MASK) != TAG_CONT_U8) + .count(); + } + if COUNT_LINES { + total.lines += bytecount::count(&buf[..n], b'\n'); + } } Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, Err(e) => return (total, Some(e)), diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 91d1feeed..386e1bc28 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -13,7 +13,7 @@ extern crate uucore; mod count_fast; mod countable; mod word_count; -use count_fast::{count_bytes_and_lines_fast, count_bytes_fast, count_chars_fast}; +use count_fast::{count_bytes_chars_and_lines_fast, count_bytes_fast}; use countable::WordCountable; use unicode_width::UnicodeWidthChar; use utf8::{BufReadDecoder, BufReadDecoderError}; @@ -315,7 +315,7 @@ fn word_count_from_reader( ) { // Specialize scanning loop to improve the performance. (false, false, false, false, false) => unreachable!(), - (false, true, false, false, false) => count_chars_fast(&mut reader), + (true, false, false, false, false) => { // Fast path when only show_bytes is true. let (bytes, error) = count_bytes_fast(&mut reader); @@ -327,10 +327,27 @@ fn word_count_from_reader( error, ) } - (false, false, true, false, false) | (true, false, true, false, false) => { - // Fast path when only (show_bytes || show_lines) is true. - count_bytes_and_lines_fast(&mut reader) + + // Fast paths that can be computed without Unicode decoding. + (false, false, true, false, false) => { + count_bytes_chars_and_lines_fast::<_, false, false, true>(&mut reader) } + (false, true, false, false, false) => { + count_bytes_chars_and_lines_fast::<_, false, true, false>(&mut reader) + } + (false, true, true, false, false) => { + count_bytes_chars_and_lines_fast::<_, false, true, true>(&mut reader) + } + (true, false, true, false, false) => { + count_bytes_chars_and_lines_fast::<_, true, false, true>(&mut reader) + } + (true, true, false, false, false) => { + count_bytes_chars_and_lines_fast::<_, true, true, false>(&mut reader) + } + (true, true, true, false, false) => { + count_bytes_chars_and_lines_fast::<_, true, true, true>(&mut reader) + } + (_, false, false, false, true) => { word_count_from_reader_specialized::<_, false, false, false, true>(reader) } @@ -349,9 +366,6 @@ fn word_count_from_reader( (_, false, true, true, true) => { word_count_from_reader_specialized::<_, false, true, true, true>(reader) } - (_, true, false, false, false) => { - word_count_from_reader_specialized::<_, true, false, false, false>(reader) - } (_, true, false, false, true) => { word_count_from_reader_specialized::<_, true, false, false, true>(reader) } @@ -361,9 +375,6 @@ fn word_count_from_reader( (_, true, false, true, true) => { word_count_from_reader_specialized::<_, true, false, true, true>(reader) } - (_, true, true, false, false) => { - word_count_from_reader_specialized::<_, true, true, false, false>(reader) - } (_, true, true, false, true) => { word_count_from_reader_specialized::<_, true, true, false, true>(reader) }