From 1fe539f1d443336af26094086ea72083a15e92cc Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 14 May 2025 14:12:35 +0800 Subject: [PATCH 1/2] wc: Increase buffer size to 256 KiB Improves performance by about 4% on large files. --- src/uu/wc/src/count_fast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs index b79e6b0e3..450372d4d 100644 --- a/src/uu/wc/src/count_fast.rs +++ b/src/uu/wc/src/count_fast.rs @@ -32,7 +32,7 @@ use libc::S_IFIFO; #[cfg(any(target_os = "linux", target_os = "android"))] use uucore::pipes::{pipe, splice, splice_exact}; -const BUF_SIZE: usize = 16 * 1024; +const BUF_SIZE: usize = 256 * 1024; #[cfg(any(target_os = "linux", target_os = "android"))] const SPLICE_SIZE: usize = 128 * 1024; From 1fc14d837c54ca3d163efa60d83753628fbf8bc7 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 14 May 2025 20:24:06 +0800 Subject: [PATCH 2/2] wc: Align buffer to 32-byte boundary bytecount uses vector operations to speed up line counting. At least on x86 with AVX2 support, the vectors are 256-bit (32-byte) wide, and operations are much faster if the data is aligned. Improves overall performance by about 4%, matching wc's performance. --- src/uu/wc/src/count_fast.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs index 450372d4d..479183263 100644 --- a/src/uu/wc/src/count_fast.rs +++ b/src/uu/wc/src/count_fast.rs @@ -197,6 +197,23 @@ pub(crate) fn count_bytes_fast(handle: &mut T) -> (usize, Opti } } +/// A simple structure used to align a BUF_SIZE buffer to 32-byte boundary. +/// +/// This is useful as bytecount uses 256-bit wide vector operations that run much +/// faster on aligned data (at least on x86 with AVX2 support).
+#[repr(align(32))] +struct AlignedBuffer { + data: [u8; BUF_SIZE], +} + +impl Default for AlignedBuffer { + fn default() -> Self { + Self { + data: [0; BUF_SIZE], + } + } +} + /// Returns a WordCount that counts the number of bytes, lines, and/or the number of Unicode characters encoded in UTF-8 read via a Reader. /// /// This corresponds to the `-c`, `-l` and `-m` command line flags to wc. @@ -213,9 +230,9 @@ pub(crate) fn count_bytes_chars_and_lines_fast< handle: &mut R, ) -> (WordCount, Option) { let mut total = WordCount::default(); - let mut buf = [0; BUF_SIZE]; + let buf: &mut [u8] = &mut AlignedBuffer::default().data; loop { - match handle.read(&mut buf) { + match handle.read(buf) { Ok(0) => return (total, None), Ok(n) => { if COUNT_BYTES {