From c84ee0ae0f2e75c200c438c53fb1fe8d5f46f155 Mon Sep 17 00:00:00 2001 From: Karl McDowall Date: Fri, 21 Mar 2025 08:47:11 -0600 Subject: [PATCH] cat: Improve performance of formatting. Issue #7518 Add a BufWriter over stdout when cat outputs any kind of formatted data. This improves performance considerably. --- src/uu/cat/src/cat.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index b41719cc9..8e0f167e2 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -5,7 +5,7 @@ // spell-checker:ignore (ToDO) nonprint nonblank nonprinting ELOOP use std::fs::{metadata, File}; -use std::io::{self, IsTerminal, Read, Write}; +use std::io::{self, BufWriter, IsTerminal, Read, Write}; /// Unix domain socket support #[cfg(unix)] use std::net::Shutdown; @@ -511,7 +511,9 @@ fn write_lines( ) -> CatResult<()> { let mut in_buf = [0; 1024 * 31]; let stdout = io::stdout(); - let mut writer = stdout.lock(); + let stdout = stdout.lock(); + // Add a 32K buffer for stdout - this greatly improves performance. + let mut writer = BufWriter::with_capacity(32 * 1024, stdout); while let Ok(n) = handle.reader.read(&mut in_buf) { if n == 0 { @@ -560,6 +562,14 @@ fn write_lines( } pos += offset + 1; } + // We need to flush the buffer each time around the loop in order to pass GNU tests. + // When we are reading the input from a pipe, the `handle.reader.read` call at the top + // of this loop will block (indefinitely) whilst waiting for more data. The expectation + // however is that anything that's ready for output should show up in the meantime, + // and not be buffered internally to the `cat` process. + // Hence it's necessary to flush our buffer before every time we could potentially block + // on a `std::io::Read::read` call. + writer.flush()?; } Ok(())