1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

cat: Improve performance on Linux (#1978)

* cat: Improve performance, especially on Linux

* cat: Don't use io::copy for splice fallback

On my MacBook Pro 2020, it is around 25% faster to not use io::copy.

* cat: Only fall back to generic copy if first splice fails

* cat: Don't double buffer stdout

* cat: Don't use experimental or-pattern syntax

* cat: Remove nix symbol use from non-Linux
This commit is contained in:
Árni Dagur 2021-04-01 21:08:48 +00:00 committed by GitHub
parent 2941dfd698
commit 7a947cfe46
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 255 additions and 175 deletions

5
Cargo.lock generated
View file

@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3
[[package]] [[package]]
name = "advapi32-sys" name = "advapi32-sys"
version = "0.2.0" version = "0.2.0"
@ -1581,7 +1583,8 @@ name = "uu_cat"
version = "0.0.4" version = "0.0.4"
dependencies = [ dependencies = [
"clap", "clap",
"quick-error", "nix 0.20.0",
"thiserror",
"unix_socket", "unix_socket",
"uucore", "uucore",
"uucore_procs", "uucore_procs",

View file

@ -16,13 +16,16 @@ path = "src/cat.rs"
[dependencies] [dependencies]
clap = "2.33" clap = "2.33"
quick-error = "1.2.3" thiserror = "1.0"
uucore = { version=">=0.0.7", package="uucore", path="../../uucore", features=["fs"] } uucore = { version=">=0.0.7", package="uucore", path="../../uucore", features=["fs"] }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
[target.'cfg(unix)'.dependencies] [target.'cfg(unix)'.dependencies]
unix_socket = "0.5.0" unix_socket = "0.5.0"
[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies]
nix = "0.20"
[[bin]] [[bin]]
name = "cat" name = "cat"
path = "src/main.rs" path = "src/main.rs"

View file

@ -3,14 +3,13 @@
// (c) Jordi Boggiano <j.boggiano@seld.be> // (c) Jordi Boggiano <j.boggiano@seld.be>
// (c) Evgeniy Klyuchikov <evgeniy.klyuchikov@gmail.com> // (c) Evgeniy Klyuchikov <evgeniy.klyuchikov@gmail.com>
// (c) Joshua S. Miller <jsmiller@uchicago.edu> // (c) Joshua S. Miller <jsmiller@uchicago.edu>
// (c) Árni Dagur <arni@dagur.eu>
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore (ToDO) nonprint nonblank nonprinting // spell-checker:ignore (ToDO) nonprint nonblank nonprinting
#[macro_use]
extern crate quick_error;
#[cfg(unix)] #[cfg(unix)]
extern crate unix_socket; extern crate unix_socket;
#[macro_use] #[macro_use]
@ -18,9 +17,9 @@ extern crate uucore;
// last synced with: cat (GNU coreutils) 8.13 // last synced with: cat (GNU coreutils) 8.13
use clap::{App, Arg}; use clap::{App, Arg};
use quick_error::ResultExt;
use std::fs::{metadata, File}; use std::fs::{metadata, File};
use std::io::{self, stderr, stdin, stdout, BufWriter, Read, Write}; use std::io::{self, Read, Write};
use thiserror::Error;
use uucore::fs::is_stdin_interactive; use uucore::fs::is_stdin_interactive;
/// Unix domain socket support /// Unix domain socket support
@ -31,12 +30,44 @@ use std::os::unix::fs::FileTypeExt;
#[cfg(unix)] #[cfg(unix)]
use unix_socket::UnixStream; use unix_socket::UnixStream;
#[cfg(any(target_os = "linux", target_os = "android"))]
use nix::errno::Errno;
/// Linux splice support
#[cfg(any(target_os = "linux", target_os = "android"))]
use nix::fcntl::{splice, SpliceFFlags};
#[cfg(any(target_os = "linux", target_os = "android"))]
use nix::unistd::pipe;
#[cfg(any(target_os = "linux", target_os = "android"))]
use std::os::unix::io::{AsRawFd, RawFd};
static NAME: &str = "cat"; static NAME: &str = "cat";
static VERSION: &str = env!("CARGO_PKG_VERSION"); static VERSION: &str = env!("CARGO_PKG_VERSION");
static SYNTAX: &str = "[OPTION]... [FILE]..."; static SYNTAX: &str = "[OPTION]... [FILE]...";
static SUMMARY: &str = "Concatenate FILE(s), or standard input, to standard output static SUMMARY: &str = "Concatenate FILE(s), or standard input, to standard output
With no FILE, or when FILE is -, read standard input."; With no FILE, or when FILE is -, read standard input.";
/// Errors that can occur while `cat` processes a single input.
///
/// The `#[error(...)]` attribute on each variant holds the message text for
/// that variant.
#[derive(Error, Debug)]
enum CatError {
/// Wrapper around `io::Error`. The message is the wrapped error's own text
/// only (`{0}`); no path is included.
#[error("{0}")]
Io(#[from] io::Error),
/// Wrapper around `nix::Error`. Only compiled on Linux and Android; like
/// `Io`, its message carries no path.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[error("{0}")]
Nix(#[from] nix::Error),
/// Unknown file type; it's not a regular file, socket, etc.
#[error("{}: unknown filetype: {}", path, ft_debug)]
UnknownFiletype {
/// The path whose file type could not be classified
path: String,
/// A debug print of the file type
ft_debug: String,
},
/// The path names a directory; the payload is that path.
#[error("{0}: Expected a file, found directory")]
IsDirectory(String),
}
/// Shorthand for a `Result` whose error type is `CatError`.
type CatResult<T> = Result<T, CatError>;
#[derive(PartialEq)] #[derive(PartialEq)]
enum NumberingMode { enum NumberingMode {
None, None,
@ -44,39 +75,6 @@ enum NumberingMode {
All, All,
} }
// Error type for `cat`, declared through the `quick_error!` macro. Each
// variant's `display(...)` clause holds its message text; every message
// starts with "cat: ".
quick_error! {
#[derive(Debug)]
enum CatError {
/// Wrapper for io::Error with path context
Input(err: io::Error, path: String) {
display("cat: {0}: {1}", path, err)
// NOTE(review): presumably this is what backs the `.context(path)` calls
// made through `quick_error::ResultExt` -- confirm against the
// quick_error documentation.
context(path: &'a str, err: io::Error) -> (err, path.to_owned())
cause(err)
}
/// Wrapper for io::Error with no context
Output(err: io::Error) {
display("cat: {0}", err) from()
cause(err)
}
/// Unknown Filetype classification
UnknownFiletype(path: String) {
display("cat: {0}: unknown filetype", path)
}
/// At least one error was encountered in reading or writing
EncounteredErrors(count: usize) {
display("cat: encountered {0} errors", count)
}
/// Denotes an error caused by trying to `cat` a directory
IsDirectory(path: String) {
display("cat: {0}: Is a directory", path)
}
}
}
struct OutputOptions { struct OutputOptions {
/// Line numbering mode /// Line numbering mode
number: NumberingMode, number: NumberingMode,
@ -87,21 +85,56 @@ struct OutputOptions {
/// display TAB characters as `tab` /// display TAB characters as `tab`
show_tabs: bool, show_tabs: bool,
/// If `show_tabs == true`, this string will be printed in the /// Show end of lines
/// place of tabs show_ends: bool,
tab: String,
/// Can be set to show characters other than '\n' at the end of
/// each line, e.g. $
end_of_line: String,
/// use ^ and M- notation, except for LF (\\n) and TAB (\\t) /// use ^ and M- notation, except for LF (\\n) and TAB (\\t)
show_nonprint: bool, show_nonprint: bool,
} }
impl OutputOptions {
    /// Text emitted in place of a TAB byte: `^I` when tabs are being made
    /// visible, a literal tab otherwise.
    fn tab(&self) -> &'static str {
        match self.show_tabs {
            true => "^I",
            false => "\t",
        }
    }

    /// Text emitted at the end of every line: `$` followed by a newline when
    /// line ends are being made visible, a bare newline otherwise.
    fn end_of_line(&self) -> &'static str {
        match self.show_ends {
            true => "$\n",
            false => "\n",
        }
    }

    /// Reports whether the input can be copied to stdout verbatim, i.e. no
    /// option asks for the output to be augmented (line numbers, visible
    /// tabs / line ends / non-printing characters, blank-line squeezing).
    fn can_write_fast(&self) -> bool {
        let unnumbered = self.number == NumberingMode::None;
        unnumbered
            && !self.show_tabs
            && !self.show_nonprint
            && !self.show_ends
            && !self.squeeze_blank
    }
}
/// State that persists between output of each file. This struct is only used
/// when we can't write fast.
///
/// `cat_files` creates one value (line 1, at line start) and hands the same
/// value to every input in turn.
struct OutputState {
/// The current line number, i.e. the number the next numbered line will be
/// given
line_number: usize,
/// Whether the output cursor is at the beginning of a new line
at_line_start: bool,
}
/// Represents an open file handle, stream, or other device /// Represents an open file handle, stream, or other device
struct InputHandle { struct InputHandle<R: Read> {
reader: Box<dyn Read>, #[cfg(any(target_os = "linux", target_os = "android"))]
file_descriptor: RawFd,
reader: R,
is_interactive: bool, is_interactive: bool,
} }
@ -124,8 +157,6 @@ enum InputType {
Socket, Socket,
} }
type CatResult<T> = Result<T, CatError>;
mod options { mod options {
pub static FILE: &str = "file"; pub static FILE: &str = "file";
pub static SHOW_ALL: &str = "show-all"; pub static SHOW_ALL: &str = "show-all";
@ -243,30 +274,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
None => vec!["-".to_owned()], None => vec!["-".to_owned()],
}; };
let can_write_fast = !(show_tabs
|| show_nonprint
|| show_ends
|| squeeze_blank
|| number_mode != NumberingMode::None);
let success = if can_write_fast {
write_fast(files).is_ok()
} else {
let tab = if show_tabs { "^I" } else { "\t" }.to_owned();
let end_of_line = if show_ends { "$\n" } else { "\n" }.to_owned();
let options = OutputOptions { let options = OutputOptions {
end_of_line, show_ends,
number: number_mode, number: number_mode,
show_nonprint, show_nonprint,
show_tabs, show_tabs,
squeeze_blank, squeeze_blank,
tab,
};
write_lines(files, &options).is_ok()
}; };
let success = cat_files(files, &options).is_ok();
if success { if success {
0 0
@ -275,6 +290,76 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
} }
} }
/// Copies one already-opened input to standard output.
///
/// When `options` asks for no augmentation of the output the input is handed
/// to `write_fast`; otherwise it goes through `write_lines`, which also
/// receives the cross-file `state`. The chosen writer's result is returned
/// unchanged.
fn cat_handle<R: Read>(
    handle: &mut InputHandle<R>,
    options: &OutputOptions,
    state: &mut OutputState,
) -> CatResult<()> {
    if !options.can_write_fast() {
        return write_lines(handle, options, state);
    }
    write_fast(handle)
}
/// Opens the input named by `path` and streams it to standard output.
///
/// * `-` selects standard input.
/// * A directory is rejected with `CatError::IsDirectory`.
/// * On Unix, a socket is connected to and its write half is shut down before
///   it is read.
/// * Every other input type is opened with `File::open`.
///
/// Any failure while classifying, opening or copying the input is returned to
/// the caller.
fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
    if path == "-" {
        let input = io::stdin();
        let mut stdin_handle = InputHandle {
            #[cfg(any(target_os = "linux", target_os = "android"))]
            file_descriptor: input.as_raw_fd(),
            reader: input,
            is_interactive: is_stdin_interactive(),
        };
        return cat_handle(&mut stdin_handle, options, state);
    }

    let kind = get_input_type(path)?;
    if let InputType::Directory = kind {
        return Err(CatError::IsDirectory(path.to_owned()));
    }

    #[cfg(unix)]
    {
        if let InputType::Socket = kind {
            let stream = UnixStream::connect(path)?;
            stream.shutdown(Shutdown::Write)?;
            let mut socket_handle = InputHandle {
                #[cfg(any(target_os = "linux", target_os = "android"))]
                file_descriptor: stream.as_raw_fd(),
                reader: stream,
                is_interactive: false,
            };
            return cat_handle(&mut socket_handle, options, state);
        }
    }

    let opened = File::open(path)?;
    let mut file_handle = InputHandle {
        #[cfg(any(target_os = "linux", target_os = "android"))]
        file_descriptor: opened.as_raw_fd(),
        reader: opened,
        is_interactive: false,
    };
    cat_handle(&mut file_handle, options, state)
}
fn cat_files(files: Vec<String>, options: &OutputOptions) -> Result<(), u32> {
let mut error_count = 0;
let mut state = OutputState {
line_number: 1,
at_line_start: true,
};
for path in &files {
if let Err(err) = cat_path(path, &options, &mut state) {
show_error!("{}", err);
error_count += 1;
}
}
if error_count == 0 {
Ok(())
} else {
Err(error_count)
}
}
/// Classifies the `InputType` of file at `path` if possible /// Classifies the `InputType` of file at `path` if possible
/// ///
/// # Arguments /// # Arguments
@ -285,7 +370,8 @@ fn get_input_type(path: &str) -> CatResult<InputType> {
return Ok(InputType::StdIn); return Ok(InputType::StdIn);
} }
match metadata(path).context(path)?.file_type() { let ft = metadata(path)?.file_type();
match ft {
#[cfg(unix)] #[cfg(unix)]
ft if ft.is_block_device() => Ok(InputType::BlockDevice), ft if ft.is_block_device() => Ok(InputType::BlockDevice),
#[cfg(unix)] #[cfg(unix)]
@ -297,125 +383,113 @@ fn get_input_type(path: &str) -> CatResult<InputType> {
ft if ft.is_dir() => Ok(InputType::Directory), ft if ft.is_dir() => Ok(InputType::Directory),
ft if ft.is_file() => Ok(InputType::File), ft if ft.is_file() => Ok(InputType::File),
ft if ft.is_symlink() => Ok(InputType::SymLink), ft if ft.is_symlink() => Ok(InputType::SymLink),
_ => Err(CatError::UnknownFiletype(path.to_owned())), _ => Err(CatError::UnknownFiletype {
path: path.to_owned(),
ft_debug: format!("{:?}", ft),
}),
} }
} }
/// Returns an InputHandle from which a Reader can be accessed or an /// Writes handle to stdout with no configuration. This allows a
/// error /// simple memory copy.
/// fn write_fast<R: Read>(handle: &mut InputHandle<R>) -> CatResult<()> {
/// # Arguments let stdout = io::stdout();
/// let mut stdout_lock = stdout.lock();
/// * `path` - `InputHandler` will wrap a reader from this file path #[cfg(any(target_os = "linux", target_os = "android"))]
fn open(path: &str) -> CatResult<InputHandle> { {
if path == "-" { // If we're on Linux or Android, try to use the splice() system call
let stdin = stdin(); // for faster writing. If it works, we're done.
return Ok(InputHandle { if !write_fast_using_splice(handle, stdout.as_raw_fd())? {
reader: Box::new(stdin) as Box<dyn Read>, return Ok(());
is_interactive: is_stdin_interactive(),
});
}
match get_input_type(path)? {
InputType::Directory => Err(CatError::IsDirectory(path.to_owned())),
#[cfg(unix)]
InputType::Socket => {
let socket = UnixStream::connect(path).context(path)?;
socket.shutdown(Shutdown::Write).context(path)?;
Ok(InputHandle {
reader: Box::new(socket) as Box<dyn Read>,
is_interactive: false,
})
}
_ => {
let file = File::open(path).context(path)?;
Ok(InputHandle {
reader: Box::new(file) as Box<dyn Read>,
is_interactive: false,
})
} }
} }
} // If we're not on Linux or Android, or the splice() call failed,
// fall back on slower writing.
/// Writes files to stdout with no configuration. This allows a let mut buf = [0; 1024 * 64];
/// simple memory copy. Returns `Ok(())` if no errors were while let Ok(n) = handle.reader.read(&mut buf) {
/// encountered, or an error with the number of errors encountered.
///
/// # Arguments
///
/// * `files` - There is no short circuit when encountering an error
/// reading a file in this vector
fn write_fast(files: Vec<String>) -> CatResult<()> {
let mut writer = stdout();
let mut in_buf = [0; 1024 * 64];
let mut error_count = 0;
for file in files {
match open(&file[..]) {
Ok(mut handle) => {
while let Ok(n) = handle.reader.read(&mut in_buf) {
if n == 0 { if n == 0 {
break; break;
} }
writer.write_all(&in_buf[..n]).context(&file[..])?; stdout_lock.write_all(&buf[..n])?;
}
}
Err(error) => {
writeln!(&mut stderr(), "{}", error)?;
error_count += 1;
}
}
}
match error_count {
0 => Ok(()),
_ => Err(CatError::EncounteredErrors(error_count)),
} }
Ok(())
} }
/// State that persists between output of each file /// This function is called from `write_fast()` on Linux and Android. The
struct OutputState { /// function `splice()` is used to move data between two file descriptors
/// The current line number /// without copying between kernel- and userspace. This results in a large
line_number: usize, /// speedup.
/// Whether the output cursor is at the beginning of a new line
at_line_start: bool,
}
/// Writes files to stdout with `options` as configuration. Returns
/// `Ok(())` if no errors were encountered, or an error with the
/// number of errors encountered.
/// ///
/// # Arguments /// The `bool` in the result value indicates if we need to fall back to normal
/// /// copying or not. False means we don't have to.
/// * `files` - There is no short circuit when encountering an error #[cfg(any(target_os = "linux", target_os = "android"))]
/// reading a file in this vector #[inline]
fn write_lines(files: Vec<String>, options: &OutputOptions) -> CatResult<()> { fn write_fast_using_splice<R: Read>(handle: &mut InputHandle<R>, writer: RawFd) -> CatResult<bool> {
let mut error_count = 0; const BUF_SIZE: usize = 1024 * 16;
let mut state = OutputState {
line_number: 1,
at_line_start: true,
};
for file in files { let (pipe_rd, pipe_wr) = pipe()?;
if let Err(error) = write_file_lines(&file, options, &mut state) {
writeln!(&mut stderr(), "{}", error).context(&file[..])?; // We only fall back if splice fails on the first call.
error_count += 1; match splice(
handle.file_descriptor,
None,
pipe_wr,
None,
BUF_SIZE,
SpliceFFlags::empty(),
) {
Ok(n) => {
if n == 0 {
return Ok(false);
}
}
Err(err) => {
match err.as_errno() {
Some(Errno::EPERM) | Some(Errno::ENOSYS) | Some(Errno::EINVAL) => {
// EPERM indicates the call was blocked by seccomp.
// ENOSYS indicates we're running on an ancient Kernel.
// EINVAL indicates some other failure.
return Ok(true);
}
_ => {
// Other errors include running out of memory, etc. We
// don't attempt to fall back from these.
return Err(err)?;
}
}
} }
} }
match error_count { loop {
0 => Ok(()), let n = splice(
_ => Err(CatError::EncounteredErrors(error_count)), handle.file_descriptor,
None,
pipe_wr,
None,
BUF_SIZE,
SpliceFFlags::empty(),
)?;
if n == 0 {
// We read 0 bytes from the input,
// which means we're done copying.
break;
} }
splice(pipe_rd, None, writer, None, BUF_SIZE, SpliceFFlags::empty())?;
}
Ok(false)
} }
/// Outputs file contents to stdout in a line-by-line fashion, /// Outputs file contents to stdout in a line-by-line fashion,
/// propagating any errors that might occur. /// propagating any errors that might occur.
fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> { fn write_lines<R: Read>(
let mut handle = open(file)?; handle: &mut InputHandle<R>,
options: &OutputOptions,
state: &mut OutputState,
) -> CatResult<()> {
let mut in_buf = [0; 1024 * 31]; let mut in_buf = [0; 1024 * 31];
let mut writer = BufWriter::with_capacity(1024 * 64, stdout()); let stdout = io::stdout();
let mut writer = stdout.lock();
let mut one_blank_kept = false; let mut one_blank_kept = false;
while let Ok(n) = handle.reader.read(&mut in_buf) { while let Ok(n) = handle.reader.read(&mut in_buf) {
@ -433,9 +507,9 @@ fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState
write!(&mut writer, "{0:6}\t", state.line_number)?; write!(&mut writer, "{0:6}\t", state.line_number)?;
state.line_number += 1; state.line_number += 1;
} }
writer.write_all(options.end_of_line.as_bytes())?; writer.write_all(options.end_of_line().as_bytes())?;
if handle.is_interactive { if handle.is_interactive {
writer.flush().context(file)?; writer.flush()?;
} }
} }
state.at_line_start = true; state.at_line_start = true;
@ -450,7 +524,7 @@ fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState
// print to end of line or end of buffer // print to end of line or end of buffer
let offset = if options.show_nonprint { let offset = if options.show_nonprint {
write_nonprint_to_end(&in_buf[pos..], &mut writer, options.tab.as_bytes()) write_nonprint_to_end(&in_buf[pos..], &mut writer, options.tab().as_bytes())
} else if options.show_tabs { } else if options.show_tabs {
write_tab_to_end(&in_buf[pos..], &mut writer) write_tab_to_end(&in_buf[pos..], &mut writer)
} else { } else {
@ -462,7 +536,7 @@ fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState
break; break;
} }
// print suitable end of line // print suitable end of line
writer.write_all(options.end_of_line.as_bytes())?; writer.write_all(options.end_of_line().as_bytes())?;
if handle.is_interactive { if handle.is_interactive {
writer.flush()?; writer.flush()?;
} }