From 7a947cfe46b694f61a5a7d840d622ce292da457c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81rni=20Dagur?= Date: Thu, 1 Apr 2021 21:08:48 +0000 Subject: [PATCH] cat: Improve performance on Linux (#1978) * cat: Improve performance, especially on Linux * cat: Don't use io::copy for splice fallback On my MacBook Pro 2020, it is around 25% faster to not use io::copy. * cat: Only fall back to generic copy if first splice fails * cat: Don't double buffer stdout * cat: Don't use experimental or-pattern syntax * cat: Remove nix symbol use from non-Linux --- Cargo.lock | 5 +- src/uu/cat/Cargo.toml | 5 +- src/uu/cat/src/cat.rs | 420 +++++++++++++++++++++++++----------------- 3 files changed, 255 insertions(+), 175 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e273f776c..f7e1187b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "advapi32-sys" version = "0.2.0" @@ -1581,7 +1583,8 @@ name = "uu_cat" version = "0.0.4" dependencies = [ "clap", - "quick-error", + "nix 0.20.0", + "thiserror", "unix_socket", "uucore", "uucore_procs", diff --git a/src/uu/cat/Cargo.toml b/src/uu/cat/Cargo.toml index b6254cf6b..3878aee96 100644 --- a/src/uu/cat/Cargo.toml +++ b/src/uu/cat/Cargo.toml @@ -16,13 +16,16 @@ path = "src/cat.rs" [dependencies] clap = "2.33" -quick-error = "1.2.3" +thiserror = "1.0" uucore = { version=">=0.0.7", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [target.'cfg(unix)'.dependencies] unix_socket = "0.5.0" +[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] +nix = "0.20" + [[bin]] name = "cat" path = "src/main.rs" diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index cf5a384a4..f39708fd8 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -3,14 +3,13 @@ // (c) Jordi Boggiano // (c) Evgeniy Klyuchikov // (c) Joshua S. Miller +// (c) Árni Dagur // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) nonprint nonblank nonprinting -#[macro_use] -extern crate quick_error; #[cfg(unix)] extern crate unix_socket; #[macro_use] @@ -18,9 +17,9 @@ extern crate uucore; // last synced with: cat (GNU coreutils) 8.13 use clap::{App, Arg}; -use quick_error::ResultExt; use std::fs::{metadata, File}; -use std::io::{self, stderr, stdin, stdout, BufWriter, Read, Write}; +use std::io::{self, Read, Write}; +use thiserror::Error; use uucore::fs::is_stdin_interactive; /// Unix domain socket support @@ -31,12 +30,44 @@ use std::os::unix::fs::FileTypeExt; #[cfg(unix)] use unix_socket::UnixStream; +#[cfg(any(target_os = "linux", target_os = "android"))] +use nix::errno::Errno; +/// Linux splice support +#[cfg(any(target_os = "linux", target_os = "android"))] +use nix::fcntl::{splice, SpliceFFlags}; +#[cfg(any(target_os = "linux", target_os = "android"))] +use nix::unistd::pipe; +#[cfg(any(target_os = "linux", target_os = "android"))] +use std::os::unix::io::{AsRawFd, RawFd}; + static NAME: &str = "cat"; static VERSION: &str = env!("CARGO_PKG_VERSION"); static SYNTAX: &str = "[OPTION]... [FILE]..."; static SUMMARY: &str = "Concatenate FILE(s), or standard input, to standard output With no FILE, or when FILE is -, read standard input."; +#[derive(Error, Debug)] +enum CatError { + /// Wrapper around `io::Error` + #[error("{0}")] + Io(#[from] io::Error), + /// Wrapper around `nix::Error` + #[cfg(any(target_os = "linux", target_os = "android"))] + #[error("{0}")] + Nix(#[from] nix::Error), + /// Unknown file type; it's not a regular file, socket, etc. + #[error("{}: unknown filetype: {}", path, ft_debug)] + UnknownFiletype { + path: String, + /// A debug print of the file type + ft_debug: String, + }, + #[error("{0}: Expected a file, found directory")] + IsDirectory(String), +} + +type CatResult = Result; + #[derive(PartialEq)] enum NumberingMode { None, @@ -44,39 +75,6 @@ enum NumberingMode { All, } -quick_error! { - #[derive(Debug)] - enum CatError { - /// Wrapper for io::Error with path context - Input(err: io::Error, path: String) { - display("cat: {0}: {1}", path, err) - context(path: &'a str, err: io::Error) -> (err, path.to_owned()) - cause(err) - } - - /// Wrapper for io::Error with no context - Output(err: io::Error) { - display("cat: {0}", err) from() - cause(err) - } - - /// Unknown Filetype classification - UnknownFiletype(path: String) { - display("cat: {0}: unknown filetype", path) - } - - /// At least one error was encountered in reading or writing - EncounteredErrors(count: usize) { - display("cat: encountered {0} errors", count) - } - - /// Denotes an error caused by trying to `cat` a directory - IsDirectory(path: String) { - display("cat: {0}: Is a directory", path) - } - } -} - struct OutputOptions { /// Line numbering mode number: NumberingMode, @@ -87,21 +85,56 @@ struct OutputOptions { /// display TAB characters as `tab` show_tabs: bool, - /// If `show_tabs == true`, this string will be printed in the - /// place of tabs - tab: String, - - /// Can be set to show characters other than '\n' a the end of - /// each line, e.g. $ - end_of_line: String, + /// Show end of lines + show_ends: bool, /// use ^ and M- notation, except for LF (\\n) and TAB (\\t) show_nonprint: bool, } +impl OutputOptions { + fn tab(&self) -> &'static str { + if self.show_tabs { + "^I" + } else { + "\t" + } + } + + fn end_of_line(&self) -> &'static str { + if self.show_ends { + "$\n" + } else { + "\n" + } + } + + /// We can write fast if we can simply copy the contents of the file to + /// stdout, without augmenting the output with e.g. line numbers. + fn can_write_fast(&self) -> bool { + !(self.show_tabs + || self.show_nonprint + || self.show_ends + || self.squeeze_blank + || self.number != NumberingMode::None) + } +} + +/// State that persists between output of each file. This struct is only used +/// when we can't write fast. +struct OutputState { + /// The current line number + line_number: usize, + + /// Whether the output cursor is at the beginning of a new line + at_line_start: bool, +} + /// Represents an open file handle, stream, or other device -struct InputHandle { - reader: Box, +struct InputHandle { + #[cfg(any(target_os = "linux", target_os = "android"))] + file_descriptor: RawFd, + reader: R, is_interactive: bool, } @@ -124,8 +157,6 @@ enum InputType { Socket, } -type CatResult = Result; - mod options { pub static FILE: &str = "file"; pub static SHOW_ALL: &str = "show-all"; @@ -243,30 +274,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { None => vec!["-".to_owned()], }; - let can_write_fast = !(show_tabs - || show_nonprint - || show_ends - || squeeze_blank - || number_mode != NumberingMode::None); - - let success = if can_write_fast { - write_fast(files).is_ok() - } else { - let tab = if show_tabs { "^I" } else { "\t" }.to_owned(); - - let end_of_line = if show_ends { "$\n" } else { "\n" }.to_owned(); - - let options = OutputOptions { - end_of_line, - number: number_mode, - show_nonprint, - show_tabs, - squeeze_blank, - tab, - }; - - write_lines(files, &options).is_ok() + let options = OutputOptions { + show_ends, + number: number_mode, + show_nonprint, + show_tabs, + squeeze_blank, }; + let success = cat_files(files, &options).is_ok(); if success { 0 @@ -275,6 +290,76 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } +fn cat_handle( + handle: &mut InputHandle, + options: &OutputOptions, + state: &mut OutputState, +) -> CatResult<()> { + if options.can_write_fast() { + write_fast(handle) + } else { + write_lines(handle, &options, state) + } +} + +fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> { + if path == "-" { + let stdin = io::stdin(); + let mut handle = InputHandle { + #[cfg(any(target_os = "linux", target_os = "android"))] + file_descriptor: stdin.as_raw_fd(), + reader: stdin, + is_interactive: is_stdin_interactive(), + }; + return cat_handle(&mut handle, &options, state); + } + match get_input_type(path)? { + InputType::Directory => Err(CatError::IsDirectory(path.to_owned())), + #[cfg(unix)] + InputType::Socket => { + let socket = UnixStream::connect(path)?; + socket.shutdown(Shutdown::Write)?; + let mut handle = InputHandle { + #[cfg(any(target_os = "linux", target_os = "android"))] + file_descriptor: socket.as_raw_fd(), + reader: socket, + is_interactive: false, + }; + cat_handle(&mut handle, &options, state) + } + _ => { + let file = File::open(path)?; + let mut handle = InputHandle { + #[cfg(any(target_os = "linux", target_os = "android"))] + file_descriptor: file.as_raw_fd(), + reader: file, + is_interactive: false, + }; + cat_handle(&mut handle, &options, state) + } + } +} + +fn cat_files(files: Vec, options: &OutputOptions) -> Result<(), u32> { + let mut error_count = 0; + let mut state = OutputState { + line_number: 1, + at_line_start: true, + }; + + for path in &files { + if let Err(err) = cat_path(path, &options, &mut state) { + show_error!("{}", err); + error_count += 1; + } + } + if error_count == 0 { + Ok(()) + } else { + Err(error_count) + } +} + /// Classifies the `InputType` of file at `path` if possible /// /// # Arguments @@ -285,7 +370,8 @@ fn get_input_type(path: &str) -> CatResult { return Ok(InputType::StdIn); } - match metadata(path).context(path)?.file_type() { + let ft = metadata(path)?.file_type(); + match ft { #[cfg(unix)] ft if ft.is_block_device() => Ok(InputType::BlockDevice), #[cfg(unix)] @@ -297,125 +383,113 @@ fn get_input_type(path: &str) -> CatResult { ft if ft.is_dir() => Ok(InputType::Directory), ft if ft.is_file() => Ok(InputType::File), ft if ft.is_symlink() => Ok(InputType::SymLink), - _ => Err(CatError::UnknownFiletype(path.to_owned())), + _ => Err(CatError::UnknownFiletype { + path: path.to_owned(), + ft_debug: format!("{:?}", ft), + }), } } -/// Returns an InputHandle from which a Reader can be accessed or an -/// error -/// -/// # Arguments -/// -/// * `path` - `InputHandler` will wrap a reader from this file path -fn open(path: &str) -> CatResult { - if path == "-" { - let stdin = stdin(); - return Ok(InputHandle { - reader: Box::new(stdin) as Box, - is_interactive: is_stdin_interactive(), - }); - } - - match get_input_type(path)? { - InputType::Directory => Err(CatError::IsDirectory(path.to_owned())), - #[cfg(unix)] - InputType::Socket => { - let socket = UnixStream::connect(path).context(path)?; - socket.shutdown(Shutdown::Write).context(path)?; - Ok(InputHandle { - reader: Box::new(socket) as Box, - is_interactive: false, - }) - } - _ => { - let file = File::open(path).context(path)?; - Ok(InputHandle { - reader: Box::new(file) as Box, - is_interactive: false, - }) +/// Writes handle to stdout with no configuration. This allows a +/// simple memory copy. +fn write_fast(handle: &mut InputHandle) -> CatResult<()> { + let stdout = io::stdout(); + let mut stdout_lock = stdout.lock(); + #[cfg(any(target_os = "linux", target_os = "android"))] + { + // If we're on Linux or Android, try to use the splice() system call + // for faster writing. If it works, we're done. + if !write_fast_using_splice(handle, stdout.as_raw_fd())? { + return Ok(()); } } + // If we're not on Linux or Android, or the splice() call failed, + // fall back on slower writing. + let mut buf = [0; 1024 * 64]; + while let Ok(n) = handle.reader.read(&mut buf) { + if n == 0 { + break; + } + stdout_lock.write_all(&buf[..n])?; + } + Ok(()) } -/// Writes files to stdout with no configuration. This allows a -/// simple memory copy. Returns `Ok(())` if no errors were -/// encountered, or an error with the number of errors encountered. +/// This function is called from `write_fast()` on Linux and Android. The +/// function `splice()` is used to move data between two file descriptors +/// without copying between kernel- and userspace. This results in a large +/// speedup. /// -/// # Arguments -/// -/// * `files` - There is no short circuit when encountering an error -/// reading a file in this vector -fn write_fast(files: Vec) -> CatResult<()> { - let mut writer = stdout(); - let mut in_buf = [0; 1024 * 64]; - let mut error_count = 0; +/// The `bool` in the result value indicates if we need to fall back to normal +/// copying or not. False means we don't have to. +#[cfg(any(target_os = "linux", target_os = "android"))] +#[inline] +fn write_fast_using_splice(handle: &mut InputHandle, writer: RawFd) -> CatResult { + const BUF_SIZE: usize = 1024 * 16; - for file in files { - match open(&file[..]) { - Ok(mut handle) => { - while let Ok(n) = handle.reader.read(&mut in_buf) { - if n == 0 { - break; - } - writer.write_all(&in_buf[..n]).context(&file[..])?; + let (pipe_rd, pipe_wr) = pipe()?; + + // We only fall back if splice fails on the first call. + match splice( + handle.file_descriptor, + None, + pipe_wr, + None, + BUF_SIZE, + SpliceFFlags::empty(), + ) { + Ok(n) => { + if n == 0 { + return Ok(false); + } + } + Err(err) => { + match err.as_errno() { + Some(Errno::EPERM) | Some(Errno::ENOSYS) | Some(Errno::EINVAL) => { + // EPERM indicates the call was blocked by seccomp. + // ENOSYS indicates we're running on an ancient Kernel. + // EINVAL indicates some other failure. + return Ok(true); + } + _ => { + // Other errors include running out of memory, etc. We + // don't attempt to fall back from these. + return Err(err)?; } } - Err(error) => { - writeln!(&mut stderr(), "{}", error)?; - error_count += 1; - } } } - match error_count { - 0 => Ok(()), - _ => Err(CatError::EncounteredErrors(error_count)), - } -} - -/// State that persists between output of each file -struct OutputState { - /// The current line number - line_number: usize, - - /// Whether the output cursor is at the beginning of a new line - at_line_start: bool, -} - -/// Writes files to stdout with `options` as configuration. Returns -/// `Ok(())` if no errors were encountered, or an error with the -/// number of errors encountered. -/// -/// # Arguments -/// -/// * `files` - There is no short circuit when encountering an error -/// reading a file in this vector -fn write_lines(files: Vec, options: &OutputOptions) -> CatResult<()> { - let mut error_count = 0; - let mut state = OutputState { - line_number: 1, - at_line_start: true, - }; - - for file in files { - if let Err(error) = write_file_lines(&file, options, &mut state) { - writeln!(&mut stderr(), "{}", error).context(&file[..])?; - error_count += 1; + loop { + let n = splice( + handle.file_descriptor, + None, + pipe_wr, + None, + BUF_SIZE, + SpliceFFlags::empty(), + )?; + if n == 0 { + // We read 0 bytes from the input, + // which means we're done copying. + break; } + splice(pipe_rd, None, writer, None, BUF_SIZE, SpliceFFlags::empty())?; } - match error_count { - 0 => Ok(()), - _ => Err(CatError::EncounteredErrors(error_count)), - } + Ok(false) } /// Outputs file contents to stdout in a line-by-line fashion, /// propagating any errors that might occur. -fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> { - let mut handle = open(file)?; +fn write_lines( + handle: &mut InputHandle, + options: &OutputOptions, + state: &mut OutputState, +) -> CatResult<()> { let mut in_buf = [0; 1024 * 31]; - let mut writer = BufWriter::with_capacity(1024 * 64, stdout()); + let stdout = io::stdout(); + let mut writer = stdout.lock(); let mut one_blank_kept = false; while let Ok(n) = handle.reader.read(&mut in_buf) { @@ -433,9 +507,9 @@ fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState write!(&mut writer, "{0:6}\t", state.line_number)?; state.line_number += 1; } - writer.write_all(options.end_of_line.as_bytes())?; + writer.write_all(options.end_of_line().as_bytes())?; if handle.is_interactive { - writer.flush().context(file)?; + writer.flush()?; } } state.at_line_start = true; @@ -450,7 +524,7 @@ fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState // print to end of line or end of buffer let offset = if options.show_nonprint { - write_nonprint_to_end(&in_buf[pos..], &mut writer, options.tab.as_bytes()) + write_nonprint_to_end(&in_buf[pos..], &mut writer, options.tab().as_bytes()) } else if options.show_tabs { write_tab_to_end(&in_buf[pos..], &mut writer) } else { @@ -462,7 +536,7 @@ fn write_file_lines(file: &str, options: &OutputOptions, state: &mut OutputState break; } // print suitable end of line - writer.write_all(options.end_of_line.as_bytes())?; + writer.write_all(options.end_of_line().as_bytes())?; if handle.is_interactive { writer.flush()?; }