diff --git a/Cargo.lock b/Cargo.lock index adc373f85..8a9d0bdff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,6 +188,12 @@ dependencies = [ "utf8-width", ] +[[package]] +name = "bytecount" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" + [[package]] name = "byteorder" version = "1.4.3" @@ -3110,6 +3116,7 @@ dependencies = [ name = "uu_wc" version = "0.0.7" dependencies = [ + "bytecount", "clap", "libc", "nix 0.20.0", diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml index 31a7ac7af..49735adf7 100644 --- a/src/uu/wc/Cargo.toml +++ b/src/uu/wc/Cargo.toml @@ -19,6 +19,7 @@ clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } thiserror = "1.0" +bytecount = "0.6.2" [target.'cfg(unix)'.dependencies] nix = "0.20" diff --git a/src/uu/wc/src/count_bytes.rs b/src/uu/wc/src/count_fast.rs similarity index 63% rename from src/uu/wc/src/count_bytes.rs rename to src/uu/wc/src/count_fast.rs index 83cc71ac4..b23e9ed8f 100644 --- a/src/uu/wc/src/count_bytes.rs +++ b/src/uu/wc/src/count_fast.rs @@ -1,13 +1,15 @@ +use crate::word_count::WordCount; + use super::{WcResult, WordCountable}; #[cfg(any(target_os = "linux", target_os = "android"))] use std::fs::{File, OpenOptions}; -use std::io::ErrorKind; +use std::io::{ErrorKind, Read}; #[cfg(unix)] use libc::S_IFREG; #[cfg(unix)] -use nix::sys::stat::fstat; +use nix::sys::stat; #[cfg(any(target_os = "linux", target_os = "android"))] use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; @@ -18,7 +20,8 @@ use nix::fcntl::{splice, SpliceFFlags}; #[cfg(any(target_os = "linux", target_os = "android"))] use nix::unistd::pipe; -const BUF_SIZE: usize = 16384; +const BUF_SIZE: usize = 16 * 1024; +const SPLICE_SIZE: usize = 128 * 1024; /// Splice wrapper which handles short writes #[cfg(any(target_os = "linux", target_os = "android"))] @@ -37,15 +40,24 @@ fn splice_exact(read_fd: RawFd, write_fd: RawFd, num_bytes: usize) -> nix::Resul /// This is a Linux-specific function to count the number of bytes using the /// `splice` system call, which is faster than using `read`. +/// +/// On error it returns the number of bytes it did manage to read, since the +/// caller will fall back to a simpler method. #[inline] #[cfg(any(target_os = "linux", target_os = "android"))] -fn count_bytes_using_splice(fd: RawFd) -> nix::Result { +fn count_bytes_using_splice(fd: RawFd) -> Result { let null_file = OpenOptions::new() .write(true) .open("/dev/null") - .map_err(|_| nix::Error::last())?; + .map_err(|_| 0_usize)?; let null = null_file.as_raw_fd(); - let (pipe_rd, pipe_wr) = pipe()?; + let null_rdev = stat::fstat(null).map_err(|_| 0_usize)?.st_rdev; + if (stat::major(null_rdev), stat::minor(null_rdev)) != (1, 3) { + // This is not a proper /dev/null, writing to it is probably bad + // Bit of an edge case, but it has been known to happen + return Err(0); + } + let (pipe_rd, pipe_wr) = pipe().map_err(|_| 0_usize)?; // Ensure the pipe is closed when the function returns. // SAFETY: The file descriptors do not have other owners. @@ -53,12 +65,16 @@ fn count_bytes_using_splice(fd: RawFd) -> nix::Result { let mut byte_count = 0; loop { - let res = splice(fd, None, pipe_wr, None, BUF_SIZE, SpliceFFlags::empty())?; - if res == 0 { - break; - } - byte_count += res; - splice_exact(pipe_rd, null, res)?; + match splice(fd, None, pipe_wr, None, SPLICE_SIZE, SpliceFFlags::empty()) { + Ok(0) => break, + Ok(res) => { + byte_count += res; + if splice_exact(pipe_rd, null, res).is_err() { + return Err(byte_count); + } + } + Err(_) => return Err(byte_count), + }; } Ok(byte_count) @@ -73,10 +89,12 @@ fn count_bytes_using_splice(fd: RawFd) -> nix::Result { /// other things such as lines and words. #[inline] pub(crate) fn count_bytes_fast(handle: &mut T) -> WcResult { + let mut byte_count = 0; + #[cfg(unix)] { let fd = handle.as_raw_fd(); - if let Ok(stat) = fstat(fd) { + if let Ok(stat) = stat::fstat(fd) { // If the file is regular, then the `st_size` should hold // the file's size in bytes. if (stat.st_mode & S_IFREG) != 0 { @@ -87,8 +105,9 @@ pub(crate) fn count_bytes_fast(handle: &mut T) -> WcResult return Ok(n), + Err(n) => byte_count = n, } } } @@ -97,7 +116,6 @@ pub(crate) fn count_bytes_fast(handle: &mut T) -> WcResult return Ok(byte_count), @@ -109,3 +127,19 @@ pub(crate) fn count_bytes_fast(handle: &mut T) -> WcResult(handle: &mut R) -> WcResult { + let mut total = WordCount::default(); + let mut buf = [0; BUF_SIZE]; + loop { + match handle.read(&mut buf) { + Ok(0) => return Ok(total), + Ok(n) => { + total.bytes += n; + total.lines += bytecount::count(&buf[..n], b'\n'); + } + Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return Err(e.into()), + } + } +} diff --git a/src/uu/wc/src/countable.rs b/src/uu/wc/src/countable.rs index 3da910a03..098c451c7 100644 --- a/src/uu/wc/src/countable.rs +++ b/src/uu/wc/src/countable.rs @@ -28,7 +28,7 @@ impl WordCountable for StdinLock<'_> { where Self: Sized, { - Lines { buf: self } + Lines::new(self) } } impl WordCountable for File { @@ -38,9 +38,7 @@ impl WordCountable for File { where Self: Sized, { - Lines { - buf: BufReader::new(self), - } + Lines::new(BufReader::new(self)) } } @@ -53,19 +51,25 @@ impl WordCountable for File { /// [`io::Lines`]:: io::Lines pub struct Lines { buf: B, + line: Vec, } -impl Iterator for Lines { - type Item = io::Result>; +impl Lines { + fn new(reader: B) -> Self { + Lines { + buf: reader, + line: Vec::new(), + } + } - fn next(&mut self) -> Option { - let mut line = Vec::new(); + pub fn next(&mut self) -> Option> { + self.line.clear(); // reading from a TTY seems to raise a condition on, rather than return Some(0) like a file. // hence the option wrapped in a result here - match self.buf.read_until(b'\n', &mut line) { + match self.buf.read_until(b'\n', &mut self.line) { Ok(0) => None, - Ok(_n) => Some(Ok(line)), + Ok(_n) => Some(Ok(&self.line)), Err(e) => Some(Err(e)), } } diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index d77cd6b4b..68fd23fb4 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -8,10 +8,10 @@ #[macro_use] extern crate uucore; -mod count_bytes; +mod count_fast; mod countable; mod word_count; -use count_bytes::count_bytes_fast; +use count_fast::{count_bytes_and_lines_fast, count_bytes_fast}; use countable::WordCountable; use word_count::{TitledWordCount, WordCount}; @@ -220,19 +220,20 @@ fn word_count_from_reader( // we do not need to decode the byte stream if we're only counting bytes/newlines let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length; + if !decode_chars { + return count_bytes_and_lines_fast(&mut reader); + } + // Sum the WordCount for each line. Show a warning for each line // that results in an IO error when trying to read it. - let total = reader - .lines() - .filter_map(|res| match res { - Ok(line) => Some(line), - Err(e) => { - show_warning!("Error while reading {}: {}", path, e); - None - } - }) - .map(|line| WordCount::from_line(&line, decode_chars)) - .sum(); + let mut lines = reader.lines(); + let mut total = WordCount::default(); + while let Some(res) = lines.next() { + match res { + Ok(line) => total += WordCount::from_line(line), + Err(e) => show_warning!("Error while reading {}: {}", path, e), + } + } Ok(total) } diff --git a/src/uu/wc/src/word_count.rs b/src/uu/wc/src/word_count.rs index bdb510f58..848e64b98 100644 --- a/src/uu/wc/src/word_count.rs +++ b/src/uu/wc/src/word_count.rs @@ -74,15 +74,11 @@ impl WordCount { /// fields will be set to 0. If it is `true`, this function will /// attempt to decode the bytes first as UTF-8, and failing that, /// as ASCII. - pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount { + pub fn from_line(line: &[u8]) -> WordCount { // GNU 'wc' only counts lines that end in LF as lines let lines = (*line.last().unwrap() == LF) as usize; let bytes = line.len(); - let (words, chars) = if decode_chars { - WordCount::word_and_char_count(line) - } else { - (0, 0) - }; + let (words, chars) = WordCount::word_and_char_count(line); // -L is a GNU 'wc' extension so same behavior on LF let max_line_length = if chars > 0 { chars - lines } else { 0 }; WordCount {