From 2c1459cbfc4c060ffdbf0bd8c21ecd63504424e9 Mon Sep 17 00:00:00 2001 From: Chirag Jadwani Date: Sat, 24 Apr 2021 21:34:42 +0530 Subject: [PATCH 1/2] cut: optimizations * Use buffered stdout to reduce write sys calls. This simple change yielded the biggest performace gain. * Use `for_byte_record_with_terminator` from the `bstr` crate. This is to minimize the per line copying needed by `BufReader::read_until`. The `cut_fields` and `cut_fields_delimiter` functions used `read_until` to iterate over lines. That required copying each input line to the line buffer. With `for_byte_record_with_terminator` copying is minimized as it calls our closure with a reference to BufReader's buffer most of the time. It needs to copy (internally) only to process any incomplete lines at the end of the buffer. * Re-write `Searcher` to use `memchr`. Switch from the naive implementation to one that uses `memchr`. * Rewrite `cut_bytes` almost entirely. This was already well optimized. The performance gain in this case is not from avoiding copying. In fact, it needed zero copying whereas new implementation introduces some copying similar to `cut_fields` described above. But the occassional copying cost is more than offset by the use of the very fast `memchr` inside `for_byte_record_with_terminator`. This change also simplifies the code significantly. Removed the `buffer` module. --- Cargo.lock | 2 + src/uu/cut/Cargo.toml | 2 + src/uu/cut/src/buffer.rs | 152 --------------------------- src/uu/cut/src/cut.rs | 208 ++++++++++++++----------------------- src/uu/cut/src/searcher.rs | 95 +++++++++++++---- 5 files changed, 157 insertions(+), 302 deletions(-) delete mode 100644 src/uu/cut/src/buffer.rs diff --git a/Cargo.lock b/Cargo.lock index 321f41d89..9df4994c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1777,7 +1777,9 @@ dependencies = [ name = "uu_cut" version = "0.0.6" dependencies = [ + "bstr", "clap", + "memchr 2.3.4", "uucore", "uucore_procs", ] diff --git a/src/uu/cut/Cargo.toml b/src/uu/cut/Cargo.toml index d892ddeb5..c863c1772 100644 --- a/src/uu/cut/Cargo.toml +++ b/src/uu/cut/Cargo.toml @@ -18,6 +18,8 @@ path = "src/cut.rs" clap = "2.33" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } +memchr = "2" +bstr = "0.2" [[bin]] name = "cut" diff --git a/src/uu/cut/src/buffer.rs b/src/uu/cut/src/buffer.rs deleted file mode 100644 index 6c3238be1..000000000 --- a/src/uu/cut/src/buffer.rs +++ /dev/null @@ -1,152 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// (c) Rolf Morel -// (c) kwantam -// * substantial rewrite to use the `std::io::BufReader` trait -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. - -// spell-checker:ignore (ToDO) SRes Newl - -use std::io::Result as IoResult; -use std::io::{BufRead, BufReader, Read, Write}; - -#[allow(non_snake_case)] -pub mod Bytes { - use std::io::Write; - - pub trait Select { - fn select(&mut self, bytes: usize, out: Option<&mut W>) -> Selected; - } - - #[derive(PartialEq, Eq, Debug)] - pub enum Selected { - NewlineFound, - Complete(usize), - Partial(usize), - EndOfFile, - } -} - -#[derive(Debug)] -pub struct ByteReader -where - R: Read, -{ - inner: BufReader, - newline_char: u8, -} - -impl ByteReader { - pub fn new(read: R, newline_char: u8) -> ByteReader { - ByteReader { - inner: BufReader::with_capacity(4096, read), - newline_char, - } - } -} - -impl Read for ByteReader { - fn read(&mut self, buf: &mut [u8]) -> IoResult { - self.inner.read(buf) - } -} - -impl BufRead for ByteReader { - fn fill_buf(&mut self) -> IoResult<&[u8]> { - self.inner.fill_buf() - } - - fn consume(&mut self, amt: usize) { - self.inner.consume(amt) - } -} - -impl ByteReader { - pub fn consume_line(&mut self) -> usize { - let mut bytes_consumed = 0; - let mut consume_val; - let newline_char = self.newline_char; - - loop { - { - // need filled_buf to go out of scope - let filled_buf = match self.fill_buf() { - Ok(b) => { - if b.is_empty() { - return bytes_consumed; - } else { - b - } - } - Err(e) => crash!(1, "read error: {}", e), - }; - - if let Some(idx) = filled_buf.iter().position(|byte| *byte == newline_char) { - consume_val = idx + 1; - bytes_consumed += consume_val; - break; - } - - consume_val = filled_buf.len(); - } - - bytes_consumed += consume_val; - self.consume(consume_val); - } - - self.consume(consume_val); - bytes_consumed - } -} - -impl self::Bytes::Select for ByteReader { - fn select(&mut self, bytes: usize, out: Option<&mut W>) -> Bytes::Selected { - enum SRes { - Comp, - Part, - Newl, - } - - use self::Bytes::Selected::*; - - let newline_char = self.newline_char; - let (res, consume_val) = { - let buffer = match self.fill_buf() { - Err(e) => crash!(1, "read error: {}", e), - Ok(b) => b, - }; - - let (res, consume_val) = match buffer.len() { - 0 => return EndOfFile, - buf_used if bytes < buf_used => { - // because the output delimiter should only be placed between - // segments check if the byte after bytes is a newline - let buf_slice = &buffer[0..=bytes]; - - match buf_slice.iter().position(|byte| *byte == newline_char) { - Some(idx) => (SRes::Newl, idx + 1), - None => (SRes::Comp, bytes), - } - } - _ => match buffer.iter().position(|byte| *byte == newline_char) { - Some(idx) => (SRes::Newl, idx + 1), - None => (SRes::Part, buffer.len()), - }, - }; - - if let Some(out) = out { - crash_if_err!(1, out.write_all(&buffer[0..consume_val])); - } - (res, consume_val) - }; - - self.consume(consume_val); - match res { - SRes::Comp => Complete(consume_val), - SRes::Part => Partial(consume_val), - SRes::Newl => NewlineFound, - } - } -} diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 5bf310daa..91dc17e52 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -10,15 +10,16 @@ #[macro_use] extern crate uucore; +use bstr::io::BufReadExt; use clap::{App, Arg}; use std::fs::File; -use std::io::{stdin, stdout, BufRead, BufReader, Read, Stdout, Write}; +use std::io::{stdin, stdout, BufReader, BufWriter, Read, Write}; use std::path::Path; use self::searcher::Searcher; +use uucore::fs::is_stdout_interactive; use uucore::ranges::Range; -mod buffer; mod searcher; static NAME: &str = "cut"; @@ -125,6 +126,14 @@ enum Mode { Fields(Vec, FieldOptions), } +fn stdout_writer() -> Box { + if is_stdout_interactive() { + Box::new(stdout()) + } else { + Box::new(BufWriter::new(stdout())) as Box + } +} + fn list_to_ranges(list: &str, complement: bool) -> Result, String> { if complement { Range::from_list(list).map(|r| uucore::ranges::complement(&r)) @@ -134,72 +143,35 @@ fn list_to_ranges(list: &str, complement: bool) -> Result, String> { } fn cut_bytes(reader: R, ranges: &[Range], opts: &Options) -> i32 { - use self::buffer::Bytes::Select; - use self::buffer::Bytes::Selected::*; - let newline_char = if opts.zero_terminated { b'\0' } else { b'\n' }; - let mut buf_read = buffer::ByteReader::new(reader, newline_char); - let mut out = stdout(); + let buf_in = BufReader::new(reader); + let mut out = stdout_writer(); + let delim = opts + .out_delim + .as_ref() + .map_or("", String::as_str) + .as_bytes(); - 'newline: loop { - let mut cur_pos = 1; + let res = buf_in.for_byte_record(newline_char, |line| { let mut print_delim = false; - - for &Range { low, high } in ranges.iter() { - // skip up to low - let orig_pos = cur_pos; - loop { - match buf_read.select(low - cur_pos, None::<&mut Stdout>) { - NewlineFound => { - crash_if_err!(1, out.write_all(&[newline_char])); - continue 'newline; - } - Complete(len) => { - cur_pos += len; - break; - } - Partial(len) => cur_pos += len, - EndOfFile => { - if orig_pos != cur_pos { - crash_if_err!(1, out.write_all(&[newline_char])); - } - - break 'newline; - } - } + for &Range { low, high } in ranges { + if low > line.len() { + break; } - - if let Some(ref delim) = opts.out_delim { - if print_delim { - crash_if_err!(1, out.write_all(delim.as_bytes())); - } + if print_delim { + out.write_all(delim)?; + } else if opts.out_delim.is_some() { print_delim = true; } - - // write out from low to high - loop { - match buf_read.select(high - cur_pos + 1, Some(&mut out)) { - NewlineFound => continue 'newline, - Partial(len) => cur_pos += len, - Complete(_) => { - cur_pos = high + 1; - break; - } - EndOfFile => { - if cur_pos != low || low == high { - crash_if_err!(1, out.write_all(&[newline_char])); - } - - break 'newline; - } - } - } + // change `low` from 1-indexed value to 0-index value + let low = low - 1; + let high = high.min(line.len()); + out.write_all(&line[low..high])?; } - - buf_read.consume_line(); - crash_if_err!(1, out.write_all(&[newline_char])); - } - + out.write_all(&[newline_char])?; + Ok(true) + }); + crash_if_err!(1, res); 0 } @@ -212,23 +184,11 @@ fn cut_fields_delimiter( newline_char: u8, out_delim: &str, ) -> i32 { - let mut buf_in = BufReader::new(reader); - let mut out = stdout(); - let mut buffer = Vec::new(); + let buf_in = BufReader::new(reader); + let mut out = stdout_writer(); + let input_delim_len = delim.len(); - 'newline: loop { - buffer.clear(); - match buf_in.read_until(newline_char, &mut buffer) { - Ok(n) if n == 0 => break, - Err(e) => { - if buffer.is_empty() { - crash!(1, "read error: {}", e); - } - } - _ => (), - } - - let line = &buffer[..]; + let result = buf_in.for_byte_record_with_terminator(newline_char, |line| { let mut fields_pos = 1; let mut low_idx = 0; let mut delim_search = Searcher::new(line, delim.as_bytes()).peekable(); @@ -236,46 +196,46 @@ fn cut_fields_delimiter( if delim_search.peek().is_none() { if !only_delimited { - crash_if_err!(1, out.write_all(line)); + out.write_all(line)?; if line[line.len() - 1] != newline_char { - crash_if_err!(1, out.write_all(&[newline_char])); + out.write_all(&[newline_char])?; } } - continue; + return Ok(true); } for &Range { low, high } in ranges.iter() { if low - fields_pos > 0 { low_idx = match delim_search.nth(low - fields_pos - 1) { - Some((_, beyond_delim)) => beyond_delim, + Some(index) => index + input_delim_len, None => break, }; } for _ in 0..=high - low { if print_delim { - crash_if_err!(1, out.write_all(out_delim.as_bytes())); + out.write_all(out_delim.as_bytes())?; + } else { + print_delim = true; } match delim_search.next() { - Some((high_idx, next_low_idx)) => { + Some(high_idx) => { let segment = &line[low_idx..high_idx]; - crash_if_err!(1, out.write_all(segment)); + out.write_all(segment)?; - print_delim = true; - - low_idx = next_low_idx; + low_idx = high_idx + input_delim_len; fields_pos = high + 1; } None => { let segment = &line[low_idx..]; - crash_if_err!(1, out.write_all(segment)); + out.write_all(segment)?; if line[line.len() - 1] == newline_char { - continue 'newline; + return Ok(true); } break; } @@ -283,9 +243,10 @@ fn cut_fields_delimiter( } } - crash_if_err!(1, out.write_all(&[newline_char])); - } - + out.write_all(&[newline_char])?; + Ok(true) + }); + crash_if_err!(1, result); 0 } @@ -303,23 +264,11 @@ fn cut_fields(reader: R, ranges: &[Range], opts: &FieldOptions) -> i32 ); } - let mut buf_in = BufReader::new(reader); - let mut out = stdout(); - let mut buffer = Vec::new(); + let buf_in = BufReader::new(reader); + let mut out = stdout_writer(); + let delim_len = opts.delimiter.len(); - 'newline: loop { - buffer.clear(); - match buf_in.read_until(newline_char, &mut buffer) { - Ok(n) if n == 0 => break, - Err(e) => { - if buffer.is_empty() { - crash!(1, "read error: {}", e); - } - } - _ => (), - } - - let line = &buffer[..]; + let result = buf_in.for_byte_record_with_terminator(newline_char, |line| { let mut fields_pos = 1; let mut low_idx = 0; let mut delim_search = Searcher::new(line, opts.delimiter.as_bytes()).peekable(); @@ -327,53 +276,54 @@ fn cut_fields(reader: R, ranges: &[Range], opts: &FieldOptions) -> i32 if delim_search.peek().is_none() { if !opts.only_delimited { - crash_if_err!(1, out.write_all(line)); + out.write_all(line)?; if line[line.len() - 1] != newline_char { - crash_if_err!(1, out.write_all(&[newline_char])); + out.write_all(&[newline_char])?; } } - continue; + return Ok(true); } - for &Range { low, high } in ranges.iter() { + for &Range { low, high } in ranges { if low - fields_pos > 0 { - low_idx = match delim_search.nth(low - fields_pos - 1) { - Some((_, beyond_delim)) => beyond_delim, - None => break, - }; - } - - if print_delim && low_idx >= opts.delimiter.as_bytes().len() { - low_idx -= opts.delimiter.as_bytes().len(); + if let Some(delim_pos) = delim_search.nth(low - fields_pos - 1) { + low_idx = if print_delim { + delim_pos + } else { + delim_pos + delim_len + } + } else { + break; + } } match delim_search.nth(high - low) { - Some((high_idx, next_low_idx)) => { + Some(high_idx) => { let segment = &line[low_idx..high_idx]; - crash_if_err!(1, out.write_all(segment)); + out.write_all(segment)?; print_delim = true; - low_idx = next_low_idx; + low_idx = high_idx; fields_pos = high + 1; } None => { let segment = &line[low_idx..line.len()]; - crash_if_err!(1, out.write_all(segment)); + out.write_all(segment)?; if line[line.len() - 1] == newline_char { - continue 'newline; + return Ok(true); } break; } } } - - crash_if_err!(1, out.write_all(&[newline_char])); - } - + out.write_all(&[newline_char])?; + Ok(true) + }); + crash_if_err!(1, result); 0 } diff --git a/src/uu/cut/src/searcher.rs b/src/uu/cut/src/searcher.rs index f0821ff3a..5e3c076df 100644 --- a/src/uu/cut/src/searcher.rs +++ b/src/uu/cut/src/searcher.rs @@ -5,7 +5,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -#[derive(Clone)] +use memchr::memchr; + pub struct Searcher<'a> { haystack: &'a [u8], needle: &'a [u8], @@ -14,6 +15,7 @@ pub struct Searcher<'a> { impl<'a> Searcher<'a> { pub fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> { + assert!(!needle.is_empty()); Searcher { haystack, needle, @@ -23,30 +25,81 @@ impl<'a> Searcher<'a> { } impl<'a> Iterator for Searcher<'a> { - type Item = (usize, usize); + type Item = usize; - fn next(&mut self) -> Option<(usize, usize)> { - if self.needle.len() == 1 { - for offset in self.position..self.haystack.len() { - if self.haystack[offset] == self.needle[0] { - self.position = offset + 1; - return Some((offset, offset + 1)); + fn next(&mut self) -> Option { + loop { + if let Some(match_idx) = memchr(self.needle[0], self.haystack) { + if self.needle.len() == 1 + || self.haystack[match_idx + 1..].starts_with(&self.needle[1..]) + { + let skip = match_idx + self.needle.len(); + self.haystack = &self.haystack[skip..]; + let match_pos = self.position + match_idx; + self.position += skip; + return Some(match_pos); + } else { + let skip = match_idx + 1; + self.haystack = &self.haystack[skip..]; + self.position += skip; + // continue } - } - - self.position = self.haystack.len(); - return None; - } - - while self.position + self.needle.len() <= self.haystack.len() { - if &self.haystack[self.position..self.position + self.needle.len()] == self.needle { - let match_pos = self.position; - self.position += self.needle.len(); - return Some((match_pos, match_pos + self.needle.len())); } else { - self.position += 1; + return None; } } - None + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + const NEEDLE: &[u8] = "ab".as_bytes(); + + #[test] + fn test_normal() { + let iter = Searcher::new("a.a.a".as_bytes(), "a".as_bytes()); + let items: Vec = iter.collect(); + assert_eq!(vec![0, 2, 4], items); + } + + #[test] + fn test_empty() { + let iter = Searcher::new("".as_bytes(), "a".as_bytes()); + let items: Vec = iter.collect(); + assert_eq!(vec![] as Vec, items); + } + + fn test_multibyte(line: &[u8], expected: Vec) { + let iter = Searcher::new(line, NEEDLE); + let items: Vec = iter.collect(); + assert_eq!(expected, items); + } + + #[test] + fn test_multibyte_normal() { + test_multibyte("...ab...ab...".as_bytes(), vec![3, 8]); + } + + #[test] + fn test_multibyte_needle_head_at_end() { + test_multibyte("a".as_bytes(), vec![]); + } + + #[test] + fn test_multibyte_starting_needle() { + test_multibyte("ab...ab...".as_bytes(), vec![0, 5]); + } + + #[test] + fn test_multibyte_trailing_needle() { + test_multibyte("...ab...ab".as_bytes(), vec![3, 8]); + } + + #[test] + fn test_multibyte_first_byte_false_match() { + test_multibyte("aA..aCaC..ab..aD".as_bytes(), vec![10]); } } From 25f99097cc28298536282b545269b99dac484d22 Mon Sep 17 00:00:00 2001 From: Chirag Jadwani Date: Wed, 28 Apr 2021 23:28:26 +0530 Subject: [PATCH 2/2] cut: add BENCHMARKING.md and minor refactoring --- src/uu/cut/BENCHMARKING.md | 46 ++++++++++++++++++++++++++++++++++++++ src/uu/cut/src/cut.rs | 2 +- src/uu/cut/src/searcher.rs | 2 +- 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 src/uu/cut/BENCHMARKING.md diff --git a/src/uu/cut/BENCHMARKING.md b/src/uu/cut/BENCHMARKING.md new file mode 100644 index 000000000..bd94cf93c --- /dev/null +++ b/src/uu/cut/BENCHMARKING.md @@ -0,0 +1,46 @@ +## Benchmarking cut + +### Performance profile + +In normal use cases a significant amount of the total execution time of `cut` +is spent performing I/O. When invoked with the `-f` option (cut fields) some +CPU time is spent on detecting fields (in `Searcher::next`). Other than that +some small amount of CPU time is spent on breaking the input stream into lines. + + +### How to + +When fixing bugs or adding features you might want to compare +performance before and after your code changes. + +- `hyperfine` can be used to accurately measure and compare the total + execution time of one or more commands. + + ``` + $ cargo build --release --package uu_cut + + $ hyperfine -w3 "./target/release/cut -f2-4,8 -d' ' input.txt" "cut -f2-4,8 -d' ' input.txt" + ``` + You can put those two commands in a shell script to be sure that you don't + forget to build after making any changes. + +When optimizing or fixing performance regressions seeing the number of times a +function is called, and the amount of time it takes can be useful. + +- `cargo flamegraph` generates flame graphs from function level metrics it records using `perf` or `dtrace` + + ``` + $ cargo flamegraph --bin cut --package uu_cut -- -f1,3-4 input.txt > /dev/null + ``` + + +### What to benchmark + +There are four different performance paths in `cut` to benchmark. + +- Byte ranges `-c`/`--characters` or `-b`/`--bytes` e.g. `cut -c 2,4,6-` +- Byte ranges with output delimiters e.g. `cut -c 4- --output-delimiter=/` +- Fields e.g. `cut -f -4` +- Fields with output delimiters e.g. `cut -f 7-10 --output-delimiter=:` + +Choose a test input file with large number of lines so that program startup time does not significantly affect the benchmark. diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 91dc17e52..40b41e98f 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -205,7 +205,7 @@ fn cut_fields_delimiter( return Ok(true); } - for &Range { low, high } in ranges.iter() { + for &Range { low, high } in ranges { if low - fields_pos > 0 { low_idx = match delim_search.nth(low - fields_pos - 1) { Some(index) => index + input_delim_len, diff --git a/src/uu/cut/src/searcher.rs b/src/uu/cut/src/searcher.rs index 5e3c076df..5fe4a723b 100644 --- a/src/uu/cut/src/searcher.rs +++ b/src/uu/cut/src/searcher.rs @@ -33,9 +33,9 @@ impl<'a> Iterator for Searcher<'a> { if self.needle.len() == 1 || self.haystack[match_idx + 1..].starts_with(&self.needle[1..]) { + let match_pos = self.position + match_idx; let skip = match_idx + self.needle.len(); self.haystack = &self.haystack[skip..]; - let match_pos = self.position + match_idx; self.position += skip; return Some(match_pos); } else {