diff --git a/src/uu/cat/Cargo.toml b/src/uu/cat/Cargo.toml index 367164b83..f5ac6a64e 100644 --- a/src/uu/cat/Cargo.toml +++ b/src/uu/cat/Cargo.toml @@ -21,7 +21,7 @@ path = "src/cat.rs" clap = { workspace = true } memchr = { workspace = true } thiserror = { workspace = true } -uucore = { workspace = true, features = ["fs", "pipes"] } +uucore = { workspace = true, features = ["fast-inc", "fs", "pipes"] } [target.'cfg(unix)'.dependencies] nix = { workspace = true } diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index 2c0702424..c0a41270f 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -24,7 +24,7 @@ use thiserror::Error; use uucore::display::Quotable; use uucore::error::UResult; use uucore::fs::FileInformation; -use uucore::{format_usage, help_about, help_usage}; +use uucore::{fast_inc::fast_inc_one, format_usage, help_about, help_usage}; /// Linux splice support #[cfg(any(target_os = "linux", target_os = "android"))] @@ -33,61 +33,55 @@ mod splice; const USAGE: &str = help_usage!("cat.md"); const ABOUT: &str = help_about!("cat.md"); +// Allocate 32 digits for the line number. +// An estimate is that we can print about 1e8 lines/seconds, so 32 digits +// would be enough for billions of universe lifetimes. +const LINE_NUMBER_BUF_SIZE: usize = 32; + struct LineNumber { - buf: Vec, + buf: [u8; LINE_NUMBER_BUF_SIZE], + print_start: usize, + num_start: usize, + num_end: usize, } // Logic to store a string for the line number. Manually incrementing the value // represented in a buffer like this is significantly faster than storing // a `usize` and using the standard Rust formatting macros to format a `usize` // to a string each time it's needed. -// String is initialized to " 1\t" and incremented each time `increment` is -// called. When the value overflows the range storable in the buffer, a b'1' is -// prepended and the counting continues. +// Buffer is initialized to " 1\t" and incremented each time `increment` is +// called, using uucore's fast_inc function that operates on strings. impl LineNumber { fn new() -> Self { + let mut buf = [b'0'; LINE_NUMBER_BUF_SIZE]; + + let init_str = " 1\t"; + let print_start = buf.len() - init_str.len(); + let num_start = buf.len() - 2; + let num_end = buf.len() - 1; + + buf[print_start..].copy_from_slice(init_str.as_bytes()); + LineNumber { - // Initialize buf to b" 1\t" - buf: Vec::from(b" 1\t"), + buf, + print_start, + num_start, + num_end, } } fn increment(&mut self) { - // skip(1) to avoid the \t in the last byte. - for ascii_digit in self.buf.iter_mut().rev().skip(1) { - // Working from the least-significant digit, increment the number in the buffer. - // If we hit anything other than a b'9' we can break since the next digit is - // unaffected. - // Also note that if we hit a b' ', we can think of that as a 0 and increment to b'1'. - // If/else here is faster than match (as measured with some benchmarking Apr-2025), - // probably since we can prioritize most likely digits first. - if (b'0'..=b'8').contains(ascii_digit) { - *ascii_digit += 1; - break; - } else if b'9' == *ascii_digit { - *ascii_digit = b'0'; - } else { - assert_eq!(*ascii_digit, b' '); - *ascii_digit = b'1'; - break; - } - } - if self.buf[0] == b'0' { - // This implies we've overflowed. In this case the buffer will be - // [b'0', b'0', ..., b'0', b'\t']. - // For debugging, the following logic would assert that to be the case. - // assert_eq!(*self.buf.last().unwrap(), b'\t'); - // for ascii_digit in self.buf.iter_mut().rev().skip(1) { - // assert_eq!(*ascii_digit, b'0'); - // } + fast_inc_one(&mut self.buf, &mut self.num_start, self.num_end); + self.print_start = self.print_start.min(self.num_start); + } - // All we need to do is prepend a b'1' and we're good. - self.buf.insert(0, b'1'); - } + #[inline] + fn to_str(&self) -> &[u8] { + &self.buf[self.print_start..] } fn write(&self, writer: &mut impl Write) -> io::Result<()> { - writer.write_all(&self.buf) + writer.write_all(self.to_str()) } } @@ -804,21 +798,21 @@ mod tests { #[test] fn test_incrementing_string() { let mut incrementing_string = super::LineNumber::new(); - assert_eq!(b" 1\t", incrementing_string.buf.as_slice()); + assert_eq!(b" 1\t", incrementing_string.to_str()); incrementing_string.increment(); - assert_eq!(b" 2\t", incrementing_string.buf.as_slice()); + assert_eq!(b" 2\t", incrementing_string.to_str()); // Run through to 100 for _ in 3..=100 { incrementing_string.increment(); } - assert_eq!(b" 100\t", incrementing_string.buf.as_slice()); + assert_eq!(b" 100\t", incrementing_string.to_str()); // Run through until we overflow the original size. for _ in 101..=1_000_000 { incrementing_string.increment(); } - // Confirm that the buffer expands when we overflow the original size. - assert_eq!(b"1000000\t", incrementing_string.buf.as_slice()); + // Confirm that the start position moves when we overflow the original size. + assert_eq!(b"1000000\t", incrementing_string.to_str()); incrementing_string.increment(); - assert_eq!(b"1000001\t", incrementing_string.buf.as_slice()); + assert_eq!(b"1000001\t", incrementing_string.to_str()); } } diff --git a/src/uu/seq/BENCHMARKING.md b/src/uu/seq/BENCHMARKING.md index 6324564c4..5758d2a77 100644 --- a/src/uu/seq/BENCHMARKING.md +++ b/src/uu/seq/BENCHMARKING.md @@ -76,5 +76,20 @@ write!(stdout, "{separator}")? The change above resulted in a ~10% speedup. +### Fast increment path + +When dealing with positive integer values (first/increment/last), and +the default format is used, we use a custom fast path that does arithmetic +on u8 arrays (i.e. strings), instead of repeatedly calling into +formatting format. + +This provides _massive_ performance gains, in the order of 10-20x compared +with the default implementation, at the expense of some added code complexity. + +Just from performance numbers, it is clear that GNU `seq` uses similar +tricks, but we are more liberal on when we use our fast path (e.g. large +increments are supported, equal width is supported). Our fast path +implementation gets within ~10% of `seq` performance when its fast +path is activated. [0]: https://github.com/sharkdp/hyperfine diff --git a/src/uu/seq/Cargo.toml b/src/uu/seq/Cargo.toml index fffa5813a..5973b5157 100644 --- a/src/uu/seq/Cargo.toml +++ b/src/uu/seq/Cargo.toml @@ -23,6 +23,7 @@ num-traits = { workspace = true } thiserror = { workspace = true } uucore = { workspace = true, features = [ "extendedbigdecimal", + "fast-inc", "format", "parser", "quoting-style", diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index a99284d1c..af7ca2f84 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -2,18 +2,20 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) bigdecimal extendedbigdecimal numberparse hexadecimalfloat +// spell-checker:ignore (ToDO) bigdecimal extendedbigdecimal numberparse hexadecimalfloat biguint use std::ffi::OsString; use std::io::{BufWriter, ErrorKind, Write, stdout}; use clap::{Arg, ArgAction, Command}; +use num_bigint::BigUint; +use num_traits::ToPrimitive; use num_traits::Zero; use uucore::error::{FromIo, UResult}; use uucore::extendedbigdecimal::ExtendedBigDecimal; use uucore::format::num_format::FloatVariant; use uucore::format::{Format, num_format}; -use uucore::{format_usage, help_about, help_usage}; +use uucore::{fast_inc::fast_inc, format_usage, help_about, help_usage}; mod error; @@ -149,13 +151,17 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } }; - let precision = select_precision(&first, &increment, &last); - // If a format was passed on the command line, use that. // If not, use some default format based on parameters precision. - let format = match options.format { - Some(str) => Format::::parse(str)?, + let (format, padding, fast_allowed) = match options.format { + Some(str) => ( + Format::::parse(str)?, + 0, + false, + ), None => { + let precision = select_precision(&first, &increment, &last); + let padding = if options.equal_width { let precision_value = precision.unwrap_or(0); first @@ -186,7 +192,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { ..Default::default() }, }; - Format::from_formatter(formatter) + // Allow fast printing if precision is 0 (integer inputs), `print_seq` will do further checks. + ( + Format::from_formatter(formatter), + padding, + precision == Some(0), + ) } }; @@ -195,7 +206,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { &options.separator, &options.terminator, &format, + fast_allowed, + padding, ); + match result { Ok(()) => Ok(()), Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()), @@ -245,6 +259,72 @@ pub fn uu_app() -> Command { ) } +/// Integer print, default format, positive increment: fast code path +/// that avoids reformating digit at all iterations. +fn fast_print_seq( + mut stdout: impl Write, + first: &BigUint, + increment: u64, + last: &BigUint, + separator: &str, + terminator: &str, + padding: usize, +) -> std::io::Result<()> { + // Nothing to do, just return. + if last < first { + return Ok(()); + } + + // Do at most u64::MAX loops. We can print in the order of 1e8 digits per second, + // u64::MAX is 1e19, so it'd take hundreds of years for this to complete anyway. + // TODO: we can move this test to `print_seq` if we care about this case. + let loop_cnt = ((last - first) / increment).to_u64().unwrap_or(u64::MAX); + + // Format the first number. + let first_str = first.to_string(); + + // Makeshift log10.ceil + let last_length = last.to_string().len(); + + // Allocate a large u8 buffer, that contains a preformatted string + // of the number followed by the `separator`. + // + // | ... head space ... | number | separator | + // ^0 ^ start ^ num_end ^ size (==buf.len()) + // + // We keep track of start in this buffer, as the number grows. + // When printing, we take a slice between start and end. + let size = last_length.max(padding) + separator.len(); + // Fill with '0', this is needed for equal_width, and harmless otherwise. + let mut buf = vec![b'0'; size]; + let buf = buf.as_mut_slice(); + + let num_end = buf.len() - separator.len(); + let mut start = num_end - first_str.len(); + + // Initialize buf with first and separator. + buf[start..num_end].copy_from_slice(first_str.as_bytes()); + buf[num_end..].copy_from_slice(separator.as_bytes()); + + // Normally, if padding is > 0, it should be equal to last_length, + // so start would be == 0, but there are corner cases. + start = start.min(num_end - padding); + + // Prepare the number to increment with as a string + let inc_str = increment.to_string(); + let inc_str = inc_str.as_bytes(); + + for _ in 0..loop_cnt { + stdout.write_all(&buf[start..])?; + fast_inc(buf, &mut start, num_end, inc_str); + } + // Write the last number without separator, but with terminator. + stdout.write_all(&buf[start..num_end])?; + write!(stdout, "{terminator}")?; + stdout.flush()?; + Ok(()) +} + fn done_printing(next: &T, increment: &T, last: &T) -> bool { if increment >= &T::zero() { next > last @@ -253,16 +333,42 @@ fn done_printing(next: &T, increment: &T, last: &T) -> boo } } -/// Floating point based code path +/// Arbitrary precision decimal number code path ("slow" path) fn print_seq( range: RangeFloat, separator: &str, terminator: &str, format: &Format, + fast_allowed: bool, + padding: usize, // Used by fast path only ) -> std::io::Result<()> { let stdout = stdout().lock(); let mut stdout = BufWriter::new(stdout); let (first, increment, last) = range; + + if fast_allowed { + // Test if we can use fast code path. + // First try to convert the range to BigUint (u64 for the increment). + let (first_bui, increment_u64, last_bui) = ( + first.to_biguint(), + increment.to_biguint().and_then(|x| x.to_u64()), + last.to_biguint(), + ); + if let (Some(first_bui), Some(increment_u64), Some(last_bui)) = + (first_bui, increment_u64, last_bui) + { + return fast_print_seq( + stdout, + &first_bui, + increment_u64, + &last_bui, + separator, + terminator, + padding, + ); + } + } + let mut value = first; let mut is_first_iteration = true; diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 6f70843db..acbba4c73 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -91,6 +91,7 @@ checksum = ["data-encoding", "thiserror", "sum"] encoding = ["data-encoding", "data-encoding-macro", "z85"] entries = ["libc"] extendedbigdecimal = ["bigdecimal", "num-traits"] +fast-inc = [] fs = ["dunce", "libc", "winapi-util", "windows-sys"] fsext = ["libc", "windows-sys"] fsxattr = ["xattr"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 3f0649c0c..257043e00 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -20,6 +20,8 @@ pub mod custom_tz_fmt; pub mod encoding; #[cfg(feature = "extendedbigdecimal")] pub mod extendedbigdecimal; +#[cfg(feature = "fast-inc")] +pub mod fast_inc; #[cfg(feature = "format")] pub mod format; #[cfg(feature = "fs")] diff --git a/src/uucore/src/lib/features/extendedbigdecimal.rs b/src/uucore/src/lib/features/extendedbigdecimal.rs index a023a69d8..396b6f359 100644 --- a/src/uucore/src/lib/features/extendedbigdecimal.rs +++ b/src/uucore/src/lib/features/extendedbigdecimal.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore bigdecimal extendedbigdecimal +// spell-checker:ignore bigdecimal extendedbigdecimal biguint //! An arbitrary precision float that can also represent infinity, NaN, etc. //! //! The finite values are stored as [`BigDecimal`] instances. Because @@ -25,7 +25,9 @@ use std::ops::Add; use std::ops::Neg; use bigdecimal::BigDecimal; +use bigdecimal::num_bigint::BigUint; use num_traits::FromPrimitive; +use num_traits::Signed; use num_traits::Zero; #[derive(Debug, Clone)] @@ -107,6 +109,20 @@ impl ExtendedBigDecimal { pub fn one() -> Self { Self::BigDecimal(1.into()) } + + pub fn to_biguint(&self) -> Option { + match self { + ExtendedBigDecimal::BigDecimal(big_decimal) => { + let (bi, scale) = big_decimal.as_bigint_and_scale(); + if bi.is_negative() || scale > 0 || scale < -(u32::MAX as i64) { + return None; + } + bi.to_biguint() + .map(|bi| bi * BigUint::from(10u32).pow(-scale as u32)) + } + _ => None, + } + } } impl Zero for ExtendedBigDecimal { diff --git a/src/uucore/src/lib/features/fast_inc.rs b/src/uucore/src/lib/features/fast_inc.rs new file mode 100644 index 000000000..165cf273f --- /dev/null +++ b/src/uucore/src/lib/features/fast_inc.rs @@ -0,0 +1,209 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +/// Fast increment function, operating on ASCII strings. +/// +/// Add inc to the string val[start..end]. This operates on ASCII digits, assuming +/// val and inc are well formed. +/// +/// Updates `start` if we have a carry, or if inc > start. +/// +/// We also assume that there is enough space in val to expand if start needs +/// to be updated. +/// ``` +/// use uucore::fast_inc::fast_inc; +/// +/// // Start with a buffer containing "0", with one byte of head space +/// let mut val = Vec::from(".0".as_bytes()); +/// let mut start = val.len()-1; +/// let end = val.len(); +/// let inc = "6".as_bytes(); +/// assert_eq!(&val[start..end], "0".as_bytes()); +/// fast_inc(val.as_mut(), &mut start, end, inc); +/// assert_eq!(&val[start..end], "6".as_bytes()); +/// fast_inc(val.as_mut(), &mut start, end, inc); +/// assert_eq!(&val[start..end], "12".as_bytes()); +/// ``` +#[inline] +pub fn fast_inc(val: &mut [u8], start: &mut usize, end: usize, inc: &[u8]) { + // To avoid a lot of casts to signed integers, we make sure to decrement pos + // as late as possible, so that it does not ever go negative. + let mut pos = end; + let mut carry = 0u8; + + // First loop, add all digits of inc into val. + for inc_pos in (0..inc.len()).rev() { + // The decrement operation would also panic in debug mode, print a message for developer convenience. + debug_assert!( + pos > 0, + "Buffer overflowed, make sure you allocate val with enough headroom." + ); + pos -= 1; + + let mut new_val = inc[inc_pos] + carry; + // Be careful here, only add existing digit of val. + if pos >= *start { + new_val += val[pos] - b'0'; + } + if new_val > b'9' { + carry = 1; + new_val -= 10; + } else { + carry = 0; + } + val[pos] = new_val; + } + + // Done, now, if we have a carry, add that to the upper digits of val. + if carry == 0 { + *start = (*start).min(pos); + return; + } + + fast_inc_one(val, start, pos) +} + +/// Fast increment by one function, operating on ASCII strings. +/// +/// Add 1 to the string val[start..end]. This operates on ASCII digits, assuming +/// val is well formed. +/// +/// Updates `start` if we have a carry, or if inc > start. +/// +/// We also assume that there is enough space in val to expand if start needs +/// to be updated. +/// ``` +/// use uucore::fast_inc::fast_inc_one; +/// +/// // Start with a buffer containing "8", with one byte of head space +/// let mut val = Vec::from(".8".as_bytes()); +/// let mut start = val.len()-1; +/// let end = val.len(); +/// assert_eq!(&val[start..end], "8".as_bytes()); +/// fast_inc_one(val.as_mut(), &mut start, end); +/// assert_eq!(&val[start..end], "9".as_bytes()); +/// fast_inc_one(val.as_mut(), &mut start, end); +/// assert_eq!(&val[start..end], "10".as_bytes()); +/// ``` +#[inline] +pub fn fast_inc_one(val: &mut [u8], start: &mut usize, end: usize) { + let mut pos = end; + + while pos > *start { + pos -= 1; + + if val[pos] == b'9' { + // 9+1 = 10. Carry propagating, keep going. + val[pos] = b'0'; + } else { + // Carry stopped propagating, return unchanged start. + val[pos] += 1; + return; + } + } + + // The following decrement operation would also panic in debug mode, print a message for developer convenience. + debug_assert!( + *start > 0, + "Buffer overflowed, make sure you allocate val with enough headroom." + ); + // The carry propagated so far that a new digit was added. + val[*start - 1] = b'1'; + *start -= 1; +} + +#[cfg(test)] +mod tests { + use crate::fast_inc::fast_inc; + use crate::fast_inc::fast_inc_one; + + #[test] + fn test_fast_inc_simple() { + let mut val = Vec::from("...0_".as_bytes()); + let mut start: usize = 3; + let inc = "4".as_bytes(); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 3); + assert_eq!(val, "...4_".as_bytes()); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 3); + assert_eq!(val, "...8_".as_bytes()); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 2); // carried 1 more digit + assert_eq!(val, "..12_".as_bytes()); + + let mut val = Vec::from("0_".as_bytes()); + let mut start: usize = 0; + let inc = "2".as_bytes(); + fast_inc(val.as_mut(), &mut start, 1, inc); + assert_eq!(start, 0); + assert_eq!(val, "2_".as_bytes()); + fast_inc(val.as_mut(), &mut start, 1, inc); + assert_eq!(start, 0); + assert_eq!(val, "4_".as_bytes()); + fast_inc(val.as_mut(), &mut start, 1, inc); + assert_eq!(start, 0); + assert_eq!(val, "6_".as_bytes()); + } + + // Check that we handle increment > val correctly. + #[test] + fn test_fast_inc_large_inc() { + let mut val = Vec::from("...7_".as_bytes()); + let mut start: usize = 3; + let inc = "543".as_bytes(); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 1); // carried 2 more digits + assert_eq!(val, ".550_".as_bytes()); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 0); // carried 1 more digit + assert_eq!(val, "1093_".as_bytes()); + } + + // Check that we handle longer carries + #[test] + fn test_fast_inc_carry() { + let mut val = Vec::from(".999_".as_bytes()); + let mut start: usize = 1; + let inc = "1".as_bytes(); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 0); + assert_eq!(val, "1000_".as_bytes()); + + let mut val = Vec::from(".999_".as_bytes()); + let mut start: usize = 1; + let inc = "11".as_bytes(); + fast_inc(val.as_mut(), &mut start, 4, inc); + assert_eq!(start, 0); + assert_eq!(val, "1010_".as_bytes()); + } + + #[test] + fn test_fast_inc_one_simple() { + let mut val = Vec::from("...8_".as_bytes()); + let mut start: usize = 3; + fast_inc_one(val.as_mut(), &mut start, 4); + assert_eq!(start, 3); + assert_eq!(val, "...9_".as_bytes()); + fast_inc_one(val.as_mut(), &mut start, 4); + assert_eq!(start, 2); // carried 1 more digit + assert_eq!(val, "..10_".as_bytes()); + fast_inc_one(val.as_mut(), &mut start, 4); + assert_eq!(start, 2); + assert_eq!(val, "..11_".as_bytes()); + + let mut val = Vec::from("0_".as_bytes()); + let mut start: usize = 0; + fast_inc_one(val.as_mut(), &mut start, 1); + assert_eq!(start, 0); + assert_eq!(val, "1_".as_bytes()); + fast_inc_one(val.as_mut(), &mut start, 1); + assert_eq!(start, 0); + assert_eq!(val, "2_".as_bytes()); + fast_inc_one(val.as_mut(), &mut start, 1); + assert_eq!(start, 0); + assert_eq!(val, "3_".as_bytes()); + } +} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 0762240ed..ee0fd8525 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -45,6 +45,8 @@ pub use crate::features::custom_tz_fmt; pub use crate::features::encoding; #[cfg(feature = "extendedbigdecimal")] pub use crate::features::extendedbigdecimal; +#[cfg(feature = "fast-inc")] +pub use crate::features::fast_inc; #[cfg(feature = "format")] pub use crate::features::format; #[cfg(feature = "fs")] diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index 1786fd40e..e44bf3248 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -216,6 +216,10 @@ fn test_separator_and_terminator() { .args(&["-s", ",", "2", "6"]) .succeeds() .stdout_is("2,3,4,5,6\n"); + new_ucmd!() + .args(&["-s", "", "2", "6"]) + .succeeds() + .stdout_is("23456\n"); new_ucmd!() .args(&["-s", "\n", "2", "6"]) .succeeds() @@ -286,6 +290,10 @@ fn test_separator_and_terminator_floats() { .args(&["-s", ",", "-t", "!", "2.0", "6"]) .succeeds() .stdout_is("2.0,3.0,4.0,5.0,6.0!"); + new_ucmd!() + .args(&["-s", "", "-t", "!", "2.0", "6"]) + .succeeds() + .stdout_is("2.03.04.05.06.0!"); } #[test]