mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #7782 from drinkcat/seq-perf-more-use-cat
Move seq's `fast_inc` to `uucore`, use it in `cat`
This commit is contained in:
commit
1986c965cc
11 changed files with 408 additions and 54 deletions
|
@ -21,7 +21,7 @@ path = "src/cat.rs"
|
||||||
clap = { workspace = true }
|
clap = { workspace = true }
|
||||||
memchr = { workspace = true }
|
memchr = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
uucore = { workspace = true, features = ["fs", "pipes"] }
|
uucore = { workspace = true, features = ["fast-inc", "fs", "pipes"] }
|
||||||
|
|
||||||
[target.'cfg(unix)'.dependencies]
|
[target.'cfg(unix)'.dependencies]
|
||||||
nix = { workspace = true }
|
nix = { workspace = true }
|
||||||
|
|
|
@ -24,7 +24,7 @@ use thiserror::Error;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::UResult;
|
use uucore::error::UResult;
|
||||||
use uucore::fs::FileInformation;
|
use uucore::fs::FileInformation;
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{fast_inc::fast_inc_one, format_usage, help_about, help_usage};
|
||||||
|
|
||||||
/// Linux splice support
|
/// Linux splice support
|
||||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||||
|
@ -33,61 +33,55 @@ mod splice;
|
||||||
const USAGE: &str = help_usage!("cat.md");
|
const USAGE: &str = help_usage!("cat.md");
|
||||||
const ABOUT: &str = help_about!("cat.md");
|
const ABOUT: &str = help_about!("cat.md");
|
||||||
|
|
||||||
|
// Allocate 32 digits for the line number.
|
||||||
|
// An estimate is that we can print about 1e8 lines/second, so 32 digits
|
||||||
|
// would be enough for billions of universe lifetimes.
|
||||||
|
const LINE_NUMBER_BUF_SIZE: usize = 32;
|
||||||
|
|
||||||
struct LineNumber {
|
struct LineNumber {
|
||||||
buf: Vec<u8>,
|
buf: [u8; LINE_NUMBER_BUF_SIZE],
|
||||||
|
print_start: usize,
|
||||||
|
num_start: usize,
|
||||||
|
num_end: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Logic to store a string for the line number. Manually incrementing the value
|
// Logic to store a string for the line number. Manually incrementing the value
|
||||||
// represented in a buffer like this is significantly faster than storing
|
// represented in a buffer like this is significantly faster than storing
|
||||||
// a `usize` and using the standard Rust formatting macros to format a `usize`
|
// a `usize` and using the standard Rust formatting macros to format a `usize`
|
||||||
// to a string each time it's needed.
|
// to a string each time it's needed.
|
||||||
// String is initialized to " 1\t" and incremented each time `increment` is
|
// Buffer is initialized to " 1\t" and incremented each time `increment` is
|
||||||
// called. When the value overflows the range storable in the buffer, a b'1' is
|
// called, using uucore's fast_inc function that operates on strings.
|
||||||
// prepended and the counting continues.
|
|
||||||
impl LineNumber {
|
impl LineNumber {
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
|
let mut buf = [b'0'; LINE_NUMBER_BUF_SIZE];
|
||||||
|
|
||||||
|
let init_str = " 1\t";
|
||||||
|
let print_start = buf.len() - init_str.len();
|
||||||
|
let num_start = buf.len() - 2;
|
||||||
|
let num_end = buf.len() - 1;
|
||||||
|
|
||||||
|
buf[print_start..].copy_from_slice(init_str.as_bytes());
|
||||||
|
|
||||||
LineNumber {
|
LineNumber {
|
||||||
// Initialize buf to b" 1\t"
|
buf,
|
||||||
buf: Vec::from(b" 1\t"),
|
print_start,
|
||||||
|
num_start,
|
||||||
|
num_end,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn increment(&mut self) {
|
fn increment(&mut self) {
|
||||||
// skip(1) to avoid the \t in the last byte.
|
fast_inc_one(&mut self.buf, &mut self.num_start, self.num_end);
|
||||||
for ascii_digit in self.buf.iter_mut().rev().skip(1) {
|
self.print_start = self.print_start.min(self.num_start);
|
||||||
// Working from the least-significant digit, increment the number in the buffer.
|
|
||||||
// If we hit anything other than a b'9' we can break since the next digit is
|
|
||||||
// unaffected.
|
|
||||||
// Also note that if we hit a b' ', we can think of that as a 0 and increment to b'1'.
|
|
||||||
// If/else here is faster than match (as measured with some benchmarking Apr-2025),
|
|
||||||
// probably since we can prioritize most likely digits first.
|
|
||||||
if (b'0'..=b'8').contains(ascii_digit) {
|
|
||||||
*ascii_digit += 1;
|
|
||||||
break;
|
|
||||||
} else if b'9' == *ascii_digit {
|
|
||||||
*ascii_digit = b'0';
|
|
||||||
} else {
|
|
||||||
assert_eq!(*ascii_digit, b' ');
|
|
||||||
*ascii_digit = b'1';
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if self.buf[0] == b'0' {
|
|
||||||
// This implies we've overflowed. In this case the buffer will be
|
|
||||||
// [b'0', b'0', ..., b'0', b'\t'].
|
|
||||||
// For debugging, the following logic would assert that to be the case.
|
|
||||||
// assert_eq!(*self.buf.last().unwrap(), b'\t');
|
|
||||||
// for ascii_digit in self.buf.iter_mut().rev().skip(1) {
|
|
||||||
// assert_eq!(*ascii_digit, b'0');
|
|
||||||
// }
|
|
||||||
|
|
||||||
// All we need to do is prepend a b'1' and we're good.
|
#[inline]
|
||||||
self.buf.insert(0, b'1');
|
fn to_str(&self) -> &[u8] {
|
||||||
}
|
&self.buf[self.print_start..]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write(&self, writer: &mut impl Write) -> io::Result<()> {
|
fn write(&self, writer: &mut impl Write) -> io::Result<()> {
|
||||||
writer.write_all(&self.buf)
|
writer.write_all(self.to_str())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -804,21 +798,21 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_incrementing_string() {
|
fn test_incrementing_string() {
|
||||||
let mut incrementing_string = super::LineNumber::new();
|
let mut incrementing_string = super::LineNumber::new();
|
||||||
assert_eq!(b" 1\t", incrementing_string.buf.as_slice());
|
assert_eq!(b" 1\t", incrementing_string.to_str());
|
||||||
incrementing_string.increment();
|
incrementing_string.increment();
|
||||||
assert_eq!(b" 2\t", incrementing_string.buf.as_slice());
|
assert_eq!(b" 2\t", incrementing_string.to_str());
|
||||||
// Run through to 100
|
// Run through to 100
|
||||||
for _ in 3..=100 {
|
for _ in 3..=100 {
|
||||||
incrementing_string.increment();
|
incrementing_string.increment();
|
||||||
}
|
}
|
||||||
assert_eq!(b" 100\t", incrementing_string.buf.as_slice());
|
assert_eq!(b" 100\t", incrementing_string.to_str());
|
||||||
// Run through until we overflow the original size.
|
// Run through until we overflow the original size.
|
||||||
for _ in 101..=1_000_000 {
|
for _ in 101..=1_000_000 {
|
||||||
incrementing_string.increment();
|
incrementing_string.increment();
|
||||||
}
|
}
|
||||||
// Confirm that the buffer expands when we overflow the original size.
|
// Confirm that the start position moves when we overflow the original size.
|
||||||
assert_eq!(b"1000000\t", incrementing_string.buf.as_slice());
|
assert_eq!(b"1000000\t", incrementing_string.to_str());
|
||||||
incrementing_string.increment();
|
incrementing_string.increment();
|
||||||
assert_eq!(b"1000001\t", incrementing_string.buf.as_slice());
|
assert_eq!(b"1000001\t", incrementing_string.to_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,5 +76,20 @@ write!(stdout, "{separator}")?
|
||||||
|
|
||||||
The change above resulted in a ~10% speedup.
|
The change above resulted in a ~10% speedup.
|
||||||
|
|
||||||
|
### Fast increment path
|
||||||
|
|
||||||
|
When dealing with positive integer values (first/increment/last), and
|
||||||
|
the default format is used, we use a custom fast path that does arithmetic
|
||||||
|
on u8 arrays (i.e. strings), instead of repeatedly calling into
|
||||||
|
the formatting code.
|
||||||
|
|
||||||
|
This provides _massive_ performance gains, in the order of 10-20x compared
|
||||||
|
with the default implementation, at the expense of some added code complexity.
|
||||||
|
|
||||||
|
Just from performance numbers, it is clear that GNU `seq` uses similar
|
||||||
|
tricks, but we are more liberal on when we use our fast path (e.g. large
|
||||||
|
increments are supported, equal width is supported). Our fast path
|
||||||
|
implementation gets within ~10% of GNU `seq` performance when its fast
|
||||||
|
path is activated.
|
||||||
|
|
||||||
[0]: https://github.com/sharkdp/hyperfine
|
[0]: https://github.com/sharkdp/hyperfine
|
||||||
|
|
|
@ -23,6 +23,7 @@ num-traits = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
uucore = { workspace = true, features = [
|
uucore = { workspace = true, features = [
|
||||||
"extendedbigdecimal",
|
"extendedbigdecimal",
|
||||||
|
"fast-inc",
|
||||||
"format",
|
"format",
|
||||||
"parser",
|
"parser",
|
||||||
"quoting-style",
|
"quoting-style",
|
||||||
|
|
|
@ -2,18 +2,20 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
// spell-checker:ignore (ToDO) bigdecimal extendedbigdecimal numberparse hexadecimalfloat
|
// spell-checker:ignore (ToDO) bigdecimal extendedbigdecimal numberparse hexadecimalfloat biguint
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
use std::io::{BufWriter, ErrorKind, Write, stdout};
|
use std::io::{BufWriter, ErrorKind, Write, stdout};
|
||||||
|
|
||||||
use clap::{Arg, ArgAction, Command};
|
use clap::{Arg, ArgAction, Command};
|
||||||
|
use num_bigint::BigUint;
|
||||||
|
use num_traits::ToPrimitive;
|
||||||
use num_traits::Zero;
|
use num_traits::Zero;
|
||||||
|
|
||||||
use uucore::error::{FromIo, UResult};
|
use uucore::error::{FromIo, UResult};
|
||||||
use uucore::extendedbigdecimal::ExtendedBigDecimal;
|
use uucore::extendedbigdecimal::ExtendedBigDecimal;
|
||||||
use uucore::format::num_format::FloatVariant;
|
use uucore::format::num_format::FloatVariant;
|
||||||
use uucore::format::{Format, num_format};
|
use uucore::format::{Format, num_format};
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{fast_inc::fast_inc, format_usage, help_about, help_usage};
|
||||||
|
|
||||||
mod error;
|
mod error;
|
||||||
|
|
||||||
|
@ -149,13 +151,17 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let precision = select_precision(&first, &increment, &last);
|
|
||||||
|
|
||||||
// If a format was passed on the command line, use that.
|
// If a format was passed on the command line, use that.
|
||||||
// If not, use some default format based on parameters precision.
|
// If not, use some default format based on parameters precision.
|
||||||
let format = match options.format {
|
let (format, padding, fast_allowed) = match options.format {
|
||||||
Some(str) => Format::<num_format::Float, &ExtendedBigDecimal>::parse(str)?,
|
Some(str) => (
|
||||||
|
Format::<num_format::Float, &ExtendedBigDecimal>::parse(str)?,
|
||||||
|
0,
|
||||||
|
false,
|
||||||
|
),
|
||||||
None => {
|
None => {
|
||||||
|
let precision = select_precision(&first, &increment, &last);
|
||||||
|
|
||||||
let padding = if options.equal_width {
|
let padding = if options.equal_width {
|
||||||
let precision_value = precision.unwrap_or(0);
|
let precision_value = precision.unwrap_or(0);
|
||||||
first
|
first
|
||||||
|
@ -186,7 +192,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
..Default::default()
|
..Default::default()
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
Format::from_formatter(formatter)
|
// Allow fast printing if precision is 0 (integer inputs), `print_seq` will do further checks.
|
||||||
|
(
|
||||||
|
Format::from_formatter(formatter),
|
||||||
|
padding,
|
||||||
|
precision == Some(0),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -195,7 +206,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
&options.separator,
|
&options.separator,
|
||||||
&options.terminator,
|
&options.terminator,
|
||||||
&format,
|
&format,
|
||||||
|
fast_allowed,
|
||||||
|
padding,
|
||||||
);
|
);
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(()) => Ok(()),
|
Ok(()) => Ok(()),
|
||||||
Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()),
|
Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()),
|
||||||
|
@ -245,6 +259,72 @@ pub fn uu_app() -> Command {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Integer print, default format, positive increment: fast code path
|
||||||
|
/// that avoids reformatting the number on every iteration.
|
||||||
|
fn fast_print_seq(
|
||||||
|
mut stdout: impl Write,
|
||||||
|
first: &BigUint,
|
||||||
|
increment: u64,
|
||||||
|
last: &BigUint,
|
||||||
|
separator: &str,
|
||||||
|
terminator: &str,
|
||||||
|
padding: usize,
|
||||||
|
) -> std::io::Result<()> {
|
||||||
|
// Nothing to do, just return.
|
||||||
|
if last < first {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do at most u64::MAX loops. We can print in the order of 1e8 digits per second,
|
||||||
|
// u64::MAX is 1e19, so it'd take hundreds of years for this to complete anyway.
|
||||||
|
// TODO: we can move this test to `print_seq` if we care about this case.
|
||||||
|
let loop_cnt = ((last - first) / increment).to_u64().unwrap_or(u64::MAX);
|
||||||
|
|
||||||
|
// Format the first number.
|
||||||
|
let first_str = first.to_string();
|
||||||
|
|
||||||
|
// Makeshift log10.ceil
|
||||||
|
let last_length = last.to_string().len();
|
||||||
|
|
||||||
|
// Allocate a large u8 buffer, that contains a preformatted string
|
||||||
|
// of the number followed by the `separator`.
|
||||||
|
//
|
||||||
|
// | ... head space ... | number | separator |
|
||||||
|
// ^0 ^ start ^ num_end ^ size (==buf.len())
|
||||||
|
//
|
||||||
|
// We keep track of start in this buffer, as the number grows.
|
||||||
|
// When printing, we take a slice between start and end.
|
||||||
|
let size = last_length.max(padding) + separator.len();
|
||||||
|
// Fill with '0', this is needed for equal_width, and harmless otherwise.
|
||||||
|
let mut buf = vec![b'0'; size];
|
||||||
|
let buf = buf.as_mut_slice();
|
||||||
|
|
||||||
|
let num_end = buf.len() - separator.len();
|
||||||
|
let mut start = num_end - first_str.len();
|
||||||
|
|
||||||
|
// Initialize buf with first and separator.
|
||||||
|
buf[start..num_end].copy_from_slice(first_str.as_bytes());
|
||||||
|
buf[num_end..].copy_from_slice(separator.as_bytes());
|
||||||
|
|
||||||
|
// Normally, if padding is > 0, it should be equal to last_length,
|
||||||
|
// so start would be == 0, but there are corner cases.
|
||||||
|
start = start.min(num_end - padding);
|
||||||
|
|
||||||
|
// Prepare the number to increment with as a string
|
||||||
|
let inc_str = increment.to_string();
|
||||||
|
let inc_str = inc_str.as_bytes();
|
||||||
|
|
||||||
|
for _ in 0..loop_cnt {
|
||||||
|
stdout.write_all(&buf[start..])?;
|
||||||
|
fast_inc(buf, &mut start, num_end, inc_str);
|
||||||
|
}
|
||||||
|
// Write the last number without separator, but with terminator.
|
||||||
|
stdout.write_all(&buf[start..num_end])?;
|
||||||
|
write!(stdout, "{terminator}")?;
|
||||||
|
stdout.flush()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn done_printing<T: Zero + PartialOrd>(next: &T, increment: &T, last: &T) -> bool {
|
fn done_printing<T: Zero + PartialOrd>(next: &T, increment: &T, last: &T) -> bool {
|
||||||
if increment >= &T::zero() {
|
if increment >= &T::zero() {
|
||||||
next > last
|
next > last
|
||||||
|
@ -253,16 +333,42 @@ fn done_printing<T: Zero + PartialOrd>(next: &T, increment: &T, last: &T) -> boo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Floating point based code path
|
/// Arbitrary precision decimal number code path ("slow" path)
|
||||||
fn print_seq(
|
fn print_seq(
|
||||||
range: RangeFloat,
|
range: RangeFloat,
|
||||||
separator: &str,
|
separator: &str,
|
||||||
terminator: &str,
|
terminator: &str,
|
||||||
format: &Format<num_format::Float, &ExtendedBigDecimal>,
|
format: &Format<num_format::Float, &ExtendedBigDecimal>,
|
||||||
|
fast_allowed: bool,
|
||||||
|
padding: usize, // Used by fast path only
|
||||||
) -> std::io::Result<()> {
|
) -> std::io::Result<()> {
|
||||||
let stdout = stdout().lock();
|
let stdout = stdout().lock();
|
||||||
let mut stdout = BufWriter::new(stdout);
|
let mut stdout = BufWriter::new(stdout);
|
||||||
let (first, increment, last) = range;
|
let (first, increment, last) = range;
|
||||||
|
|
||||||
|
if fast_allowed {
|
||||||
|
// Test if we can use fast code path.
|
||||||
|
// First try to convert the range to BigUint (u64 for the increment).
|
||||||
|
let (first_bui, increment_u64, last_bui) = (
|
||||||
|
first.to_biguint(),
|
||||||
|
increment.to_biguint().and_then(|x| x.to_u64()),
|
||||||
|
last.to_biguint(),
|
||||||
|
);
|
||||||
|
if let (Some(first_bui), Some(increment_u64), Some(last_bui)) =
|
||||||
|
(first_bui, increment_u64, last_bui)
|
||||||
|
{
|
||||||
|
return fast_print_seq(
|
||||||
|
stdout,
|
||||||
|
&first_bui,
|
||||||
|
increment_u64,
|
||||||
|
&last_bui,
|
||||||
|
separator,
|
||||||
|
terminator,
|
||||||
|
padding,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mut value = first;
|
let mut value = first;
|
||||||
|
|
||||||
let mut is_first_iteration = true;
|
let mut is_first_iteration = true;
|
||||||
|
|
|
@ -91,6 +91,7 @@ checksum = ["data-encoding", "thiserror", "sum"]
|
||||||
encoding = ["data-encoding", "data-encoding-macro", "z85"]
|
encoding = ["data-encoding", "data-encoding-macro", "z85"]
|
||||||
entries = ["libc"]
|
entries = ["libc"]
|
||||||
extendedbigdecimal = ["bigdecimal", "num-traits"]
|
extendedbigdecimal = ["bigdecimal", "num-traits"]
|
||||||
|
fast-inc = []
|
||||||
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
|
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
|
||||||
fsext = ["libc", "windows-sys"]
|
fsext = ["libc", "windows-sys"]
|
||||||
fsxattr = ["xattr"]
|
fsxattr = ["xattr"]
|
||||||
|
|
|
@ -20,6 +20,8 @@ pub mod custom_tz_fmt;
|
||||||
pub mod encoding;
|
pub mod encoding;
|
||||||
#[cfg(feature = "extendedbigdecimal")]
|
#[cfg(feature = "extendedbigdecimal")]
|
||||||
pub mod extendedbigdecimal;
|
pub mod extendedbigdecimal;
|
||||||
|
#[cfg(feature = "fast-inc")]
|
||||||
|
pub mod fast_inc;
|
||||||
#[cfg(feature = "format")]
|
#[cfg(feature = "format")]
|
||||||
pub mod format;
|
pub mod format;
|
||||||
#[cfg(feature = "fs")]
|
#[cfg(feature = "fs")]
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
// spell-checker:ignore bigdecimal extendedbigdecimal
|
// spell-checker:ignore bigdecimal extendedbigdecimal biguint
|
||||||
//! An arbitrary precision float that can also represent infinity, NaN, etc.
|
//! An arbitrary precision float that can also represent infinity, NaN, etc.
|
||||||
//!
|
//!
|
||||||
//! The finite values are stored as [`BigDecimal`] instances. Because
|
//! The finite values are stored as [`BigDecimal`] instances. Because
|
||||||
|
@ -25,7 +25,9 @@ use std::ops::Add;
|
||||||
use std::ops::Neg;
|
use std::ops::Neg;
|
||||||
|
|
||||||
use bigdecimal::BigDecimal;
|
use bigdecimal::BigDecimal;
|
||||||
|
use bigdecimal::num_bigint::BigUint;
|
||||||
use num_traits::FromPrimitive;
|
use num_traits::FromPrimitive;
|
||||||
|
use num_traits::Signed;
|
||||||
use num_traits::Zero;
|
use num_traits::Zero;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
@ -107,6 +109,20 @@ impl ExtendedBigDecimal {
|
||||||
pub fn one() -> Self {
|
pub fn one() -> Self {
|
||||||
Self::BigDecimal(1.into())
|
Self::BigDecimal(1.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn to_biguint(&self) -> Option<BigUint> {
|
||||||
|
match self {
|
||||||
|
ExtendedBigDecimal::BigDecimal(big_decimal) => {
|
||||||
|
let (bi, scale) = big_decimal.as_bigint_and_scale();
|
||||||
|
if bi.is_negative() || scale > 0 || scale < -(u32::MAX as i64) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
bi.to_biguint()
|
||||||
|
.map(|bi| bi * BigUint::from(10u32).pow(-scale as u32))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Zero for ExtendedBigDecimal {
|
impl Zero for ExtendedBigDecimal {
|
||||||
|
|
209
src/uucore/src/lib/features/fast_inc.rs
Normal file
209
src/uucore/src/lib/features/fast_inc.rs
Normal file
|
@ -0,0 +1,209 @@
|
||||||
|
// This file is part of the uutils coreutils package.
|
||||||
|
//
|
||||||
|
// For the full copyright and license information, please view the LICENSE
|
||||||
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
|
/// Fast increment function, operating on ASCII strings.
///
/// Adds `inc` to the decimal number stored in `val[*start..end]`. This
/// operates directly on ASCII digits, assuming that both `inc` and the number
/// in `val` are well formed (digits `b'0'..=b'9'` only).
///
/// `start` is moved towards the front of `val` whenever the result has more
/// digits than the previous value: either because `inc` has more digits than
/// the current number, or because a carry propagated past the most
/// significant digit (or both, e.g. `5 + 99 = 104`).
///
/// We assume that there is enough head space in `val` (bytes before `start`)
/// for the number to grow into. Running out of head space triggers a debug
/// assertion, and an arithmetic-overflow or out-of-bounds panic otherwise.
/// ```
/// use uucore::fast_inc::fast_inc;
///
/// // Start with a buffer containing "0", with one byte of head space
/// let mut val = Vec::from(".0".as_bytes());
/// let mut start = val.len()-1;
/// let end = val.len();
/// let inc = "6".as_bytes();
/// assert_eq!(&val[start..end], "0".as_bytes());
/// fast_inc(val.as_mut(), &mut start, end, inc);
/// assert_eq!(&val[start..end], "6".as_bytes());
/// fast_inc(val.as_mut(), &mut start, end, inc);
/// assert_eq!(&val[start..end], "12".as_bytes());
/// ```
#[inline]
pub fn fast_inc(val: &mut [u8], start: &mut usize, end: usize, inc: &[u8]) {
    // To avoid a lot of casts to signed integers, we make sure to decrement pos
    // as late as possible, so that it does not ever go negative.
    let mut pos = end;
    let mut carry = 0u8;

    // First loop, add all digits of inc into val, least significant first.
    for &inc_digit in inc.iter().rev() {
        // The decrement operation would also panic in debug mode, print a message for developer convenience.
        debug_assert!(
            pos > 0,
            "Buffer overflowed, make sure you allocate val with enough headroom."
        );
        pos -= 1;

        // `new_val` is kept as an ASCII code: ASCII digit of inc + carry (0/1).
        let mut new_val = inc_digit + carry;
        // Be careful here, only add existing digit of val: bytes before
        // `start` are head space and do not belong to the number.
        if pos >= *start {
            new_val += val[pos] - b'0';
        }
        if new_val > b'9' {
            carry = 1;
            new_val -= 10;
        } else {
            carry = 0;
        }
        val[pos] = new_val;
    }

    // Every byte in val[pos..end] now holds a valid digit, so the number
    // starts at `pos` at the latest. This must be done *before* resolving the
    // carry: if inc is longer than the old value (pos < old start) and we
    // still have a carry, the extra digit belongs in front of `pos`, not in
    // front of the old `start`.
    *start = (*start).min(pos);

    // Done, now, if we have a carry, add that to the upper digits of val.
    if carry != 0 {
        fast_inc_one(val, start, pos);
    }
}

/// Fast increment by one function, operating on ASCII strings.
///
/// Adds 1 to the decimal number stored in `val[*start..end]`. This operates
/// directly on ASCII digits, assuming that the number in `val` is well formed
/// and that `*start <= end`.
///
/// `start` is decremented when the carry propagates past the most significant
/// digit and the number gains a digit (e.g. `99 + 1 = 100`); otherwise it is
/// left unchanged.
///
/// We also assume that there is enough head space in `val` (bytes before
/// `start`) for the number to grow into.
/// ```
/// use uucore::fast_inc::fast_inc_one;
///
/// // Start with a buffer containing "8", with one byte of head space
/// let mut val = Vec::from(".8".as_bytes());
/// let mut start = val.len()-1;
/// let end = val.len();
/// assert_eq!(&val[start..end], "8".as_bytes());
/// fast_inc_one(val.as_mut(), &mut start, end);
/// assert_eq!(&val[start..end], "9".as_bytes());
/// fast_inc_one(val.as_mut(), &mut start, end);
/// assert_eq!(&val[start..end], "10".as_bytes());
/// ```
#[inline]
pub fn fast_inc_one(val: &mut [u8], start: &mut usize, end: usize) {
    let mut pos = end;

    // Walk from the least significant digit towards the most significant one.
    while pos > *start {
        pos -= 1;

        if val[pos] == b'9' {
            // 9+1 = 10. Carry propagating, keep going.
            val[pos] = b'0';
        } else {
            // Carry stopped propagating, return unchanged start.
            val[pos] += 1;
            return;
        }
    }

    // The following decrement operation would also panic in debug mode, print a message for developer convenience.
    debug_assert!(
        *start > 0,
        "Buffer overflowed, make sure you allocate val with enough headroom."
    );
    // The carry propagated so far that a new digit was added.
    val[*start - 1] = b'1';
    *start -= 1;
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::fast_inc::fast_inc;
|
||||||
|
use crate::fast_inc::fast_inc_one;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_fast_inc_simple() {
|
||||||
|
let mut val = Vec::from("...0_".as_bytes());
|
||||||
|
let mut start: usize = 3;
|
||||||
|
let inc = "4".as_bytes();
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 3);
|
||||||
|
assert_eq!(val, "...4_".as_bytes());
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 3);
|
||||||
|
assert_eq!(val, "...8_".as_bytes());
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 2); // carried 1 more digit
|
||||||
|
assert_eq!(val, "..12_".as_bytes());
|
||||||
|
|
||||||
|
let mut val = Vec::from("0_".as_bytes());
|
||||||
|
let mut start: usize = 0;
|
||||||
|
let inc = "2".as_bytes();
|
||||||
|
fast_inc(val.as_mut(), &mut start, 1, inc);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "2_".as_bytes());
|
||||||
|
fast_inc(val.as_mut(), &mut start, 1, inc);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "4_".as_bytes());
|
||||||
|
fast_inc(val.as_mut(), &mut start, 1, inc);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "6_".as_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that we handle increment > val correctly.
|
||||||
|
#[test]
|
||||||
|
fn test_fast_inc_large_inc() {
|
||||||
|
let mut val = Vec::from("...7_".as_bytes());
|
||||||
|
let mut start: usize = 3;
|
||||||
|
let inc = "543".as_bytes();
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 1); // carried 2 more digits
|
||||||
|
assert_eq!(val, ".550_".as_bytes());
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 0); // carried 1 more digit
|
||||||
|
assert_eq!(val, "1093_".as_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that we handle longer carries
|
||||||
|
#[test]
|
||||||
|
fn test_fast_inc_carry() {
|
||||||
|
let mut val = Vec::from(".999_".as_bytes());
|
||||||
|
let mut start: usize = 1;
|
||||||
|
let inc = "1".as_bytes();
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "1000_".as_bytes());
|
||||||
|
|
||||||
|
let mut val = Vec::from(".999_".as_bytes());
|
||||||
|
let mut start: usize = 1;
|
||||||
|
let inc = "11".as_bytes();
|
||||||
|
fast_inc(val.as_mut(), &mut start, 4, inc);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "1010_".as_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_fast_inc_one_simple() {
|
||||||
|
let mut val = Vec::from("...8_".as_bytes());
|
||||||
|
let mut start: usize = 3;
|
||||||
|
fast_inc_one(val.as_mut(), &mut start, 4);
|
||||||
|
assert_eq!(start, 3);
|
||||||
|
assert_eq!(val, "...9_".as_bytes());
|
||||||
|
fast_inc_one(val.as_mut(), &mut start, 4);
|
||||||
|
assert_eq!(start, 2); // carried 1 more digit
|
||||||
|
assert_eq!(val, "..10_".as_bytes());
|
||||||
|
fast_inc_one(val.as_mut(), &mut start, 4);
|
||||||
|
assert_eq!(start, 2);
|
||||||
|
assert_eq!(val, "..11_".as_bytes());
|
||||||
|
|
||||||
|
let mut val = Vec::from("0_".as_bytes());
|
||||||
|
let mut start: usize = 0;
|
||||||
|
fast_inc_one(val.as_mut(), &mut start, 1);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "1_".as_bytes());
|
||||||
|
fast_inc_one(val.as_mut(), &mut start, 1);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "2_".as_bytes());
|
||||||
|
fast_inc_one(val.as_mut(), &mut start, 1);
|
||||||
|
assert_eq!(start, 0);
|
||||||
|
assert_eq!(val, "3_".as_bytes());
|
||||||
|
}
|
||||||
|
}
|
|
@ -45,6 +45,8 @@ pub use crate::features::custom_tz_fmt;
|
||||||
pub use crate::features::encoding;
|
pub use crate::features::encoding;
|
||||||
#[cfg(feature = "extendedbigdecimal")]
|
#[cfg(feature = "extendedbigdecimal")]
|
||||||
pub use crate::features::extendedbigdecimal;
|
pub use crate::features::extendedbigdecimal;
|
||||||
|
#[cfg(feature = "fast-inc")]
|
||||||
|
pub use crate::features::fast_inc;
|
||||||
#[cfg(feature = "format")]
|
#[cfg(feature = "format")]
|
||||||
pub use crate::features::format;
|
pub use crate::features::format;
|
||||||
#[cfg(feature = "fs")]
|
#[cfg(feature = "fs")]
|
||||||
|
|
|
@ -216,6 +216,10 @@ fn test_separator_and_terminator() {
|
||||||
.args(&["-s", ",", "2", "6"])
|
.args(&["-s", ",", "2", "6"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is("2,3,4,5,6\n");
|
.stdout_is("2,3,4,5,6\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-s", "", "2", "6"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_is("23456\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-s", "\n", "2", "6"])
|
.args(&["-s", "\n", "2", "6"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@ -286,6 +290,10 @@ fn test_separator_and_terminator_floats() {
|
||||||
.args(&["-s", ",", "-t", "!", "2.0", "6"])
|
.args(&["-s", ",", "-t", "!", "2.0", "6"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is("2.0,3.0,4.0,5.0,6.0!");
|
.stdout_is("2.0,3.0,4.0,5.0,6.0!");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-s", "", "-t", "!", "2.0", "6"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_is("2.03.04.05.06.0!");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue