1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00

uucore: num_parser: Optimize parse_digits_count

parse_digits_count is a significant hotspot in parsing code.
In particular, any add/mul operation on BigUint is fairly slow,
so it's better to accumulate digits in a u64, then add them
to the resulting BigUint.

Improves the performance of `sort -g` by about 15-20%.
This commit is contained in:
Nicolas Boichat 2025-06-03 10:05:27 +02:00
parent edc1e5def6
commit 3b18316337

View file

@ -71,12 +71,37 @@ impl Base {
let mut digits: Option<BigUint> = digits; let mut digits: Option<BigUint> = digits;
let mut count: u64 = 0; let mut count: u64 = 0;
let mut rest = str; let mut rest = str;
// Doing operations on BigUint is really expensive, so we do as much as we
// can on u64, then add them to the BigUint.
let mut digits_tmp: u64 = 0;
let mut count_tmp: u64 = 0;
let mut mul_tmp: u64 = 1;
while let Some(d) = rest.chars().next().and_then(|c| self.digit(c)) { while let Some(d) = rest.chars().next().and_then(|c| self.digit(c)) {
(digits, count) = ( (digits_tmp, count_tmp, mul_tmp) = (
Some(digits.unwrap_or_default() * *self as u8 + d), digits_tmp * *self as u64 + d,
count + 1, count_tmp + 1,
mul_tmp * *self as u64,
); );
rest = &rest[1..]; rest = &rest[1..];
// In base 16, each digit takes 4 bits, so a u64 can hold at most 16 digits. We flush
// after 15 so that mul_tmp (the base raised to count_tmp) also still fits in a u64.
if count_tmp >= 15 {
// Accumulate what we have so far
(digits, count) = (
Some(digits.unwrap_or_default() * mul_tmp + digits_tmp),
count + count_tmp,
);
// Reset state
(digits_tmp, count_tmp, mul_tmp) = (0, 0, 1);
}
}
// Accumulate the leftovers (if any)
if mul_tmp > 1 {
(digits, count) = (
Some(digits.unwrap_or_default() * mul_tmp + digits_tmp),
count + count_tmp,
);
} }
(digits, count, rest) (digits, count, rest)
} }