1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-01 05:27:45 +00:00

uucore: format: num_parser: Use ExtendedBigDecimal for internal representation

ExtendedBigDecimal already provides everything we need, use that
instead of a custom representation.
This commit is contained in:
Nicolas Boichat 2025-03-17 12:15:59 +01:00
parent 07cce029cb
commit 20add88afc

View file

@ -5,7 +5,16 @@
//! Utilities for parsing numbers in various formats //! Utilities for parsing numbers in various formats
// spell-checker:ignore powf copysign prec inity // spell-checker:ignore powf copysign prec inity bigdecimal extendedbigdecimal biguint
use bigdecimal::{
num_bigint::{BigInt, BigUint, Sign},
BigDecimal,
};
use num_traits::ToPrimitive;
use num_traits::Zero;
use crate::format::extendedbigdecimal::ExtendedBigDecimal;
/// Base for number parsing /// Base for number parsing
#[derive(Clone, Copy, PartialEq)] #[derive(Clone, Copy, PartialEq)]
@ -68,31 +77,28 @@ impl<'a, T> ParseError<'a, T> {
/// A number parser for binary, octal, decimal, hexadecimal and single characters. /// A number parser for binary, octal, decimal, hexadecimal and single characters.
/// ///
/// Internally, in order to get the maximum possible precision and cover the full /// TODO: we just keep an ExtendedBigDecimal internally, so we don't really need this
/// range of u64 and i64 without losing precision for f64, the returned number is /// struct actually.
/// decomposed into:
/// - A `base` value
/// - A `neg` sign bit
/// - A `integral` positive part
/// - A `fractional` positive part
/// - A `precision` representing the number of digits in the fractional part
/// ///
/// If the fractional part cannot be represented on a `u64`, parsing continues /// If the fractional part cannot be represented on a `u64`, parsing continues
/// silently by ignoring non-significant digits. /// silently by ignoring non-significant digits.
pub struct ParsedNumber { pub struct ParsedNumber {
base: Base, number: ExtendedBigDecimal,
negative: bool,
integral: u64,
fractional: u64,
precision: usize,
} }
impl ParsedNumber { impl ParsedNumber {
fn into_i64(self) -> Option<i64> { fn into_i64(self) -> Option<i64> {
if self.negative { match self.number {
i64::try_from(-i128::from(self.integral)).ok() ExtendedBigDecimal::BigDecimal(bd) => {
} else { let (digits, scale) = bd.into_bigint_and_scale();
i64::try_from(self.integral).ok() if scale == 0 {
i64::try_from(digits).ok()
} else {
None
}
}
ExtendedBigDecimal::MinusZero => Some(0),
_ => None,
} }
} }
@ -108,21 +114,41 @@ impl ParsedNumber {
} }
} }
/// Converts the parsed number into a `u64`, consuming `self`.
///
/// Returns `Some` only when the number is the `BigDecimal` variant, its scale
/// is exactly 0 (no fractional digits), and its digits convert with
/// `u64::try_from`. Every other case returns `None`, which `parse_u64`
/// reports as `ParseError::Overflow`.
///
/// NOTE(review): `MinusZero` falls into the catch-all arm and yields `None`,
/// whereas `into_i64` maps `MinusZero` to `Some(0)` — confirm that rejecting
/// "-0" for unsigned parsing is intended.
fn into_u64(self) -> Option<u64> {
match self.number {
ExtendedBigDecimal::BigDecimal(bd) => {
// Split into the unscaled integer digits and the decimal scale.
let (digits, scale) = bd.into_bigint_and_scale();
if scale == 0 {
// A failed conversion (e.g. "-123", per the test below) becomes `None`.
u64::try_from(digits).ok()
} else {
// Any other scale is not accepted as an integer value.
None
}
}
// All remaining variants (MinusZero, NaNs, infinities) have no `u64` value here.
_ => None,
}
}
/// Parse a number as u64. No fractional part is allowed. /// Parse a number as u64. No fractional part is allowed.
pub fn parse_u64(input: &str) -> Result<u64, ParseError<'_, u64>> { pub fn parse_u64(input: &str) -> Result<u64, ParseError<'_, u64>> {
match Self::parse(input, true) { match Self::parse(input, true) {
Ok(v) | Err(ParseError::PartialMatch(v, _)) if v.negative => { Ok(v) => v.into_u64().ok_or(ParseError::Overflow),
Err(ParseError::NotNumeric) Err(e) => Err(e.map(|v, rest| {
} v.into_u64()
Ok(v) => Ok(v.integral), .map(|v| ParseError::PartialMatch(v, rest))
Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.integral, rest))), .unwrap_or(ParseError::Overflow)
})),
} }
} }
fn into_f64(self) -> f64 { fn into_f64(self) -> f64 {
let n = self.integral as f64 match self.number {
+ (self.fractional as f64) / (self.base as u8 as f64).powf(self.precision as f64); ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(),
if self.negative { -n } else { n } ExtendedBigDecimal::MinusZero => -0.0,
ExtendedBigDecimal::Nan => f64::NAN,
ExtendedBigDecimal::MinusNan => -f64::NAN,
ExtendedBigDecimal::Infinity => f64::INFINITY,
ExtendedBigDecimal::MinusInfinity => -f64::INFINITY,
}
} }
/// Parse a number as f64 /// Parse a number as f64
@ -164,11 +190,7 @@ impl ParsedNumber {
if let Some(rest) = input.strip_prefix('\'') { if let Some(rest) = input.strip_prefix('\'') {
let mut chars = rest.char_indices().fuse(); let mut chars = rest.char_indices().fuse();
let v = chars.next().map(|(_, c)| Self { let v = chars.next().map(|(_, c)| Self {
base: Base::Decimal, number: ExtendedBigDecimal::BigDecimal(u32::from(c).into()),
negative: false,
integral: u64::from(c),
fractional: 0,
precision: 0,
}); });
return match (v, chars.next()) { return match (v, chars.next()) {
(Some(v), None) => Ok(v), (Some(v), None) => Ok(v),
@ -209,36 +231,23 @@ impl ParsedNumber {
// Parse the integral part of the number // Parse the integral part of the number
let mut chars = rest.chars().enumerate().fuse().peekable(); let mut chars = rest.chars().enumerate().fuse().peekable();
let mut integral = 0u64; let mut digits = BigUint::zero();
let mut scale = 0i64;
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
chars.next(); chars.next();
integral = integral digits = digits * base as u8 + d;
.checked_mul(base as u64)
.and_then(|n| n.checked_add(d))
.ok_or(ParseError::Overflow)?;
} }
// Parse the fractional part of the number if there can be one and the input contains // Parse the fractional part of the number if there can be one and the input contains
// a '.' decimal separator. // a '.' decimal separator.
let (mut fractional, mut precision) = (0u64, 0);
if matches!(chars.peek(), Some(&(_, '.'))) if matches!(chars.peek(), Some(&(_, '.')))
&& matches!(base, Base::Decimal | Base::Hexadecimal) && matches!(base, Base::Decimal | Base::Hexadecimal)
&& !integral_only && !integral_only
{ {
chars.next(); chars.next();
let mut ended = false;
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
chars.next(); chars.next();
if !ended { (digits, scale) = (digits * base as u8 + d, scale + 1);
if let Some(f) = fractional
.checked_mul(base as u64)
.and_then(|n| n.checked_add(d))
{
(fractional, precision) = (f, precision + 1);
} else {
ended = true;
}
}
} }
} }
@ -247,15 +256,26 @@ impl ParsedNumber {
return Err(ParseError::NotNumeric); return Err(ParseError::NotNumeric);
} }
// TODO: Might be nice to implement a ExtendedBigDecimal copysign or negation function to move away some of this logic...
let ebd = if digits == BigUint::zero() && negative {
ExtendedBigDecimal::MinusZero
} else {
let sign = if negative { Sign::Minus } else { Sign::Plus };
let signed_digits = BigInt::from_biguint(sign, digits);
let bd = if scale == 0 {
BigDecimal::from_bigint(signed_digits, 0)
} else if base == Base::Decimal {
BigDecimal::from_bigint(signed_digits, scale)
} else {
// Base is not 10, init at scale 0 then divide by base**scale.
BigDecimal::from_bigint(signed_digits, 0) / (base as u64).pow(scale as u32)
};
ExtendedBigDecimal::BigDecimal(bd)
};
// Return what has been parsed so far. If there are extra characters, mark the // Return what has been parsed so far. If there are extra characters, mark the
// parsing as a partial match. // parsing as a partial match.
let parsed = Self { let parsed = Self { number: ebd };
base,
negative,
integral,
fractional,
precision,
};
if let Some((first_unparsed, _)) = chars.next() { if let Some((first_unparsed, _)) = chars.next() {
Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..])) Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..]))
} else { } else {
@ -277,7 +297,7 @@ mod tests {
); );
assert!(matches!( assert!(matches!(
ParsedNumber::parse_u64("-123"), ParsedNumber::parse_u64("-123"),
Err(ParseError::NotNumeric) Err(ParseError::Overflow)
)); ));
assert!(matches!( assert!(matches!(
ParsedNumber::parse_u64(""), ParsedNumber::parse_u64(""),