From 20add88afc4ef21b895186185cf3d998890302fa Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 17 Mar 2025 12:15:59 +0100 Subject: [PATCH 01/10] uucore: format: num_parser: Use ExtendedBigDecimal for internal representation ExtendedBigDecimal already provides everything we need, use that instead of a custom representation. --- .../src/lib/features/format/num_parser.rs | 132 ++++++++++-------- 1 file changed, 76 insertions(+), 56 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 7de3a0e0a..1296b52b3 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -5,7 +5,16 @@ //! Utilities for parsing numbers in various formats -// spell-checker:ignore powf copysign prec inity +// spell-checker:ignore powf copysign prec inity bigdecimal extendedbigdecimal biguint + +use bigdecimal::{ + num_bigint::{BigInt, BigUint, Sign}, + BigDecimal, +}; +use num_traits::ToPrimitive; +use num_traits::Zero; + +use crate::format::extendedbigdecimal::ExtendedBigDecimal; /// Base for number parsing #[derive(Clone, Copy, PartialEq)] @@ -68,31 +77,28 @@ impl<'a, T> ParseError<'a, T> { /// A number parser for binary, octal, decimal, hexadecimal and single characters. /// -/// Internally, in order to get the maximum possible precision and cover the full -/// range of u64 and i64 without losing precision for f64, the returned number is -/// decomposed into: -/// - A `base` value -/// - A `neg` sign bit -/// - A `integral` positive part -/// - A `fractional` positive part -/// - A `precision` representing the number of digits in the fractional part +/// TODO: we just keep an ExtendedBigDecimal internally, so we don't really need this +/// struct actually. /// /// If the fractional part cannot be represented on a `u64`, parsing continues /// silently by ignoring non-significant digits. pub struct ParsedNumber { - base: Base, - negative: bool, - integral: u64, - fractional: u64, - precision: usize, + number: ExtendedBigDecimal, } impl ParsedNumber { fn into_i64(self) -> Option { - if self.negative { - i64::try_from(-i128::from(self.integral)).ok() - } else { - i64::try_from(self.integral).ok() + match self.number { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + i64::try_from(digits).ok() + } else { + None + } + } + ExtendedBigDecimal::MinusZero => Some(0), + _ => None, } } @@ -108,21 +114,41 @@ impl ParsedNumber { } } + fn into_u64(self) -> Option { + match self.number { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + u64::try_from(digits).ok() + } else { + None + } + } + _ => None, + } + } + /// Parse a number as u64. No fractional part is allowed. pub fn parse_u64(input: &str) -> Result> { match Self::parse(input, true) { - Ok(v) | Err(ParseError::PartialMatch(v, _)) if v.negative => { - Err(ParseError::NotNumeric) - } - Ok(v) => Ok(v.integral), - Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.integral, rest))), + Ok(v) => v.into_u64().ok_or(ParseError::Overflow), + Err(e) => Err(e.map(|v, rest| { + v.into_u64() + .map(|v| ParseError::PartialMatch(v, rest)) + .unwrap_or(ParseError::Overflow) + })), } } fn into_f64(self) -> f64 { - let n = self.integral as f64 - + (self.fractional as f64) / (self.base as u8 as f64).powf(self.precision as f64); - if self.negative { -n } else { n } + match self.number { + ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(), + ExtendedBigDecimal::MinusZero => -0.0, + ExtendedBigDecimal::Nan => f64::NAN, + ExtendedBigDecimal::MinusNan => -f64::NAN, + ExtendedBigDecimal::Infinity => f64::INFINITY, + ExtendedBigDecimal::MinusInfinity => -f64::INFINITY, + } } /// Parse a number as f64 @@ -164,11 +190,7 @@ impl ParsedNumber { if let Some(rest) = input.strip_prefix('\'') { let mut chars = rest.char_indices().fuse(); let v = chars.next().map(|(_, c)| Self { - base: Base::Decimal, - negative: false, - integral: u64::from(c), - fractional: 0, - precision: 0, + number: ExtendedBigDecimal::BigDecimal(u32::from(c).into()), }); return match (v, chars.next()) { (Some(v), None) => Ok(v), @@ -209,36 +231,23 @@ impl ParsedNumber { // Parse the integral part of the number let mut chars = rest.chars().enumerate().fuse().peekable(); - let mut integral = 0u64; + let mut digits = BigUint::zero(); + let mut scale = 0i64; while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); - integral = integral - .checked_mul(base as u64) - .and_then(|n| n.checked_add(d)) - .ok_or(ParseError::Overflow)?; + digits = digits * base as u8 + d; } // Parse the fractional part of the number if there can be one and the input contains // a '.' decimal separator. - let (mut fractional, mut precision) = (0u64, 0); if matches!(chars.peek(), Some(&(_, '.'))) && matches!(base, Base::Decimal | Base::Hexadecimal) && !integral_only { chars.next(); - let mut ended = false; while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); - if !ended { - if let Some(f) = fractional - .checked_mul(base as u64) - .and_then(|n| n.checked_add(d)) - { - (fractional, precision) = (f, precision + 1); - } else { - ended = true; - } - } + (digits, scale) = (digits * base as u8 + d, scale + 1); } } @@ -247,15 +256,26 @@ impl ParsedNumber { return Err(ParseError::NotNumeric); } + // TODO: Might be nice to implement a ExtendedBigDecimal copysign or negation function to move away some of this logic... + let ebd = if digits == BigUint::zero() && negative { + ExtendedBigDecimal::MinusZero + } else { + let sign = if negative { Sign::Minus } else { Sign::Plus }; + let signed_digits = BigInt::from_biguint(sign, digits); + let bd = if scale == 0 { + BigDecimal::from_bigint(signed_digits, 0) + } else if base == Base::Decimal { + BigDecimal::from_bigint(signed_digits, scale) + } else { + // Base is not 10, init at scale 0 then divide by base**scale. + BigDecimal::from_bigint(signed_digits, 0) / (base as u64).pow(scale as u32) + }; + ExtendedBigDecimal::BigDecimal(bd) + }; + // Return what has been parsed so far. It there are extra characters, mark the // parsing as a partial match. - let parsed = Self { - base, - negative, - integral, - fractional, - precision, - }; + let parsed = Self { number: ebd }; if let Some((first_unparsed, _)) = chars.next() { Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..])) } else { @@ -277,7 +297,7 @@ mod tests { ); assert!(matches!( ParsedNumber::parse_u64("-123"), - Err(ParseError::NotNumeric) + Err(ParseError::Overflow) )); assert!(matches!( ParsedNumber::parse_u64(""), From 8bbec161150fe9c25c3725b0e3f63b18eac36534 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 17 Mar 2025 18:30:57 +0100 Subject: [PATCH 02/10] uucore: format: num_parser: Fold special value parsing in main parsing function --- .../src/lib/features/format/num_parser.rs | 47 ++++++++++++------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 1296b52b3..67cb457ba 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -155,32 +155,39 @@ impl ParsedNumber { pub fn parse_f64(input: &str) -> Result> { match Self::parse(input, false) { Ok(v) => Ok(v.into_f64()), - Err(ParseError::NotNumeric) => Self::parse_f64_special_values(input), Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))), } } - fn parse_f64_special_values(input: &str) -> Result> { - let (sign, rest) = if let Some(input) = input.strip_prefix('-') { - (-1.0, input) - } else { - (1.0, input) - }; - let prefix = rest + fn parse_special_value(input: &str, negative: bool) -> Result> { + let prefix = input .chars() .take(3) .map(|c| c.to_ascii_lowercase()) .collect::(); - let special = match prefix.as_str() { - "inf" => f64::INFINITY, - "nan" => f64::NAN, - _ => return Err(ParseError::NotNumeric), - } - .copysign(sign); - if rest.len() == 3 { + let special = Self { + number: match prefix.as_str() { + "inf" => { + if negative { + ExtendedBigDecimal::MinusInfinity + } else { + ExtendedBigDecimal::Infinity + } + } + "nan" => { + if negative { + ExtendedBigDecimal::MinusNan + } else { + ExtendedBigDecimal::Nan + } + } + _ => return Err(ParseError::NotNumeric), + }, + }; + if input.len() == 3 { Ok(special) } else { - Err(ParseError::PartialMatch(special, &rest[3..])) + Err(ParseError::PartialMatch(special, &input[3..])) } } @@ -251,9 +258,13 @@ impl ParsedNumber { } } - // If nothing has been parsed, declare the parsing unsuccessful + // If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful if let Some((0, _)) = chars.peek() { - return Err(ParseError::NotNumeric); + if integral_only { + return Err(ParseError::NotNumeric); + } else { + return Self::parse_special_value(unsigned, negative); + } } // TODO: Might be nice to implement a ExtendedBigDecimal copysign or negation function to move away some of this logic... From 40a7c65980f66ce3b6273b0ced36490a7ad0a919 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 18 Mar 2025 10:29:44 +0100 Subject: [PATCH 03/10] uucore: format: num_parser: Turn parser into a trait We call the function extended_parse, so that we do not clash with other parsing functions in other traits. - Also implement parser for ExtendedBigDecimal (straightforward). - Base doesn't need to be public anymore. - Rename the error to ExtendedParserError. --- .../src/lib/features/format/argument.rs | 16 +- .../src/lib/features/format/num_parser.rs | 565 +++++++++--------- 2 files changed, 293 insertions(+), 288 deletions(-) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 08111bca8..fca402378 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -5,7 +5,7 @@ use crate::{ error::set_exit_code, - features::format::num_parser::{ParseError, ParsedNumber}, + features::format::num_parser::{ExtendedParser, ExtendedParserError}, quoting_style::{Quotes, QuotingStyle, escape_name}, show_error, show_warning, }; @@ -56,7 +56,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s), + FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s), _ => 0, } } @@ -67,7 +67,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::SignedInt(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_i64(s), s), + FormatArgument::Unparsed(s) => extract_value(i64::extended_parse(s), s), _ => 0, } } @@ -78,7 +78,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::Float(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_f64(s), s), + FormatArgument::Unparsed(s) => extract_value(f64::extended_parse(s), s), _ => 0.0, } } @@ -91,7 +91,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { } } -fn extract_value(p: Result>, input: &str) -> T { +fn extract_value(p: Result>, input: &str) -> T { match p { Ok(v) => v, Err(e) => { @@ -103,15 +103,15 @@ fn extract_value(p: Result>, input: &str) -> T }, ); match e { - ParseError::Overflow => { + ExtendedParserError::Overflow => { show_error!("{}: Numerical result out of range", input.quote()); Default::default() } - ParseError::NotNumeric => { + ExtendedParserError::NotNumeric => { show_error!("{}: expected a numeric value", input.quote()); Default::default() } - ParseError::PartialMatch(v, rest) => { + ExtendedParserError::PartialMatch(v, rest) => { let bytes = input.as_encoded_bytes(); if !bytes.is_empty() && bytes[0] == b'\'' { show_warning!( diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 67cb457ba..396bdfa72 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -8,8 +8,8 @@ // spell-checker:ignore powf copysign prec inity bigdecimal extendedbigdecimal biguint use bigdecimal::{ - num_bigint::{BigInt, BigUint, Sign}, BigDecimal, + num_bigint::{BigInt, BigUint, Sign}, }; use num_traits::ToPrimitive; use num_traits::Zero; @@ -18,7 +18,7 @@ use crate::format::extendedbigdecimal::ExtendedBigDecimal; /// Base for number parsing #[derive(Clone, Copy, PartialEq)] -pub enum Base { +enum Base { /// Binary base Binary = 2, @@ -53,7 +53,7 @@ impl Base { /// Type returned if a number could not be parsed in its entirety #[derive(Debug, PartialEq)] -pub enum ParseError<'a, T> { +pub enum ExtendedParserError<'a, T> { /// The input as a whole makes no sense NotNumeric, /// The beginning of the input made sense and has been parsed, @@ -65,11 +65,14 @@ pub enum ParseError<'a, T> { Overflow, } -impl<'a, T> ParseError<'a, T> { - fn map(self, f: impl FnOnce(T, &'a str) -> ParseError<'a, U>) -> ParseError<'a, U> { +impl<'a, T> ExtendedParserError<'a, T> { + fn map( + self, + f: impl FnOnce(T, &'a str) -> ExtendedParserError<'a, U>, + ) -> ExtendedParserError<'a, U> { match self { - Self::NotNumeric => ParseError::NotNumeric, - Self::Overflow => ParseError::Overflow, + Self::NotNumeric => ExtendedParserError::NotNumeric, + Self::Overflow => ExtendedParserError::Overflow, Self::PartialMatch(v, s) => f(v, s), } } @@ -77,375 +80,377 @@ impl<'a, T> ParseError<'a, T> { /// A number parser for binary, octal, decimal, hexadecimal and single characters. /// -/// TODO: we just keep an ExtendedBigDecimal internally, so we don't really need this -/// struct actually. -/// -/// If the fractional part cannot be represented on a `u64`, parsing continues -/// silently by ignoring non-significant digits. -pub struct ParsedNumber { - number: ExtendedBigDecimal, +/// It is implemented for `u64`/`i64`, where no fractional part is parsed, +/// and `f64` float, where octal is not allowed. +pub trait ExtendedParser { + // We pick a hopefully different name for our parser, to avoid clash with standard traits. + fn extended_parse(input: &str) -> Result> + where + Self: Sized; } -impl ParsedNumber { - fn into_i64(self) -> Option { - match self.number { - ExtendedBigDecimal::BigDecimal(bd) => { - let (digits, scale) = bd.into_bigint_and_scale(); - if scale == 0 { - i64::try_from(digits).ok() - } else { - None - } - } - ExtendedBigDecimal::MinusZero => Some(0), - _ => None, - } - } - +impl ExtendedParser for i64 { /// Parse a number as i64. No fractional part is allowed. - pub fn parse_i64(input: &str) -> Result> { - match Self::parse(input, true) { - Ok(v) => v.into_i64().ok_or(ParseError::Overflow), + fn extended_parse(input: &str) -> Result> { + fn into_i64(ebd: ExtendedBigDecimal) -> Option { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + i64::try_from(digits).ok() + } else { + None + } + } + ExtendedBigDecimal::MinusZero => Some(0), + _ => None, + } + } + + match parse(input, true) { + Ok(v) => into_i64(v).ok_or(ExtendedParserError::Overflow), Err(e) => Err(e.map(|v, rest| { - v.into_i64() - .map(|v| ParseError::PartialMatch(v, rest)) - .unwrap_or(ParseError::Overflow) + into_i64(v) + .map(|v| ExtendedParserError::PartialMatch(v, rest)) + .unwrap_or(ExtendedParserError::Overflow) })), } } +} - fn into_u64(self) -> Option { - match self.number { - ExtendedBigDecimal::BigDecimal(bd) => { - let (digits, scale) = bd.into_bigint_and_scale(); - if scale == 0 { - u64::try_from(digits).ok() - } else { - None - } - } - _ => None, - } - } - +impl ExtendedParser for u64 { /// Parse a number as u64. No fractional part is allowed. - pub fn parse_u64(input: &str) -> Result> { - match Self::parse(input, true) { - Ok(v) => v.into_u64().ok_or(ParseError::Overflow), + fn extended_parse(input: &str) -> Result> { + fn into_u64(ebd: ExtendedBigDecimal) -> Option { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + u64::try_from(digits).ok() + } else { + None + } + } + _ => None, + } + } + + match parse(input, true) { + Ok(v) => into_u64(v).ok_or(ExtendedParserError::Overflow), Err(e) => Err(e.map(|v, rest| { - v.into_u64() - .map(|v| ParseError::PartialMatch(v, rest)) - .unwrap_or(ParseError::Overflow) + into_u64(v) + .map(|v| ExtendedParserError::PartialMatch(v, rest)) + .unwrap_or(ExtendedParserError::Overflow) })), } } +} - fn into_f64(self) -> f64 { - match self.number { - ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(), - ExtendedBigDecimal::MinusZero => -0.0, - ExtendedBigDecimal::Nan => f64::NAN, - ExtendedBigDecimal::MinusNan => -f64::NAN, - ExtendedBigDecimal::Infinity => f64::INFINITY, - ExtendedBigDecimal::MinusInfinity => -f64::INFINITY, - } - } - +impl ExtendedParser for f64 { /// Parse a number as f64 - pub fn parse_f64(input: &str) -> Result> { - match Self::parse(input, false) { - Ok(v) => Ok(v.into_f64()), - Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))), - } - } - - fn parse_special_value(input: &str, negative: bool) -> Result> { - let prefix = input - .chars() - .take(3) - .map(|c| c.to_ascii_lowercase()) - .collect::(); - let special = Self { - number: match prefix.as_str() { - "inf" => { - if negative { - ExtendedBigDecimal::MinusInfinity - } else { - ExtendedBigDecimal::Infinity - } - } - "nan" => { - if negative { - ExtendedBigDecimal::MinusNan - } else { - ExtendedBigDecimal::Nan - } - } - _ => return Err(ParseError::NotNumeric), - }, - }; - if input.len() == 3 { - Ok(special) - } else { - Err(ParseError::PartialMatch(special, &input[3..])) - } - } - - #[allow(clippy::cognitive_complexity)] - fn parse(input: &str, integral_only: bool) -> Result> { - // Parse the "'" prefix separately - if let Some(rest) = input.strip_prefix('\'') { - let mut chars = rest.char_indices().fuse(); - let v = chars.next().map(|(_, c)| Self { - number: ExtendedBigDecimal::BigDecimal(u32::from(c).into()), - }); - return match (v, chars.next()) { - (Some(v), None) => Ok(v), - (Some(v), Some((i, _))) => Err(ParseError::PartialMatch(v, &rest[i..])), - (None, _) => Err(ParseError::NotNumeric), - }; - } - - let trimmed_input = input.trim_ascii_start(); - - // Initial minus sign - let (negative, unsigned) = if let Some(trimmed_input) = trimmed_input.strip_prefix('-') { - (true, trimmed_input) - } else { - (false, trimmed_input) - }; - - // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a - // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix - // will not be consumed in case the parsable string contains only "0": the leading extra "0" - // will have no influence on the result. - let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { - if let Some(rest) = rest.strip_prefix(['b', 'B']) { - (Base::Binary, rest) - } else if let Some(rest) = rest.strip_prefix(['x', 'X']) { - (Base::Hexadecimal, rest) - } else if integral_only { - (Base::Octal, unsigned) - } else { - (Base::Decimal, unsigned) + fn extended_parse(input: &str) -> Result> { + // TODO: This is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive). + fn into_f64(ebd: ExtendedBigDecimal) -> f64 { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(), + ExtendedBigDecimal::MinusZero => -0.0, + ExtendedBigDecimal::Nan => f64::NAN, + ExtendedBigDecimal::MinusNan => -f64::NAN, + ExtendedBigDecimal::Infinity => f64::INFINITY, + ExtendedBigDecimal::MinusInfinity => -f64::INFINITY, } + } + + match parse(input, false) { + Ok(v) => Ok(into_f64(v)), + Err(e) => Err(e.map(|v, rest| ExtendedParserError::PartialMatch(into_f64(v), rest))), + } + } +} + +fn parse_special_value( + input: &str, + negative: bool, +) -> Result> { + let prefix = input + .chars() + .take(3) + .map(|c| c.to_ascii_lowercase()) + .collect::(); + let special = match prefix.as_str() { + "inf" => { + if negative { + ExtendedBigDecimal::MinusInfinity + } else { + ExtendedBigDecimal::Infinity + } + } + "nan" => { + if negative { + ExtendedBigDecimal::MinusNan + } else { + ExtendedBigDecimal::Nan + } + } + _ => return Err(ExtendedParserError::NotNumeric), + }; + if input.len() == 3 { + Ok(special) + } else { + Err(ExtendedParserError::PartialMatch(special, &input[3..])) + } +} + +#[allow(clippy::cognitive_complexity)] +fn parse( + input: &str, + integral_only: bool, +) -> Result> { + // Parse the "'" prefix separately + if let Some(rest) = input.strip_prefix('\'') { + let mut chars = rest.char_indices().fuse(); + let v = chars + .next() + .map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into())); + return match (v, chars.next()) { + (Some(v), None) => Ok(v), + (Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])), + (None, _) => Err(ExtendedParserError::NotNumeric), + }; + } + + let trimmed_input = input.trim_ascii_start(); + + // Initial minus sign + let (negative, unsigned) = if let Some(trimmed_input) = trimmed_input.strip_prefix('-') { + (true, trimmed_input) + } else { + (false, trimmed_input) + }; + + // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a + // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix + // will not be consumed in case the parsable string contains only "0": the leading extra "0" + // will have no influence on the result. + let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { + if let Some(rest) = rest.strip_prefix(['b', 'B']) { + (Base::Binary, rest) + } else if let Some(rest) = rest.strip_prefix(['x', 'X']) { + (Base::Hexadecimal, rest) + } else if integral_only { + (Base::Octal, unsigned) } else { (Base::Decimal, unsigned) - }; - if rest.is_empty() { - return Err(ParseError::NotNumeric); } + } else { + (Base::Decimal, unsigned) + }; + if rest.is_empty() { + return Err(ExtendedParserError::NotNumeric); + } - // Parse the integral part of the number - let mut chars = rest.chars().enumerate().fuse().peekable(); - let mut digits = BigUint::zero(); - let mut scale = 0i64; + // Parse the integral part of the number + let mut chars = rest.chars().enumerate().fuse().peekable(); + let mut digits = BigUint::zero(); + let mut scale = 0i64; + while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { + chars.next(); + digits = digits * base as u8 + d; + } + + // Parse the fractional part of the number if there can be one and the input contains + // a '.' decimal separator. + if matches!(chars.peek(), Some(&(_, '.'))) + && matches!(base, Base::Decimal | Base::Hexadecimal) + && !integral_only + { + chars.next(); while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); - digits = digits * base as u8 + d; + (digits, scale) = (digits * base as u8 + d, scale + 1); } + } - // Parse the fractional part of the number if there can be one and the input contains - // a '.' decimal separator. - if matches!(chars.peek(), Some(&(_, '.'))) - && matches!(base, Base::Decimal | Base::Hexadecimal) - && !integral_only - { - chars.next(); - while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { - chars.next(); - (digits, scale) = (digits * base as u8 + d, scale + 1); - } - } - - // If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful - if let Some((0, _)) = chars.peek() { - if integral_only { - return Err(ParseError::NotNumeric); - } else { - return Self::parse_special_value(unsigned, negative); - } - } - - // TODO: Might be nice to implement a ExtendedBigDecimal copysign or negation function to move away some of this logic... - let ebd = if digits == BigUint::zero() && negative { - ExtendedBigDecimal::MinusZero + // If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful + if let Some((0, _)) = chars.peek() { + if integral_only { + return Err(ExtendedParserError::NotNumeric); } else { - let sign = if negative { Sign::Minus } else { Sign::Plus }; - let signed_digits = BigInt::from_biguint(sign, digits); - let bd = if scale == 0 { - BigDecimal::from_bigint(signed_digits, 0) - } else if base == Base::Decimal { - BigDecimal::from_bigint(signed_digits, scale) - } else { - // Base is not 10, init at scale 0 then divide by base**scale. - BigDecimal::from_bigint(signed_digits, 0) / (base as u64).pow(scale as u32) - }; - ExtendedBigDecimal::BigDecimal(bd) + return parse_special_value(unsigned, negative); + } + } + + // TODO: Might be nice to implement a ExtendedBigDecimal copysign or negation function to move away some of this logic... + let ebd = if digits == BigUint::zero() && negative { + ExtendedBigDecimal::MinusZero + } else { + let sign = if negative { Sign::Minus } else { Sign::Plus }; + let signed_digits = BigInt::from_biguint(sign, digits); + let bd = if scale == 0 { + BigDecimal::from_bigint(signed_digits, 0) + } else if base == Base::Decimal { + BigDecimal::from_bigint(signed_digits, scale) + } else { + // Base is not 10, init at scale 0 then divide by base**scale. + BigDecimal::from_bigint(signed_digits, 0) / (base as u64).pow(scale as u32) }; + ExtendedBigDecimal::BigDecimal(bd) + }; - // Return what has been parsed so far. It there are extra characters, mark the - // parsing as a partial match. - let parsed = Self { number: ebd }; - if let Some((first_unparsed, _)) = chars.next() { - Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..])) - } else { - Ok(parsed) - } + // Return what has been parsed so far. It there are extra characters, mark the + // parsing as a partial match. + if let Some((first_unparsed, _)) = chars.next() { + Err(ExtendedParserError::PartialMatch( + ebd, + &rest[first_unparsed..], + )) + } else { + Ok(ebd) } } #[cfg(test)] mod tests { - use super::{ParseError, ParsedNumber}; + use super::{ExtendedParser, ExtendedParserError}; #[test] fn test_decimal_u64() { - assert_eq!(Ok(123), ParsedNumber::parse_u64("123")); - assert_eq!( - Ok(u64::MAX), - ParsedNumber::parse_u64(&format!("{}", u64::MAX)) - ); + assert_eq!(Ok(123), u64::extended_parse("123")); + assert_eq!(Ok(u64::MAX), u64::extended_parse(&format!("{}", u64::MAX))); assert!(matches!( - ParsedNumber::parse_u64("-123"), - Err(ParseError::Overflow) + u64::extended_parse("-123"), + Err(ExtendedParserError::Overflow) )); assert!(matches!( - ParsedNumber::parse_u64(""), - Err(ParseError::NotNumeric) + u64::extended_parse(""), + Err(ExtendedParserError::NotNumeric) )); assert!(matches!( - ParsedNumber::parse_u64("123.15"), - Err(ParseError::PartialMatch(123, ".15")) + u64::extended_parse("123.15"), + Err(ExtendedParserError::PartialMatch(123, ".15")) )); } #[test] fn test_decimal_i64() { - assert_eq!(Ok(123), ParsedNumber::parse_i64("123")); - assert_eq!(Ok(-123), ParsedNumber::parse_i64("-123")); + assert_eq!(Ok(123), i64::extended_parse("123")); + assert_eq!(Ok(-123), i64::extended_parse("-123")); assert!(matches!( - ParsedNumber::parse_i64("--123"), - Err(ParseError::NotNumeric) + i64::extended_parse("--123"), + Err(ExtendedParserError::NotNumeric) )); - assert_eq!( - Ok(i64::MAX), - ParsedNumber::parse_i64(&format!("{}", i64::MAX)) - ); - assert_eq!( - Ok(i64::MIN), - ParsedNumber::parse_i64(&format!("{}", i64::MIN)) - ); + assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX))); + assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN))); assert!(matches!( - ParsedNumber::parse_i64(&format!("{}", u64::MAX)), - Err(ParseError::Overflow) + i64::extended_parse(&format!("{}", u64::MAX)), + Err(ExtendedParserError::Overflow) )); assert!(matches!( - ParsedNumber::parse_i64(&format!("{}", i64::MAX as u64 + 1)), - Err(ParseError::Overflow) + i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)), + Err(ExtendedParserError::Overflow) )); } #[test] fn test_decimal_f64() { - assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123")); - assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123")); - assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.")); - assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.")); - assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.0")); - assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.0")); - assert_eq!(Ok(123.15), ParsedNumber::parse_f64("123.15")); - assert_eq!(Ok(-123.15), ParsedNumber::parse_f64("-123.15")); - assert_eq!(Ok(0.15), ParsedNumber::parse_f64(".15")); - assert_eq!(Ok(-0.15), ParsedNumber::parse_f64("-.15")); + assert_eq!(Ok(123.0), f64::extended_parse("123")); + assert_eq!(Ok(-123.0), f64::extended_parse("-123")); + assert_eq!(Ok(123.0), f64::extended_parse("123.")); + assert_eq!(Ok(-123.0), f64::extended_parse("-123.")); + assert_eq!(Ok(123.0), f64::extended_parse("123.0")); + assert_eq!(Ok(-123.0), f64::extended_parse("-123.0")); + assert_eq!(Ok(123.15), f64::extended_parse("123.15")); + assert_eq!(Ok(-123.15), f64::extended_parse("-123.15")); + assert_eq!(Ok(0.15), f64::extended_parse(".15")); + assert_eq!(Ok(-0.15), f64::extended_parse("-.15")); assert_eq!( Ok(0.15), - ParsedNumber::parse_f64(".150000000000000000000000000231313") + f64::extended_parse(".150000000000000000000000000231313") ); - assert!(matches!(ParsedNumber::parse_f64("1.2.3"), - Err(ParseError::PartialMatch(f, ".3")) if f == 1.2)); - assert_eq!(Ok(f64::INFINITY), ParsedNumber::parse_f64("inf")); - assert_eq!(Ok(f64::NEG_INFINITY), ParsedNumber::parse_f64("-inf")); - assert!(ParsedNumber::parse_f64("NaN").unwrap().is_nan()); - assert!(ParsedNumber::parse_f64("NaN").unwrap().is_sign_positive()); - assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_nan()); - assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_sign_negative()); - assert!(matches!(ParsedNumber::parse_f64("-infinity"), - Err(ParseError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); - assert!(ParsedNumber::parse_f64(&format!("{}", u64::MAX)).is_ok()); - assert!(ParsedNumber::parse_f64(&format!("{}", i64::MIN)).is_ok()); + assert!(matches!(f64::extended_parse("1.2.3"), + Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("inf")); + assert_eq!(Ok(f64::NEG_INFINITY), f64::extended_parse("-inf")); + assert!(f64::extended_parse("NaN").unwrap().is_nan()); + assert!(f64::extended_parse("NaN").unwrap().is_sign_positive()); + assert!(f64::extended_parse("-NaN").unwrap().is_nan()); + assert!(f64::extended_parse("-NaN").unwrap().is_sign_negative()); + assert!(matches!(f64::extended_parse("-infinity"), + Err(ExtendedParserError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); + assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok()); + assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok()); } #[test] fn test_hexadecimal() { - assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0x123")); - assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0X123")); - assert_eq!(Ok(0xfe), ParsedNumber::parse_u64("0xfE")); - assert_eq!(Ok(-0x123), ParsedNumber::parse_i64("-0x123")); + assert_eq!(Ok(0x123), u64::extended_parse("0x123")); + assert_eq!(Ok(0x123), u64::extended_parse("0X123")); + assert_eq!(Ok(0xfe), u64::extended_parse("0xfE")); + assert_eq!(Ok(-0x123), i64::extended_parse("-0x123")); - assert_eq!(Ok(0.5), ParsedNumber::parse_f64("0x.8")); - assert_eq!(Ok(0.0625), ParsedNumber::parse_f64("0x.1")); - assert_eq!(Ok(15.007_812_5), ParsedNumber::parse_f64("0xf.02")); + assert_eq!(Ok(0.5), f64::extended_parse("0x.8")); + assert_eq!(Ok(0.0625), f64::extended_parse("0x.1")); + assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02")); } #[test] fn test_octal() { - assert_eq!(Ok(0), ParsedNumber::parse_u64("0")); - assert_eq!(Ok(0o123), ParsedNumber::parse_u64("0123")); - assert_eq!(Ok(0o123), ParsedNumber::parse_u64("00123")); - assert_eq!(Ok(0), ParsedNumber::parse_u64("00")); + assert_eq!(Ok(0), u64::extended_parse("0")); + assert_eq!(Ok(0o123), u64::extended_parse("0123")); + assert_eq!(Ok(0o123), u64::extended_parse("00123")); + assert_eq!(Ok(0), u64::extended_parse("00")); assert!(matches!( - ParsedNumber::parse_u64("008"), - Err(ParseError::PartialMatch(0, "8")) + u64::extended_parse("008"), + Err(ExtendedParserError::PartialMatch(0, "8")) )); assert!(matches!( - ParsedNumber::parse_u64("08"), - Err(ParseError::PartialMatch(0, "8")) + u64::extended_parse("08"), + Err(ExtendedParserError::PartialMatch(0, "8")) )); assert!(matches!( - ParsedNumber::parse_u64("0."), - Err(ParseError::PartialMatch(0, ".")) + u64::extended_parse("0."), + Err(ExtendedParserError::PartialMatch(0, ".")) )); } #[test] fn test_binary() { - assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0b1011")); - assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0B1011")); + assert_eq!(Ok(0b1011), u64::extended_parse("0b1011")); + assert_eq!(Ok(0b1011), u64::extended_parse("0B1011")); } #[test] fn test_parsing_with_leading_whitespace() { - assert_eq!(Ok(1), ParsedNumber::parse_u64(" 0x1")); - assert_eq!(Ok(-2), ParsedNumber::parse_i64(" -0x2")); - assert_eq!(Ok(-3), ParsedNumber::parse_i64(" \t-0x3")); - assert_eq!(Ok(-4), ParsedNumber::parse_i64(" \n-0x4")); - assert_eq!(Ok(-5), ParsedNumber::parse_i64(" \n\t\u{000d}-0x5")); + assert_eq!(Ok(1), u64::extended_parse(" 0x1")); + assert_eq!(Ok(-2), i64::extended_parse(" -0x2")); + assert_eq!(Ok(-3), i64::extended_parse(" \t-0x3")); + assert_eq!(Ok(-4), i64::extended_parse(" \n-0x4")); + assert_eq!(Ok(-5), i64::extended_parse(" \n\t\u{000d}-0x5")); // Ensure that trailing whitespace is still a partial match assert_eq!( - Err(ParseError::PartialMatch(6, " ")), - ParsedNumber::parse_u64("0x6 ") + Err(ExtendedParserError::PartialMatch(6, " ")), + u64::extended_parse("0x6 ") ); assert_eq!( - Err(ParseError::PartialMatch(7, "\t")), - ParsedNumber::parse_u64("0x7\t") + Err(ExtendedParserError::PartialMatch(7, "\t")), + u64::extended_parse("0x7\t") ); assert_eq!( - Err(ParseError::PartialMatch(8, "\n")), - ParsedNumber::parse_u64("0x8\n") + Err(ExtendedParserError::PartialMatch(8, "\n")), + u64::extended_parse("0x8\n") ); // Ensure that unicode non-ascii whitespace is a partial match assert_eq!( - Err(ParseError::NotNumeric), - ParsedNumber::parse_i64("\u{2029}-0x9") + Err(ExtendedParserError::NotNumeric), + i64::extended_parse("\u{2029}-0x9") ); // Ensure that whitespace after the number has "started" is not allowed assert_eq!( - Err(ParseError::NotNumeric), - ParsedNumber::parse_i64("- 0x9") + Err(ExtendedParserError::NotNumeric), + i64::extended_parse("- 0x9") ); } } From 97e333c6d927f4e47cd4917e226560f2480245d1 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 18 Mar 2025 21:06:44 +0100 Subject: [PATCH 04/10] uucore: format: num_parser: allow leading + sign when parsing Leading plus signs are allowed for all formats. Add tests (including some tests for negative i64 values, and mixed case special values that springed to mind). Fixes #7473. --- .../src/lib/features/format/num_parser.rs | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 396bdfa72..6a1de022d 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -221,9 +221,11 @@ fn parse( let trimmed_input = input.trim_ascii_start(); - // Initial minus sign + // Initial minus/plus sign let (negative, unsigned) = if let Some(trimmed_input) = trimmed_input.strip_prefix('-') { (true, trimmed_input) + } else if let Some(trimmed_input) = trimmed_input.strip_prefix('+') { + (false, trimmed_input) } else { (false, trimmed_input) }; @@ -334,6 +336,7 @@ mod tests { #[test] fn test_decimal_i64() { assert_eq!(Ok(123), i64::extended_parse("123")); + assert_eq!(Ok(123), i64::extended_parse("+123")); assert_eq!(Ok(-123), i64::extended_parse("-123")); assert!(matches!( i64::extended_parse("--123"), @@ -354,12 +357,14 @@ mod tests { #[test] fn test_decimal_f64() { assert_eq!(Ok(123.0), f64::extended_parse("123")); + assert_eq!(Ok(123.0), f64::extended_parse("+123")); assert_eq!(Ok(-123.0), f64::extended_parse("-123")); assert_eq!(Ok(123.0), f64::extended_parse("123.")); assert_eq!(Ok(-123.0), f64::extended_parse("-123.")); assert_eq!(Ok(123.0), f64::extended_parse("123.0")); assert_eq!(Ok(-123.0), f64::extended_parse("-123.0")); assert_eq!(Ok(123.15), f64::extended_parse("123.15")); + assert_eq!(Ok(123.15), f64::extended_parse("+123.15")); assert_eq!(Ok(-123.15), f64::extended_parse("-123.15")); assert_eq!(Ok(0.15), f64::extended_parse(".15")); assert_eq!(Ok(-0.15), f64::extended_parse("-.15")); @@ -370,11 +375,21 @@ mod tests { assert!(matches!(f64::extended_parse("1.2.3"), Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); assert_eq!(Ok(f64::INFINITY), f64::extended_parse("inf")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("+inf")); assert_eq!(Ok(f64::NEG_INFINITY), f64::extended_parse("-inf")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("Inf")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("InF")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("INF")); assert!(f64::extended_parse("NaN").unwrap().is_nan()); assert!(f64::extended_parse("NaN").unwrap().is_sign_positive()); + assert!(f64::extended_parse("+NaN").unwrap().is_nan()); + assert!(f64::extended_parse("+NaN").unwrap().is_sign_positive()); assert!(f64::extended_parse("-NaN").unwrap().is_nan()); assert!(f64::extended_parse("-NaN").unwrap().is_sign_negative()); + assert!(f64::extended_parse("nan").unwrap().is_nan()); + assert!(f64::extended_parse("nan").unwrap().is_sign_positive()); + assert!(f64::extended_parse("NAN").unwrap().is_nan()); + assert!(f64::extended_parse("NAN").unwrap().is_sign_positive()); assert!(matches!(f64::extended_parse("-infinity"), Err(ExtendedParserError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok()); @@ -385,6 +400,7 @@ mod tests { fn test_hexadecimal() { assert_eq!(Ok(0x123), u64::extended_parse("0x123")); assert_eq!(Ok(0x123), u64::extended_parse("0X123")); + assert_eq!(Ok(0x123), u64::extended_parse("+0x123")); assert_eq!(Ok(0xfe), u64::extended_parse("0xfE")); assert_eq!(Ok(-0x123), i64::extended_parse("-0x123")); @@ -397,6 +413,8 @@ mod tests { fn test_octal() { assert_eq!(Ok(0), u64::extended_parse("0")); assert_eq!(Ok(0o123), u64::extended_parse("0123")); + assert_eq!(Ok(0o123), u64::extended_parse("+0123")); + assert_eq!(Ok(-0o123), i64::extended_parse("-0123")); assert_eq!(Ok(0o123), u64::extended_parse("00123")); assert_eq!(Ok(0), u64::extended_parse("00")); assert!(matches!( @@ -417,6 +435,8 @@ mod tests { fn test_binary() { assert_eq!(Ok(0b1011), u64::extended_parse("0b1011")); assert_eq!(Ok(0b1011), u64::extended_parse("0B1011")); + assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011")); + assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011")); } #[test] From d7502e4b2e9490d8bf7184b6d8b54093d5c9ab82 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 19 Mar 2025 11:17:01 +0100 Subject: [PATCH 05/10] uucore: format: num_parser: Disallow binary number parsing for floats Fixes #7487. Also, add more tests for leading zeros not getting parsed as octal when dealing with floats. --- .../src/lib/features/format/num_parser.rs | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 6a1de022d..a01fdeddc 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -81,7 +81,7 @@ impl<'a, T> ExtendedParserError<'a, T> { /// A number parser for binary, octal, decimal, hexadecimal and single characters. /// /// It is implemented for `u64`/`i64`, where no fractional part is parsed, -/// and `f64` float, where octal is not allowed. +/// and `f64` float, where octal and binary formats are not allowed. pub trait ExtendedParser { // We pick a hopefully different name for our parser, to avoid clash with standard traits. fn extended_parse(input: &str) -> Result> @@ -235,12 +235,15 @@ fn parse( // will not be consumed in case the parsable string contains only "0": the leading extra "0" // will have no influence on the result. let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { - if let Some(rest) = rest.strip_prefix(['b', 'B']) { - (Base::Binary, rest) - } else if let Some(rest) = rest.strip_prefix(['x', 'X']) { + if let Some(rest) = rest.strip_prefix(['x', 'X']) { (Base::Hexadecimal, rest) } else if integral_only { - (Base::Octal, unsigned) + // Binary/Octal only allowed for integer parsing. + if let Some(rest) = rest.strip_prefix(['b', 'B']) { + (Base::Binary, rest) + } else { + (Base::Octal, unsigned) + } } else { (Base::Decimal, unsigned) } @@ -368,6 +371,16 @@ mod tests { assert_eq!(Ok(-123.15), f64::extended_parse("-123.15")); assert_eq!(Ok(0.15), f64::extended_parse(".15")); assert_eq!(Ok(-0.15), f64::extended_parse("-.15")); + // Leading 0(s) are _not_ octal when parsed as float + assert_eq!(Ok(123.0), f64::extended_parse("0123")); + assert_eq!(Ok(123.0), f64::extended_parse("+0123")); + assert_eq!(Ok(-123.0), f64::extended_parse("-0123")); + assert_eq!(Ok(123.0), f64::extended_parse("00123")); + assert_eq!(Ok(123.0), f64::extended_parse("+00123")); + assert_eq!(Ok(-123.0), f64::extended_parse("-00123")); + assert_eq!(Ok(123.15), f64::extended_parse("0123.15")); + assert_eq!(Ok(123.15), f64::extended_parse("+0123.15")); + assert_eq!(Ok(-123.15), f64::extended_parse("-0123.15")); assert_eq!( Ok(0.15), f64::extended_parse(".150000000000000000000000000231313") @@ -429,6 +442,8 @@ mod tests { u64::extended_parse("0."), Err(ExtendedParserError::PartialMatch(0, ".")) )); + + // No float tests, leading zeros get parsed as decimal anyway. } #[test] @@ -437,6 +452,16 @@ mod tests { assert_eq!(Ok(0b1011), u64::extended_parse("0B1011")); assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011")); assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011")); + + // Binary not allowed for floats + assert!(matches!( + f64::extended_parse("0b100"), + Err(ExtendedParserError::PartialMatch(0f64, "b100")) + )); + assert!(matches!( + f64::extended_parse("0b100.1"), + Err(ExtendedParserError::PartialMatch(0f64, "b100.1")) + )); } #[test] From 71a285468bc1582307dd4dd59a808eab73811f20 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 19 Mar 2025 11:20:52 +0100 Subject: [PATCH 06/10] uucore: format: num_parser: Add parser for ExtendedBigDecimal Very simple as the f64 parser actually uses that as intermediary value. Add a few tests too. --- .../src/lib/features/format/num_parser.rs | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index a01fdeddc..7cb584db1 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -168,6 +168,15 @@ impl ExtendedParser for f64 { } } +impl ExtendedParser for ExtendedBigDecimal { + /// Parse a number as an ExtendedBigDecimal + fn extended_parse( + input: &str, + ) -> Result> { + parse(input, false) + } +} + fn parse_special_value( input: &str, negative: bool, @@ -316,6 +325,12 @@ fn parse( #[cfg(test)] mod tests { + use std::str::FromStr; + + use bigdecimal::BigDecimal; + + use crate::format::ExtendedBigDecimal; + use super::{ExtendedParser, ExtendedParserError}; #[test] @@ -387,6 +402,9 @@ mod tests { ); assert!(matches!(f64::extended_parse("1.2.3"), Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); + // Minus zero. 0.0 == -0.0 so we explicitly check the sign. + assert_eq!(Ok(0.0), f64::extended_parse("-0.0")); + assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative()); assert_eq!(Ok(f64::INFINITY), f64::extended_parse("inf")); assert_eq!(Ok(f64::INFINITY), f64::extended_parse("+inf")); assert_eq!(Ok(f64::NEG_INFINITY), f64::extended_parse("-inf")); @@ -409,6 +427,55 @@ mod tests { assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok()); } + #[test] + fn test_decimal_extended_big_decimal() { + // f64 parsing above already tested a lot of these, just do a few. + // Careful, we usually cannot use From to get a precise ExtendedBigDecimal as numbers like 123.15 cannot be exactly represented by a f64. + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("123").unwrap() + )), + ExtendedBigDecimal::extended_parse("123") + ); + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("123.15").unwrap() + )), + ExtendedBigDecimal::extended_parse("123.15") + ); + // Very high precision that would not fit in a f64. + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str(".150000000000000000000000000000000000001").unwrap() + )), + ExtendedBigDecimal::extended_parse(".150000000000000000000000000000000000001") + ); + assert!(matches!( + ExtendedBigDecimal::extended_parse("nan"), + Ok(ExtendedBigDecimal::Nan) + )); + assert!(matches!( + ExtendedBigDecimal::extended_parse("-NAN"), + Ok(ExtendedBigDecimal::MinusNan) + )); + assert_eq!( + Ok(ExtendedBigDecimal::Infinity), + ExtendedBigDecimal::extended_parse("InF") + ); + assert_eq!( + Ok(ExtendedBigDecimal::MinusInfinity), + ExtendedBigDecimal::extended_parse("-iNf") + ); + assert_eq!( + Ok(ExtendedBigDecimal::zero()), + ExtendedBigDecimal::extended_parse("0.0") + ); + assert!(matches!( + ExtendedBigDecimal::extended_parse("-0.0"), + Ok(ExtendedBigDecimal::MinusZero) + )); + } + #[test] fn test_hexadecimal() { assert_eq!(Ok(0x123), u64::extended_parse("0x123")); @@ -420,6 +487,13 @@ mod tests { assert_eq!(Ok(0.5), f64::extended_parse("0x.8")); assert_eq!(Ok(0.0625), f64::extended_parse("0x.1")); assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02")); + + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("0.0625").unwrap() + )), + ExtendedBigDecimal::extended_parse("0x.1") + ); } #[test] @@ -462,6 +536,12 @@ mod tests { f64::extended_parse("0b100.1"), Err(ExtendedParserError::PartialMatch(0f64, "b100.1")) )); + + assert!(match ExtendedBigDecimal::extended_parse("0b100.1") { + Err(ExtendedParserError::PartialMatch(ebd, "b100.1")) => + ebd == ExtendedBigDecimal::zero(), + _ => false, + }); } #[test] From b5a658528b0993e4930d7db5804bbdb9ec88ce80 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 19 Mar 2025 13:08:43 +0100 Subject: [PATCH 07/10] uucore: format: Use ExtendedBigDecimal in argument code Provides arbitrary precision for float parsing in printf. Also add a printf test for that. --- src/uucore/src/lib/features/format/argument.rs | 16 +++++++++------- .../lib/features/format/extendedbigdecimal.rs | 6 ++++++ src/uucore/src/lib/features/format/spec.rs | 3 +-- tests/by-util/test_printf.rs | 10 ++++++++++ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index fca402378..e1877de3d 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -12,6 +12,8 @@ use crate::{ use os_display::Quotable; use std::ffi::OsStr; +use super::ExtendedBigDecimal; + /// An argument for formatting /// /// Each of these variants is only accepted by their respective directives. For @@ -25,7 +27,7 @@ pub enum FormatArgument { String(String), UnsignedInt(u64), SignedInt(i64), - Float(f64), + Float(ExtendedBigDecimal), /// Special argument that gets coerced into the other variants Unparsed(String), } @@ -34,7 +36,7 @@ pub trait ArgumentIter<'a>: Iterator { fn get_char(&mut self) -> u8; fn get_i64(&mut self) -> i64; fn get_u64(&mut self) -> u64; - fn get_f64(&mut self) -> f64; + fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal; fn get_str(&mut self) -> &'a str; } @@ -72,14 +74,14 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { } } - fn get_f64(&mut self) -> f64 { + fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal { let Some(next) = self.next() else { - return 0.0; + return ExtendedBigDecimal::zero(); }; match next { - FormatArgument::Float(n) => *n, - FormatArgument::Unparsed(s) => extract_value(f64::extended_parse(s), s), - _ => 0.0, + FormatArgument::Float(n) => n.clone(), + FormatArgument::Unparsed(s) => extract_value(ExtendedBigDecimal::extended_parse(s), s), + _ => ExtendedBigDecimal::zero(), } } diff --git a/src/uucore/src/lib/features/format/extendedbigdecimal.rs b/src/uucore/src/lib/features/format/extendedbigdecimal.rs index 4e4a1fe5c..8c242be9f 100644 --- a/src/uucore/src/lib/features/format/extendedbigdecimal.rs +++ b/src/uucore/src/lib/features/format/extendedbigdecimal.rs @@ -141,6 +141,12 @@ impl Zero for ExtendedBigDecimal { } } +impl Default for ExtendedBigDecimal { + fn default() -> Self { + Self::zero() + } +} + impl Add for ExtendedBigDecimal { type Output = Self; diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 63b3dd221..13025ae74 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -433,8 +433,7 @@ impl Spec { } => { let width = resolve_asterisk(*width, &mut args).unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args).unwrap_or(6); - // TODO: We should implement some get_extendedBigDecimal function in args to avoid losing precision. - let f: ExtendedBigDecimal = args.get_f64().into(); + let f: ExtendedBigDecimal = args.get_extended_big_decimal(); if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index df1fafd60..ee2f399d5 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -992,6 +992,16 @@ fn float_flag_position_space_padding() { .stdout_only(" +1.0"); } +#[test] +fn float_large_precision() { + // Note: This does not match GNU coreutils output (0.100000000000000000001355252716 on x86), + // as we parse and format using ExtendedBigDecimal, which provides arbitrary precision. + new_ucmd!() + .args(&["%.30f", "0.1"]) + .succeeds() + .stdout_only("0.100000000000000000000000000000"); +} + #[test] fn float_non_finite_space_padding() { new_ucmd!() From 55773e9d3551c37dff8f83e6b04991b6d7c3d05e Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 19 Mar 2025 21:29:11 +0100 Subject: [PATCH 08/10] uucore: format: num_parser: Fix large hexadecimal float parsing Large numbers can overflow u64 when doing 16u64.pow(scale): do the operation on BigInt/BigDecimal instead. Also, add a test. Wolfram Alpha can help confirm the decimal number is correct (16-16**-21). --- src/uucore/src/lib/features/format/num_parser.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 7cb584db1..54d60f69b 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -306,7 +306,8 @@ fn parse( BigDecimal::from_bigint(signed_digits, scale) } else { // Base is not 10, init at scale 0 then divide by base**scale. - BigDecimal::from_bigint(signed_digits, 0) / (base as u64).pow(scale as u32) + BigDecimal::from_bigint(signed_digits, 0) + / BigDecimal::from_bigint(BigInt::from(base as u32).pow(scale as u32), 0) }; ExtendedBigDecimal::BigDecimal(bd) }; @@ -494,6 +495,14 @@ mod tests { )), ExtendedBigDecimal::extended_parse("0x.1") ); + + // Precisely parse very large hexadecimal numbers (i.e. with a large division). + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("15.999999999999999999999999948301211715435770320536956745627321652136743068695068359375").unwrap() + )), + ExtendedBigDecimal::extended_parse("0xf.fffffffffffffffffffff") + ); } #[test] From bd68eb8bebebbfd14659d6241e5fa0e80f8f72d6 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 19 Mar 2025 21:13:52 +0100 Subject: [PATCH 09/10] uucore: format: num_parser: Parse exponent part of floating point numbers Parse numbers like 123.15e15 and 0xfp-2, and add some tests for that. `parse` is becoming more and more of a monster: we should consider splitting it into multiple parts. Fixes #7474. --- .../src/lib/features/format/num_parser.rs | 103 +++++++++++++++--- 1 file changed, 89 insertions(+), 14 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 54d60f69b..645e6c2bf 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -210,6 +210,8 @@ fn parse_special_value( } } +// TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps +// around between integer and float parsing, and should be split in multiple parts. #[allow(clippy::cognitive_complexity)] fn parse( input: &str, @@ -267,21 +269,51 @@ fn parse( let mut chars = rest.chars().enumerate().fuse().peekable(); let mut digits = BigUint::zero(); let mut scale = 0i64; + let mut exponent = 0i64; while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); digits = digits * base as u8 + d; } - // Parse the fractional part of the number if there can be one and the input contains - // a '.' decimal separator. - if matches!(chars.peek(), Some(&(_, '.'))) - && matches!(base, Base::Decimal | Base::Hexadecimal) - && !integral_only - { - chars.next(); - while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { + // Parse fractional/exponent part of the number for supported bases. + if matches!(base, Base::Decimal | Base::Hexadecimal) && !integral_only { + // Parse the fractional part of the number if there can be one and the input contains + // a '.' decimal separator. + if matches!(chars.peek(), Some(&(_, '.'))) { chars.next(); - (digits, scale) = (digits * base as u8 + d, scale + 1); + while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { + chars.next(); + (digits, scale) = (digits * base as u8 + d, scale + 1); + } + } + + let exp_char = match base { + Base::Decimal => 'e', + Base::Hexadecimal => 'p', + _ => unreachable!(), + }; + + // Parse the exponent part, only decimal numbers are allowed. + if chars.peek().is_some_and(|&(_, c)| c == exp_char) { + chars.next(); + let exp_negative = match chars.peek() { + Some((_, '-')) => { + chars.next(); + true + } + Some((_, '+')) => { + chars.next(); + false + } + _ => false, // Something else, or nothing at all: keep going. + }; + while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) { + chars.next(); + exponent = exponent * 10 + d as i64; + } + if exp_negative { + exponent = -exponent; + } } } @@ -300,14 +332,23 @@ fn parse( } else { let sign = if negative { Sign::Minus } else { Sign::Plus }; let signed_digits = BigInt::from_biguint(sign, digits); - let bd = if scale == 0 { + let bd = if scale == 0 && exponent == 0 { BigDecimal::from_bigint(signed_digits, 0) } else if base == Base::Decimal { - BigDecimal::from_bigint(signed_digits, scale) + BigDecimal::from_bigint(signed_digits, scale - exponent) + } else if base == Base::Hexadecimal { + // Base is 16, init at scale 0 then divide by base**scale. + let bd = BigDecimal::from_bigint(signed_digits, 0) + / BigDecimal::from_bigint(BigInt::from(16).pow(scale as u32), 0); + // Confusingly, exponent is in base 2 for hex floating point numbers. + if exponent >= 0 { + bd * 2u64.pow(exponent as u32) + } else { + bd / 2u64.pow(-exponent as u32) + } } else { - // Base is not 10, init at scale 0 then divide by base**scale. - BigDecimal::from_bigint(signed_digits, 0) - / BigDecimal::from_bigint(BigInt::from(base as u32).pow(scale as u32), 0) + // scale != 0, which means that integral_only is not set, so only base 10 and 16 are allowed. + unreachable!(); }; ExtendedBigDecimal::BigDecimal(bd) }; @@ -350,6 +391,10 @@ mod tests { u64::extended_parse("123.15"), Err(ExtendedParserError::PartialMatch(123, ".15")) )); + assert!(matches!( + u64::extended_parse("123e10"), + Err(ExtendedParserError::PartialMatch(123, "e10")) + )); } #[test] @@ -371,6 +416,10 @@ mod tests { i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)), Err(ExtendedParserError::Overflow) )); + assert!(matches!( + i64::extended_parse("-123e10"), + Err(ExtendedParserError::PartialMatch(-123, "e10")) + )); } #[test] @@ -397,12 +446,18 @@ mod tests { assert_eq!(Ok(123.15), f64::extended_parse("0123.15")); assert_eq!(Ok(123.15), f64::extended_parse("+0123.15")); assert_eq!(Ok(-123.15), f64::extended_parse("-0123.15")); + assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e5")); + assert_eq!(Ok(-12315000.0), f64::extended_parse("-123.15e5")); + assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e+5")); + assert_eq!(Ok(0.0012315), f64::extended_parse("123.15e-5")); assert_eq!( Ok(0.15), f64::extended_parse(".150000000000000000000000000231313") ); assert!(matches!(f64::extended_parse("1.2.3"), Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); + assert!(matches!(f64::extended_parse("123.15p5"), + Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15)); // Minus zero. 0.0 == -0.0 so we explicitly check the sign. assert_eq!(Ok(0.0), f64::extended_parse("-0.0")); assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative()); @@ -444,6 +499,20 @@ mod tests { )), ExtendedBigDecimal::extended_parse("123.15") ); + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint( + 12315.into(), + -98 + ))), + ExtendedBigDecimal::extended_parse("123.15e100") + ); + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint( + 12315.into(), + 102 + ))), + ExtendedBigDecimal::extended_parse("123.15e-100") + ); // Very high precision that would not fit in a f64. assert_eq!( Ok(ExtendedBigDecimal::BigDecimal( @@ -488,6 +557,12 @@ mod tests { assert_eq!(Ok(0.5), f64::extended_parse("0x.8")); assert_eq!(Ok(0.0625), f64::extended_parse("0x.1")); assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02")); + assert_eq!(Ok(16.0), f64::extended_parse("0x0.8p5")); + assert_eq!(Ok(0.0625), f64::extended_parse("0x1p-4")); + + // We cannot really check that 'e' is not a valid exponent indicator for hex floats... + // but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3 + assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5")); assert_eq!( Ok(ExtendedBigDecimal::BigDecimal( From 30c89af9acc87a528adff2147f669e700cdff1fa Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Fri, 21 Mar 2025 20:38:06 +0100 Subject: [PATCH 10/10] uucore: format: num_parser: Make it clear that scale can only be positive After scratching my head a bit about why the hexadecimal code works, seems better to do make scale an u64 to clarify. Note that this may u64 may exceed i64 capacity, but that can only happen if the number of digits provided > 2**63 (impossible). --- src/uucore/src/lib/features/format/num_parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 645e6c2bf..f11a75cdb 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -268,7 +268,7 @@ fn parse( // Parse the integral part of the number let mut chars = rest.chars().enumerate().fuse().peekable(); let mut digits = BigUint::zero(); - let mut scale = 0i64; + let mut scale = 0u64; let mut exponent = 0i64; while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); @@ -335,7 +335,7 @@ fn parse( let bd = if scale == 0 && exponent == 0 { BigDecimal::from_bigint(signed_digits, 0) } else if base == Base::Decimal { - BigDecimal::from_bigint(signed_digits, scale - exponent) + BigDecimal::from_bigint(signed_digits, scale as i64 - exponent) } else if base == Base::Hexadecimal { // Base is 16, init at scale 0 then divide by base**scale. let bd = BigDecimal::from_bigint(signed_digits, 0)