diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 08111bca8..e1877de3d 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -5,13 +5,15 @@ use crate::{ error::set_exit_code, - features::format::num_parser::{ParseError, ParsedNumber}, + features::format::num_parser::{ExtendedParser, ExtendedParserError}, quoting_style::{Quotes, QuotingStyle, escape_name}, show_error, show_warning, }; use os_display::Quotable; use std::ffi::OsStr; +use super::ExtendedBigDecimal; + /// An argument for formatting /// /// Each of these variants is only accepted by their respective directives. For @@ -25,7 +27,7 @@ pub enum FormatArgument { String(String), UnsignedInt(u64), SignedInt(i64), - Float(f64), + Float(ExtendedBigDecimal), /// Special argument that gets coerced into the other variants Unparsed(String), } @@ -34,7 +36,7 @@ pub trait ArgumentIter<'a>: Iterator { fn get_char(&mut self) -> u8; fn get_i64(&mut self) -> i64; fn get_u64(&mut self) -> u64; - fn get_f64(&mut self) -> f64; + fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal; fn get_str(&mut self) -> &'a str; } @@ -56,7 +58,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s), + FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s), _ => 0, } } @@ -67,19 +69,19 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::SignedInt(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_i64(s), s), + FormatArgument::Unparsed(s) => extract_value(i64::extended_parse(s), s), _ => 0, } } - fn get_f64(&mut self) -> f64 { + fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal { let Some(next) = self.next() else { - return 0.0; + return ExtendedBigDecimal::zero(); }; match next { - FormatArgument::Float(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_f64(s), s), - _ => 0.0, + FormatArgument::Float(n) => n.clone(), + FormatArgument::Unparsed(s) => extract_value(ExtendedBigDecimal::extended_parse(s), s), + _ => ExtendedBigDecimal::zero(), } } @@ -91,7 +93,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { } } -fn extract_value(p: Result>, input: &str) -> T { +fn extract_value(p: Result>, input: &str) -> T { match p { Ok(v) => v, Err(e) => { @@ -103,15 +105,15 @@ fn extract_value(p: Result>, input: &str) -> T }, ); match e { - ParseError::Overflow => { + ExtendedParserError::Overflow => { show_error!("{}: Numerical result out of range", input.quote()); Default::default() } - ParseError::NotNumeric => { + ExtendedParserError::NotNumeric => { show_error!("{}: expected a numeric value", input.quote()); Default::default() } - ParseError::PartialMatch(v, rest) => { + ExtendedParserError::PartialMatch(v, rest) => { let bytes = input.as_encoded_bytes(); if !bytes.is_empty() && bytes[0] == b'\'' { show_warning!( diff --git a/src/uucore/src/lib/features/format/extendedbigdecimal.rs b/src/uucore/src/lib/features/format/extendedbigdecimal.rs index 4e4a1fe5c..8c242be9f 100644 --- a/src/uucore/src/lib/features/format/extendedbigdecimal.rs +++ b/src/uucore/src/lib/features/format/extendedbigdecimal.rs @@ -141,6 +141,12 @@ impl Zero for ExtendedBigDecimal { } } +impl Default for ExtendedBigDecimal { + fn default() -> Self { + Self::zero() + } +} + impl Add for ExtendedBigDecimal { type Output = Self; diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs index 7de3a0e0a..f11a75cdb 100644 --- a/src/uucore/src/lib/features/format/num_parser.rs +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -5,11 +5,20 @@ //! Utilities for parsing numbers in various formats -// spell-checker:ignore powf copysign prec inity +// spell-checker:ignore powf copysign prec inity bigdecimal extendedbigdecimal biguint + +use bigdecimal::{ + BigDecimal, + num_bigint::{BigInt, BigUint, Sign}, +}; +use num_traits::ToPrimitive; +use num_traits::Zero; + +use crate::format::extendedbigdecimal::ExtendedBigDecimal; /// Base for number parsing #[derive(Clone, Copy, PartialEq)] -pub enum Base { +enum Base { /// Binary base Binary = 2, @@ -44,7 +53,7 @@ impl Base { /// Type returned if a number could not be parsed in its entirety #[derive(Debug, PartialEq)] -pub enum ParseError<'a, T> { +pub enum ExtendedParserError<'a, T> { /// The input as a whole makes no sense NotNumeric, /// The beginning of the input made sense and has been parsed, @@ -56,11 +65,14 @@ pub enum ParseError<'a, T> { Overflow, } -impl<'a, T> ParseError<'a, T> { - fn map(self, f: impl FnOnce(T, &'a str) -> ParseError<'a, U>) -> ParseError<'a, U> { +impl<'a, T> ExtendedParserError<'a, T> { + fn map( + self, + f: impl FnOnce(T, &'a str) -> ExtendedParserError<'a, U>, + ) -> ExtendedParserError<'a, U> { match self { - Self::NotNumeric => ParseError::NotNumeric, - Self::Overflow => ParseError::Overflow, + Self::NotNumeric => ExtendedParserError::NotNumeric, + Self::Overflow => ExtendedParserError::Overflow, Self::PartialMatch(v, s) => f(v, s), } } @@ -68,353 +80,586 @@ impl<'a, T> ParseError<'a, T> { /// A number parser for binary, octal, decimal, hexadecimal and single characters. /// -/// Internally, in order to get the maximum possible precision and cover the full -/// range of u64 and i64 without losing precision for f64, the returned number is -/// decomposed into: -/// - A `base` value -/// - A `neg` sign bit -/// - A `integral` positive part -/// - A `fractional` positive part -/// - A `precision` representing the number of digits in the fractional part -/// -/// If the fractional part cannot be represented on a `u64`, parsing continues -/// silently by ignoring non-significant digits. -pub struct ParsedNumber { - base: Base, - negative: bool, - integral: u64, - fractional: u64, - precision: usize, +/// It is implemented for `u64`/`i64`, where no fractional part is parsed, +/// and `f64` float, where octal and binary formats are not allowed. +pub trait ExtendedParser { + // We pick a hopefully different name for our parser, to avoid clash with standard traits. + fn extended_parse(input: &str) -> Result> + where + Self: Sized; } -impl ParsedNumber { - fn into_i64(self) -> Option { - if self.negative { - i64::try_from(-i128::from(self.integral)).ok() - } else { - i64::try_from(self.integral).ok() - } - } - +impl ExtendedParser for i64 { /// Parse a number as i64. No fractional part is allowed. - pub fn parse_i64(input: &str) -> Result> { - match Self::parse(input, true) { - Ok(v) => v.into_i64().ok_or(ParseError::Overflow), + fn extended_parse(input: &str) -> Result> { + fn into_i64(ebd: ExtendedBigDecimal) -> Option { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + i64::try_from(digits).ok() + } else { + None + } + } + ExtendedBigDecimal::MinusZero => Some(0), + _ => None, + } + } + + match parse(input, true) { + Ok(v) => into_i64(v).ok_or(ExtendedParserError::Overflow), Err(e) => Err(e.map(|v, rest| { - v.into_i64() - .map(|v| ParseError::PartialMatch(v, rest)) - .unwrap_or(ParseError::Overflow) + into_i64(v) + .map(|v| ExtendedParserError::PartialMatch(v, rest)) + .unwrap_or(ExtendedParserError::Overflow) })), } } +} +impl ExtendedParser for u64 { /// Parse a number as u64. No fractional part is allowed. - pub fn parse_u64(input: &str) -> Result> { - match Self::parse(input, true) { - Ok(v) | Err(ParseError::PartialMatch(v, _)) if v.negative => { - Err(ParseError::NotNumeric) + fn extended_parse(input: &str) -> Result> { + fn into_u64(ebd: ExtendedBigDecimal) -> Option { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + u64::try_from(digits).ok() + } else { + None + } + } + _ => None, } - Ok(v) => Ok(v.integral), - Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.integral, rest))), + } + + match parse(input, true) { + Ok(v) => into_u64(v).ok_or(ExtendedParserError::Overflow), + Err(e) => Err(e.map(|v, rest| { + into_u64(v) + .map(|v| ExtendedParserError::PartialMatch(v, rest)) + .unwrap_or(ExtendedParserError::Overflow) + })), } } +} - fn into_f64(self) -> f64 { - let n = self.integral as f64 - + (self.fractional as f64) / (self.base as u8 as f64).powf(self.precision as f64); - if self.negative { -n } else { n } - } - +impl ExtendedParser for f64 { /// Parse a number as f64 - pub fn parse_f64(input: &str) -> Result> { - match Self::parse(input, false) { - Ok(v) => Ok(v.into_f64()), - Err(ParseError::NotNumeric) => Self::parse_f64_special_values(input), - Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))), + fn extended_parse(input: &str) -> Result> { + // TODO: This is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive). + fn into_f64(ebd: ExtendedBigDecimal) -> f64 { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(), + ExtendedBigDecimal::MinusZero => -0.0, + ExtendedBigDecimal::Nan => f64::NAN, + ExtendedBigDecimal::MinusNan => -f64::NAN, + ExtendedBigDecimal::Infinity => f64::INFINITY, + ExtendedBigDecimal::MinusInfinity => -f64::INFINITY, + } + } + + match parse(input, false) { + Ok(v) => Ok(into_f64(v)), + Err(e) => Err(e.map(|v, rest| ExtendedParserError::PartialMatch(into_f64(v), rest))), } } +} - fn parse_f64_special_values(input: &str) -> Result> { - let (sign, rest) = if let Some(input) = input.strip_prefix('-') { - (-1.0, input) - } else { - (1.0, input) +impl ExtendedParser for ExtendedBigDecimal { + /// Parse a number as an ExtendedBigDecimal + fn extended_parse( + input: &str, + ) -> Result> { + parse(input, false) + } +} + +fn parse_special_value( + input: &str, + negative: bool, +) -> Result> { + let prefix = input + .chars() + .take(3) + .map(|c| c.to_ascii_lowercase()) + .collect::(); + let special = match prefix.as_str() { + "inf" => { + if negative { + ExtendedBigDecimal::MinusInfinity + } else { + ExtendedBigDecimal::Infinity + } + } + "nan" => { + if negative { + ExtendedBigDecimal::MinusNan + } else { + ExtendedBigDecimal::Nan + } + } + _ => return Err(ExtendedParserError::NotNumeric), + }; + if input.len() == 3 { + Ok(special) + } else { + Err(ExtendedParserError::PartialMatch(special, &input[3..])) + } +} + +// TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps +// around between integer and float parsing, and should be split in multiple parts. +#[allow(clippy::cognitive_complexity)] +fn parse( + input: &str, + integral_only: bool, +) -> Result> { + // Parse the "'" prefix separately + if let Some(rest) = input.strip_prefix('\'') { + let mut chars = rest.char_indices().fuse(); + let v = chars + .next() + .map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into())); + return match (v, chars.next()) { + (Some(v), None) => Ok(v), + (Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])), + (None, _) => Err(ExtendedParserError::NotNumeric), }; - let prefix = rest - .chars() - .take(3) - .map(|c| c.to_ascii_lowercase()) - .collect::(); - let special = match prefix.as_str() { - "inf" => f64::INFINITY, - "nan" => f64::NAN, - _ => return Err(ParseError::NotNumeric), - } - .copysign(sign); - if rest.len() == 3 { - Ok(special) - } else { - Err(ParseError::PartialMatch(special, &rest[3..])) - } } - #[allow(clippy::cognitive_complexity)] - fn parse(input: &str, integral_only: bool) -> Result> { - // Parse the "'" prefix separately - if let Some(rest) = input.strip_prefix('\'') { - let mut chars = rest.char_indices().fuse(); - let v = chars.next().map(|(_, c)| Self { - base: Base::Decimal, - negative: false, - integral: u64::from(c), - fractional: 0, - precision: 0, - }); - return match (v, chars.next()) { - (Some(v), None) => Ok(v), - (Some(v), Some((i, _))) => Err(ParseError::PartialMatch(v, &rest[i..])), - (None, _) => Err(ParseError::NotNumeric), - }; - } + let trimmed_input = input.trim_ascii_start(); - let trimmed_input = input.trim_ascii_start(); + // Initial minus/plus sign + let (negative, unsigned) = if let Some(trimmed_input) = trimmed_input.strip_prefix('-') { + (true, trimmed_input) + } else if let Some(trimmed_input) = trimmed_input.strip_prefix('+') { + (false, trimmed_input) + } else { + (false, trimmed_input) + }; - // Initial minus sign - let (negative, unsigned) = if let Some(trimmed_input) = trimmed_input.strip_prefix('-') { - (true, trimmed_input) - } else { - (false, trimmed_input) - }; - - // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a - // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix - // will not be consumed in case the parsable string contains only "0": the leading extra "0" - // will have no influence on the result. - let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { + // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a + // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix + // will not be consumed in case the parsable string contains only "0": the leading extra "0" + // will have no influence on the result. + let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { + if let Some(rest) = rest.strip_prefix(['x', 'X']) { + (Base::Hexadecimal, rest) + } else if integral_only { + // Binary/Octal only allowed for integer parsing. if let Some(rest) = rest.strip_prefix(['b', 'B']) { (Base::Binary, rest) - } else if let Some(rest) = rest.strip_prefix(['x', 'X']) { - (Base::Hexadecimal, rest) - } else if integral_only { - (Base::Octal, unsigned) } else { - (Base::Decimal, unsigned) + (Base::Octal, unsigned) } } else { (Base::Decimal, unsigned) - }; - if rest.is_empty() { - return Err(ParseError::NotNumeric); } + } else { + (Base::Decimal, unsigned) + }; + if rest.is_empty() { + return Err(ExtendedParserError::NotNumeric); + } - // Parse the integral part of the number - let mut chars = rest.chars().enumerate().fuse().peekable(); - let mut integral = 0u64; - while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { - chars.next(); - integral = integral - .checked_mul(base as u64) - .and_then(|n| n.checked_add(d)) - .ok_or(ParseError::Overflow)?; - } + // Parse the integral part of the number + let mut chars = rest.chars().enumerate().fuse().peekable(); + let mut digits = BigUint::zero(); + let mut scale = 0u64; + let mut exponent = 0i64; + while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { + chars.next(); + digits = digits * base as u8 + d; + } + // Parse fractional/exponent part of the number for supported bases. + if matches!(base, Base::Decimal | Base::Hexadecimal) && !integral_only { // Parse the fractional part of the number if there can be one and the input contains // a '.' decimal separator. - let (mut fractional, mut precision) = (0u64, 0); - if matches!(chars.peek(), Some(&(_, '.'))) - && matches!(base, Base::Decimal | Base::Hexadecimal) - && !integral_only - { + if matches!(chars.peek(), Some(&(_, '.'))) { chars.next(); - let mut ended = false; while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); - if !ended { - if let Some(f) = fractional - .checked_mul(base as u64) - .and_then(|n| n.checked_add(d)) - { - (fractional, precision) = (f, precision + 1); - } else { - ended = true; - } - } + (digits, scale) = (digits * base as u8 + d, scale + 1); } } - // If nothing has been parsed, declare the parsing unsuccessful - if let Some((0, _)) = chars.peek() { - return Err(ParseError::NotNumeric); - } - - // Return what has been parsed so far. It there are extra characters, mark the - // parsing as a partial match. - let parsed = Self { - base, - negative, - integral, - fractional, - precision, + let exp_char = match base { + Base::Decimal => 'e', + Base::Hexadecimal => 'p', + _ => unreachable!(), }; - if let Some((first_unparsed, _)) = chars.next() { - Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..])) - } else { - Ok(parsed) + + // Parse the exponent part, only decimal numbers are allowed. + if chars.peek().is_some_and(|&(_, c)| c == exp_char) { + chars.next(); + let exp_negative = match chars.peek() { + Some((_, '-')) => { + chars.next(); + true + } + Some((_, '+')) => { + chars.next(); + false + } + _ => false, // Something else, or nothing at all: keep going. + }; + while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) { + chars.next(); + exponent = exponent * 10 + d as i64; + } + if exp_negative { + exponent = -exponent; + } } } + + // If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful + if let Some((0, _)) = chars.peek() { + if integral_only { + return Err(ExtendedParserError::NotNumeric); + } else { + return parse_special_value(unsigned, negative); + } + } + + // TODO: Might be nice to implement a ExtendedBigDecimal copysign or negation function to move away some of this logic... + let ebd = if digits == BigUint::zero() && negative { + ExtendedBigDecimal::MinusZero + } else { + let sign = if negative { Sign::Minus } else { Sign::Plus }; + let signed_digits = BigInt::from_biguint(sign, digits); + let bd = if scale == 0 && exponent == 0 { + BigDecimal::from_bigint(signed_digits, 0) + } else if base == Base::Decimal { + BigDecimal::from_bigint(signed_digits, scale as i64 - exponent) + } else if base == Base::Hexadecimal { + // Base is 16, init at scale 0 then divide by base**scale. + let bd = BigDecimal::from_bigint(signed_digits, 0) + / BigDecimal::from_bigint(BigInt::from(16).pow(scale as u32), 0); + // Confusingly, exponent is in base 2 for hex floating point numbers. + if exponent >= 0 { + bd * 2u64.pow(exponent as u32) + } else { + bd / 2u64.pow(-exponent as u32) + } + } else { + // scale != 0, which means that integral_only is not set, so only base 10 and 16 are allowed. + unreachable!(); + }; + ExtendedBigDecimal::BigDecimal(bd) + }; + + // Return what has been parsed so far. It there are extra characters, mark the + // parsing as a partial match. + if let Some((first_unparsed, _)) = chars.next() { + Err(ExtendedParserError::PartialMatch( + ebd, + &rest[first_unparsed..], + )) + } else { + Ok(ebd) + } } #[cfg(test)] mod tests { - use super::{ParseError, ParsedNumber}; + use std::str::FromStr; + + use bigdecimal::BigDecimal; + + use crate::format::ExtendedBigDecimal; + + use super::{ExtendedParser, ExtendedParserError}; #[test] fn test_decimal_u64() { - assert_eq!(Ok(123), ParsedNumber::parse_u64("123")); - assert_eq!( - Ok(u64::MAX), - ParsedNumber::parse_u64(&format!("{}", u64::MAX)) - ); + assert_eq!(Ok(123), u64::extended_parse("123")); + assert_eq!(Ok(u64::MAX), u64::extended_parse(&format!("{}", u64::MAX))); assert!(matches!( - ParsedNumber::parse_u64("-123"), - Err(ParseError::NotNumeric) + u64::extended_parse("-123"), + Err(ExtendedParserError::Overflow) )); assert!(matches!( - ParsedNumber::parse_u64(""), - Err(ParseError::NotNumeric) + u64::extended_parse(""), + Err(ExtendedParserError::NotNumeric) )); assert!(matches!( - ParsedNumber::parse_u64("123.15"), - Err(ParseError::PartialMatch(123, ".15")) + u64::extended_parse("123.15"), + Err(ExtendedParserError::PartialMatch(123, ".15")) + )); + assert!(matches!( + u64::extended_parse("123e10"), + Err(ExtendedParserError::PartialMatch(123, "e10")) )); } #[test] fn test_decimal_i64() { - assert_eq!(Ok(123), ParsedNumber::parse_i64("123")); - assert_eq!(Ok(-123), ParsedNumber::parse_i64("-123")); + assert_eq!(Ok(123), i64::extended_parse("123")); + assert_eq!(Ok(123), i64::extended_parse("+123")); + assert_eq!(Ok(-123), i64::extended_parse("-123")); assert!(matches!( - ParsedNumber::parse_i64("--123"), - Err(ParseError::NotNumeric) + i64::extended_parse("--123"), + Err(ExtendedParserError::NotNumeric) )); - assert_eq!( - Ok(i64::MAX), - ParsedNumber::parse_i64(&format!("{}", i64::MAX)) - ); - assert_eq!( - Ok(i64::MIN), - ParsedNumber::parse_i64(&format!("{}", i64::MIN)) - ); + assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX))); + assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN))); assert!(matches!( - ParsedNumber::parse_i64(&format!("{}", u64::MAX)), - Err(ParseError::Overflow) + i64::extended_parse(&format!("{}", u64::MAX)), + Err(ExtendedParserError::Overflow) )); assert!(matches!( - ParsedNumber::parse_i64(&format!("{}", i64::MAX as u64 + 1)), - Err(ParseError::Overflow) + i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)), + Err(ExtendedParserError::Overflow) + )); + assert!(matches!( + i64::extended_parse("-123e10"), + Err(ExtendedParserError::PartialMatch(-123, "e10")) )); } #[test] fn test_decimal_f64() { - assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123")); - assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123")); - assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.")); - assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.")); - assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.0")); - assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.0")); - assert_eq!(Ok(123.15), ParsedNumber::parse_f64("123.15")); - assert_eq!(Ok(-123.15), ParsedNumber::parse_f64("-123.15")); - assert_eq!(Ok(0.15), ParsedNumber::parse_f64(".15")); - assert_eq!(Ok(-0.15), ParsedNumber::parse_f64("-.15")); + assert_eq!(Ok(123.0), f64::extended_parse("123")); + assert_eq!(Ok(123.0), f64::extended_parse("+123")); + assert_eq!(Ok(-123.0), f64::extended_parse("-123")); + assert_eq!(Ok(123.0), f64::extended_parse("123.")); + assert_eq!(Ok(-123.0), f64::extended_parse("-123.")); + assert_eq!(Ok(123.0), f64::extended_parse("123.0")); + assert_eq!(Ok(-123.0), f64::extended_parse("-123.0")); + assert_eq!(Ok(123.15), f64::extended_parse("123.15")); + assert_eq!(Ok(123.15), f64::extended_parse("+123.15")); + assert_eq!(Ok(-123.15), f64::extended_parse("-123.15")); + assert_eq!(Ok(0.15), f64::extended_parse(".15")); + assert_eq!(Ok(-0.15), f64::extended_parse("-.15")); + // Leading 0(s) are _not_ octal when parsed as float + assert_eq!(Ok(123.0), f64::extended_parse("0123")); + assert_eq!(Ok(123.0), f64::extended_parse("+0123")); + assert_eq!(Ok(-123.0), f64::extended_parse("-0123")); + assert_eq!(Ok(123.0), f64::extended_parse("00123")); + assert_eq!(Ok(123.0), f64::extended_parse("+00123")); + assert_eq!(Ok(-123.0), f64::extended_parse("-00123")); + assert_eq!(Ok(123.15), f64::extended_parse("0123.15")); + assert_eq!(Ok(123.15), f64::extended_parse("+0123.15")); + assert_eq!(Ok(-123.15), f64::extended_parse("-0123.15")); + assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e5")); + assert_eq!(Ok(-12315000.0), f64::extended_parse("-123.15e5")); + assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e+5")); + assert_eq!(Ok(0.0012315), f64::extended_parse("123.15e-5")); assert_eq!( Ok(0.15), - ParsedNumber::parse_f64(".150000000000000000000000000231313") + f64::extended_parse(".150000000000000000000000000231313") ); - assert!(matches!(ParsedNumber::parse_f64("1.2.3"), - Err(ParseError::PartialMatch(f, ".3")) if f == 1.2)); - assert_eq!(Ok(f64::INFINITY), ParsedNumber::parse_f64("inf")); - assert_eq!(Ok(f64::NEG_INFINITY), ParsedNumber::parse_f64("-inf")); - assert!(ParsedNumber::parse_f64("NaN").unwrap().is_nan()); - assert!(ParsedNumber::parse_f64("NaN").unwrap().is_sign_positive()); - assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_nan()); - assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_sign_negative()); - assert!(matches!(ParsedNumber::parse_f64("-infinity"), - Err(ParseError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); - assert!(ParsedNumber::parse_f64(&format!("{}", u64::MAX)).is_ok()); - assert!(ParsedNumber::parse_f64(&format!("{}", i64::MIN)).is_ok()); + assert!(matches!(f64::extended_parse("1.2.3"), + Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); + assert!(matches!(f64::extended_parse("123.15p5"), + Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15)); + // Minus zero. 0.0 == -0.0 so we explicitly check the sign. + assert_eq!(Ok(0.0), f64::extended_parse("-0.0")); + assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative()); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("inf")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("+inf")); + assert_eq!(Ok(f64::NEG_INFINITY), f64::extended_parse("-inf")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("Inf")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("InF")); + assert_eq!(Ok(f64::INFINITY), f64::extended_parse("INF")); + assert!(f64::extended_parse("NaN").unwrap().is_nan()); + assert!(f64::extended_parse("NaN").unwrap().is_sign_positive()); + assert!(f64::extended_parse("+NaN").unwrap().is_nan()); + assert!(f64::extended_parse("+NaN").unwrap().is_sign_positive()); + assert!(f64::extended_parse("-NaN").unwrap().is_nan()); + assert!(f64::extended_parse("-NaN").unwrap().is_sign_negative()); + assert!(f64::extended_parse("nan").unwrap().is_nan()); + assert!(f64::extended_parse("nan").unwrap().is_sign_positive()); + assert!(f64::extended_parse("NAN").unwrap().is_nan()); + assert!(f64::extended_parse("NAN").unwrap().is_sign_positive()); + assert!(matches!(f64::extended_parse("-infinity"), + Err(ExtendedParserError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); + assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok()); + assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok()); + } + + #[test] + fn test_decimal_extended_big_decimal() { + // f64 parsing above already tested a lot of these, just do a few. + // Careful, we usually cannot use From to get a precise ExtendedBigDecimal as numbers like 123.15 cannot be exactly represented by a f64. + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("123").unwrap() + )), + ExtendedBigDecimal::extended_parse("123") + ); + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("123.15").unwrap() + )), + ExtendedBigDecimal::extended_parse("123.15") + ); + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint( + 12315.into(), + -98 + ))), + ExtendedBigDecimal::extended_parse("123.15e100") + ); + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint( + 12315.into(), + 102 + ))), + ExtendedBigDecimal::extended_parse("123.15e-100") + ); + // Very high precision that would not fit in a f64. + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str(".150000000000000000000000000000000000001").unwrap() + )), + ExtendedBigDecimal::extended_parse(".150000000000000000000000000000000000001") + ); + assert!(matches!( + ExtendedBigDecimal::extended_parse("nan"), + Ok(ExtendedBigDecimal::Nan) + )); + assert!(matches!( + ExtendedBigDecimal::extended_parse("-NAN"), + Ok(ExtendedBigDecimal::MinusNan) + )); + assert_eq!( + Ok(ExtendedBigDecimal::Infinity), + ExtendedBigDecimal::extended_parse("InF") + ); + assert_eq!( + Ok(ExtendedBigDecimal::MinusInfinity), + ExtendedBigDecimal::extended_parse("-iNf") + ); + assert_eq!( + Ok(ExtendedBigDecimal::zero()), + ExtendedBigDecimal::extended_parse("0.0") + ); + assert!(matches!( + ExtendedBigDecimal::extended_parse("-0.0"), + Ok(ExtendedBigDecimal::MinusZero) + )); } #[test] fn test_hexadecimal() { - assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0x123")); - assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0X123")); - assert_eq!(Ok(0xfe), ParsedNumber::parse_u64("0xfE")); - assert_eq!(Ok(-0x123), ParsedNumber::parse_i64("-0x123")); + assert_eq!(Ok(0x123), u64::extended_parse("0x123")); + assert_eq!(Ok(0x123), u64::extended_parse("0X123")); + assert_eq!(Ok(0x123), u64::extended_parse("+0x123")); + assert_eq!(Ok(0xfe), u64::extended_parse("0xfE")); + assert_eq!(Ok(-0x123), i64::extended_parse("-0x123")); - assert_eq!(Ok(0.5), ParsedNumber::parse_f64("0x.8")); - assert_eq!(Ok(0.0625), ParsedNumber::parse_f64("0x.1")); - assert_eq!(Ok(15.007_812_5), ParsedNumber::parse_f64("0xf.02")); + assert_eq!(Ok(0.5), f64::extended_parse("0x.8")); + assert_eq!(Ok(0.0625), f64::extended_parse("0x.1")); + assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02")); + assert_eq!(Ok(16.0), f64::extended_parse("0x0.8p5")); + assert_eq!(Ok(0.0625), f64::extended_parse("0x1p-4")); + + // We cannot really check that 'e' is not a valid exponent indicator for hex floats... + // but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3 + assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5")); + + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("0.0625").unwrap() + )), + ExtendedBigDecimal::extended_parse("0x.1") + ); + + // Precisely parse very large hexadecimal numbers (i.e. with a large division). + assert_eq!( + Ok(ExtendedBigDecimal::BigDecimal( + BigDecimal::from_str("15.999999999999999999999999948301211715435770320536956745627321652136743068695068359375").unwrap() + )), + ExtendedBigDecimal::extended_parse("0xf.fffffffffffffffffffff") + ); } #[test] fn test_octal() { - assert_eq!(Ok(0), ParsedNumber::parse_u64("0")); - assert_eq!(Ok(0o123), ParsedNumber::parse_u64("0123")); - assert_eq!(Ok(0o123), ParsedNumber::parse_u64("00123")); - assert_eq!(Ok(0), ParsedNumber::parse_u64("00")); + assert_eq!(Ok(0), u64::extended_parse("0")); + assert_eq!(Ok(0o123), u64::extended_parse("0123")); + assert_eq!(Ok(0o123), u64::extended_parse("+0123")); + assert_eq!(Ok(-0o123), i64::extended_parse("-0123")); + assert_eq!(Ok(0o123), u64::extended_parse("00123")); + assert_eq!(Ok(0), u64::extended_parse("00")); assert!(matches!( - ParsedNumber::parse_u64("008"), - Err(ParseError::PartialMatch(0, "8")) + u64::extended_parse("008"), + Err(ExtendedParserError::PartialMatch(0, "8")) )); assert!(matches!( - ParsedNumber::parse_u64("08"), - Err(ParseError::PartialMatch(0, "8")) + u64::extended_parse("08"), + Err(ExtendedParserError::PartialMatch(0, "8")) )); assert!(matches!( - ParsedNumber::parse_u64("0."), - Err(ParseError::PartialMatch(0, ".")) + u64::extended_parse("0."), + Err(ExtendedParserError::PartialMatch(0, ".")) )); + + // No float tests, leading zeros get parsed as decimal anyway. } #[test] fn test_binary() { - assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0b1011")); - assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0B1011")); + assert_eq!(Ok(0b1011), u64::extended_parse("0b1011")); + assert_eq!(Ok(0b1011), u64::extended_parse("0B1011")); + assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011")); + assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011")); + + // Binary not allowed for floats + assert!(matches!( + f64::extended_parse("0b100"), + Err(ExtendedParserError::PartialMatch(0f64, "b100")) + )); + assert!(matches!( + f64::extended_parse("0b100.1"), + Err(ExtendedParserError::PartialMatch(0f64, "b100.1")) + )); + + assert!(match ExtendedBigDecimal::extended_parse("0b100.1") { + Err(ExtendedParserError::PartialMatch(ebd, "b100.1")) => + ebd == ExtendedBigDecimal::zero(), + _ => false, + }); } #[test] fn test_parsing_with_leading_whitespace() { - assert_eq!(Ok(1), ParsedNumber::parse_u64(" 0x1")); - assert_eq!(Ok(-2), ParsedNumber::parse_i64(" -0x2")); - assert_eq!(Ok(-3), ParsedNumber::parse_i64(" \t-0x3")); - assert_eq!(Ok(-4), ParsedNumber::parse_i64(" \n-0x4")); - assert_eq!(Ok(-5), ParsedNumber::parse_i64(" \n\t\u{000d}-0x5")); + assert_eq!(Ok(1), u64::extended_parse(" 0x1")); + assert_eq!(Ok(-2), i64::extended_parse(" -0x2")); + assert_eq!(Ok(-3), i64::extended_parse(" \t-0x3")); + assert_eq!(Ok(-4), i64::extended_parse(" \n-0x4")); + assert_eq!(Ok(-5), i64::extended_parse(" \n\t\u{000d}-0x5")); // Ensure that trailing whitespace is still a partial match assert_eq!( - Err(ParseError::PartialMatch(6, " ")), - ParsedNumber::parse_u64("0x6 ") + Err(ExtendedParserError::PartialMatch(6, " ")), + u64::extended_parse("0x6 ") ); assert_eq!( - Err(ParseError::PartialMatch(7, "\t")), - ParsedNumber::parse_u64("0x7\t") + Err(ExtendedParserError::PartialMatch(7, "\t")), + u64::extended_parse("0x7\t") ); assert_eq!( - Err(ParseError::PartialMatch(8, "\n")), - ParsedNumber::parse_u64("0x8\n") + Err(ExtendedParserError::PartialMatch(8, "\n")), + u64::extended_parse("0x8\n") ); // Ensure that unicode non-ascii whitespace is a partial match assert_eq!( - Err(ParseError::NotNumeric), - ParsedNumber::parse_i64("\u{2029}-0x9") + Err(ExtendedParserError::NotNumeric), + i64::extended_parse("\u{2029}-0x9") ); // Ensure that whitespace after the number has "started" is not allowed assert_eq!( - Err(ParseError::NotNumeric), - ParsedNumber::parse_i64("- 0x9") + Err(ExtendedParserError::NotNumeric), + i64::extended_parse("- 0x9") ); } } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 63b3dd221..13025ae74 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -433,8 +433,7 @@ impl Spec { } => { let width = resolve_asterisk(*width, &mut args).unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args).unwrap_or(6); - // TODO: We should implement some get_extendedBigDecimal function in args to avoid losing precision. - let f: ExtendedBigDecimal = args.get_f64().into(); + let f: ExtendedBigDecimal = args.get_extended_big_decimal(); if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index df1fafd60..ee2f399d5 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -992,6 +992,16 @@ fn float_flag_position_space_padding() { .stdout_only(" +1.0"); } +#[test] +fn float_large_precision() { + // Note: This does not match GNU coreutils output (0.100000000000000000001355252716 on x86), + // as we parse and format using ExtendedBigDecimal, which provides arbitrary precision. + new_ucmd!() + .args(&["%.30f", "0.1"]) + .succeeds() + .stdout_only("0.100000000000000000000000000000"); +} + #[test] fn float_non_finite_space_padding() { new_ucmd!()