From 27efb9eff4e3bb69f096c3c2d3e8a80b75b410ef Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Sat, 22 Mar 2025 20:03:29 +0100 Subject: [PATCH] seq: Parse integral and fractional number of digits in the same function A lot of the code can be shared, and parsing is quite straightforward as we know that the digit is somewhat valid. --- src/uu/seq/src/hexadecimalfloat.rs | 206 ----------------------------- src/uu/seq/src/numberparse.rs | 101 ++++++++------ src/uu/seq/src/seq.rs | 1 - 3 files changed, 57 insertions(+), 251 deletions(-) delete mode 100644 src/uu/seq/src/hexadecimalfloat.rs diff --git a/src/uu/seq/src/hexadecimalfloat.rs b/src/uu/seq/src/hexadecimalfloat.rs deleted file mode 100644 index c09e15d59..000000000 --- a/src/uu/seq/src/hexadecimalfloat.rs +++ /dev/null @@ -1,206 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. -// spell-checker:ignore extendedbigdecimal bigdecimal hexdigit numberparse - -// TODO: Rewrite this -// Detect number precision similar to GNU coreutils. Refer to scan_arg in seq.c. There are still -// some differences from the GNU version, but this should be sufficient to test the idea. -pub fn parse_precision(s: &str) -> Option<usize> { - let hex_index = s.find(['x', 'X']); - let point_index = s.find('.'); - - if hex_index.is_some() { - // Hex value. Returns: - // - 0 for a hexadecimal integer (filled above) - // - None for a hexadecimal floating-point number (the default value of precision) - let power_index = s.find(['p', 'P']); - if point_index.is_none() && power_index.is_none() { - // No decimal point and no 'p' (power) => integer => precision = 0 - return Some(0); - } else { - return None; - } - } - - // This is a decimal floating point. 
The precision depends on two parameters: - // - the number of fractional digits - // - the exponent - // Let's detect the number of fractional digits - let fractional_length = if let Some(point_index) = point_index { - s[point_index + 1..] - .chars() - .take_while(|c| c.is_ascii_digit()) - .count() - } else { - 0 - }; - - let mut precision = Some(fractional_length); - - // Let's update the precision if exponent is present - if let Some(exponent_index) = s.find(['e', 'E']) { - let exponent_value: i32 = s[exponent_index + 1..].parse().unwrap_or(0); - if exponent_value < 0 { - precision = precision.map(|p| p + exponent_value.unsigned_abs() as usize); - } else { - precision = precision.map(|p| p - p.min(exponent_value as usize)); - } - } - precision -} - -/* TODO: move tests -#[cfg(test)] -mod tests { - - use super::{parse_number, parse_precision}; - use crate::{ExtendedBigDecimal, numberparse::ParseNumberError}; - use bigdecimal::BigDecimal; - use num_traits::ToPrimitive; - - fn parse_big_decimal(s: &str) -> Result<BigDecimal, ParseNumberError> { - match parse_number(s)?.number { - ExtendedBigDecimal::BigDecimal(bd) => Ok(bd), - _ => Err(ParseNumberError::Float), - } - } - - fn parse_f64(s: &str) -> Result<f64, ParseNumberError> { - parse_big_decimal(s)? 
- .to_f64() - .ok_or(ParseNumberError::Float) - } - - #[test] - fn test_parse_precise_number_case_insensitive() { - assert_eq!(parse_f64("0x1P1").unwrap(), 2.0); - assert_eq!(parse_f64("0x1p1").unwrap(), 2.0); - } - - #[test] - fn test_parse_precise_number_plus_minus_prefixes() { - assert_eq!(parse_f64("+0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("-0x1p1").unwrap(), -2.0); - } - - #[test] - fn test_parse_precise_number_power_signs() { - assert_eq!(parse_f64("0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("0x1p+1").unwrap(), 2.0); - assert_eq!(parse_f64("0x1p-1").unwrap(), 0.5); - } - - #[test] - fn test_parse_precise_number_hex() { - assert_eq!(parse_f64("0xd.dp-1").unwrap(), 6.90625); - } - - #[test] - fn test_parse_precise_number_no_power() { - assert_eq!(parse_f64("0x123.a").unwrap(), 291.625); - } - - #[test] - fn test_parse_precise_number_no_fractional() { - assert_eq!(parse_f64("0x333p-4").unwrap(), 51.1875); - } - - #[test] - fn test_parse_precise_number_no_integral() { - assert_eq!(parse_f64("0x.9").unwrap(), 0.5625); - assert_eq!(parse_f64("0x.9p2").unwrap(), 2.25); - } - - #[test] - fn test_parse_precise_number_from_valid_values() { - assert_eq!(parse_f64("0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("+0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("-0x1p1").unwrap(), -2.0); - assert_eq!(parse_f64("0x1p-1").unwrap(), 0.5); - assert_eq!(parse_f64("0x1.8").unwrap(), 1.5); - assert_eq!(parse_f64("-0x1.8").unwrap(), -1.5); - assert_eq!(parse_f64("0x1.8p2").unwrap(), 6.0); - assert_eq!(parse_f64("0x1.8p+2").unwrap(), 6.0); - assert_eq!(parse_f64("0x1.8p-2").unwrap(), 0.375); - assert_eq!(parse_f64("0x.8").unwrap(), 0.5); - assert_eq!(parse_f64("0x10p0").unwrap(), 16.0); - assert_eq!(parse_f64("0x0.0").unwrap(), 0.0); - assert_eq!(parse_f64("0x0p0").unwrap(), 0.0); - assert_eq!(parse_f64("0x0.0p0").unwrap(), 0.0); - assert_eq!(parse_f64("-0x.1p-3").unwrap(), -0.0078125); - assert_eq!(parse_f64("-0x.ep-3").unwrap(), -0.109375); - } - - #[test] - fn 
test_parse_float_from_invalid_values() { - let expected_error = ParseNumberError::Float; - assert_eq!(parse_f64("").unwrap_err(), expected_error); - assert_eq!(parse_f64("1").unwrap_err(), expected_error); - assert_eq!(parse_f64("1p").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x").unwrap_err(), expected_error); - assert_eq!(parse_f64("0xG").unwrap_err(), expected_error); - assert_eq!(parse_f64("0xp").unwrap_err(), expected_error); - assert_eq!(parse_f64("0xp3").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1p").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1p+").unwrap_err(), expected_error); - assert_eq!(parse_f64("-0xx1p1").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.k").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1").unwrap_err(), expected_error); - assert_eq!(parse_f64("-0x1pa").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.1pk").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.8p2z").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1p3.2").unwrap_err(), expected_error); - assert_eq!(parse_f64("-0x.ep-3z").unwrap_err(), expected_error); - } - - #[test] - fn test_parse_precise_number_count_digits() { - let precise_num = parse_number("0x1.2").unwrap(); // 1.125 decimal - assert_eq!(precise_num.num_integral_digits, 1); - assert_eq!(precise_num.num_fractional_digits, 3); - - let precise_num = parse_number("-0x1.2").unwrap(); // -1.125 decimal - assert_eq!(precise_num.num_integral_digits, 2); - assert_eq!(precise_num.num_fractional_digits, 3); - - let precise_num = parse_number("0x123.8").unwrap(); // 291.5 decimal - assert_eq!(precise_num.num_integral_digits, 3); - assert_eq!(precise_num.num_fractional_digits, 1); - - let precise_num = parse_number("-0x123.8").unwrap(); // -291.5 decimal - assert_eq!(precise_num.num_integral_digits, 4); - 
assert_eq!(precise_num.num_fractional_digits, 1); - } - - #[test] - fn test_parse_precision_valid_values() { - assert_eq!(parse_precision("1"), Some(0)); - assert_eq!(parse_precision("0x1"), Some(0)); - assert_eq!(parse_precision("0x1.1"), None); - assert_eq!(parse_precision("0x1.1p2"), None); - assert_eq!(parse_precision("0x1.1p-2"), None); - assert_eq!(parse_precision(".1"), Some(1)); - assert_eq!(parse_precision("1.1"), Some(1)); - assert_eq!(parse_precision("1.12"), Some(2)); - assert_eq!(parse_precision("1.12345678"), Some(8)); - assert_eq!(parse_precision("1.12345678e-3"), Some(11)); - assert_eq!(parse_precision("1.1e-1"), Some(2)); - assert_eq!(parse_precision("1.1e-3"), Some(4)); - } - - #[test] - fn test_parse_precision_invalid_values() { - // Just to make sure it doesn't crash on incomplete values/bad format - // Good enough for now. - assert_eq!(parse_precision("1."), Some(0)); - assert_eq!(parse_precision("1e"), Some(0)); - assert_eq!(parse_precision("1e-"), Some(0)); - assert_eq!(parse_precision("1e+"), Some(0)); - assert_eq!(parse_precision("1em"), Some(0)); - } -} -*/ diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 6d839eccb..11e00cacd 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -9,11 +9,9 @@ //! [`PreciseNumber`] struct. use std::str::FromStr; -use bigdecimal::BigDecimal; -use num_traits::Zero; use uucore::format::num_parser::{ExtendedParser, ExtendedParserError}; -use crate::{hexadecimalfloat, number::PreciseNumber}; +use crate::number::PreciseNumber; use uucore::format::ExtendedBigDecimal; /// An error returned when parsing a number fails. @@ -23,10 +21,11 @@ pub enum ParseNumberError { Nan, } -// Compute the number of integral digits in input string. We know that the -// string has already been parsed correctly, so we don't need to be too -// careful. 
-fn compute_num_integral_digits(input: &str, _number: &BigDecimal) -> usize { +// Compute the number of integral and fractional digits in input string, +// and wrap the result in a PreciseNumber. +// We know that the string has already been parsed correctly, so we don't +// need to be too careful. +fn compute_num_digits(input: &str, ebd: ExtendedBigDecimal) -> PreciseNumber { let input = input.to_lowercase(); let mut input = input.trim_start(); @@ -35,9 +34,20 @@ fn compute_num_integral_digits(input: &str, _number: &BigDecimal) -> usize { input = trimmed; } - // Integral digits for an hex number is ill-defined. + // Integral digits for any hex number are ill-defined (0 is fine as an output) + // Fractional digits for a floating-point hex number are ill-defined, return None + // as we'll totally ignore that number for precision computations. + // Still return 0 for hex integers though. if input.starts_with("0x") || input.starts_with("-0x") { - return 0; + return PreciseNumber { + number: ebd, + num_integral_digits: 0, + num_fractional_digits: if input.contains(".") || input.contains("p") { + None + } else { + Some(0) + }, + }; } // Split the exponent part, if any @@ -45,17 +55,19 @@ fn compute_num_integral_digits(input: &str, _number: &BigDecimal) -> usize { debug_assert!(parts.len() <= 2); // Count all the digits up to `.`, `-` sign is included. - let digits: usize = match parts[0].find(".") { + let (mut int_digits, mut frac_digits) = match parts[0].find(".") { Some(i) => { // Cover special case .X and -.X where we behave as if there was a leading 0: // 0.X, -0.X. 
- match i { + let int_digits = match i { 0 => 1, 1 if parts[0].starts_with("-") => 2, _ => i, - } + }; + + (int_digits, parts[0].len() - i - 1) } - None => parts[0].len(), + None => (parts[0].len(), 0), }; // If there is an exponent, reparse that (yes this is not optimal, @@ -63,14 +75,22 @@ fn compute_num_integral_digits(input: &str, _number: &BigDecimal) -> usize { if parts.len() == 2 { let exp = parts[1].parse::<i64>().unwrap_or(0); // For positive exponents, effectively expand the number. Ignore negative exponents. - // Also ignore overflowed exponents (default 0 above). + // Also ignore overflowed exponents (unwrap_or(0)). if exp > 0 { - digits + exp as usize + int_digits += exp.try_into().unwrap_or(0) + }; + frac_digits = if exp < frac_digits as i64 { + // Subtract from i128 to avoid any overflow + (frac_digits as i128 - exp as i128).try_into().unwrap_or(0) } else { - digits + 0 } - } else { - digits + } + + PreciseNumber { + number: ebd, + num_integral_digits: int_digits, + num_fractional_digits: Some(frac_digits), } } @@ -80,36 +100,29 @@ impl FromStr for PreciseNumber { type Err = ParseNumberError; fn from_str(input: &str) -> Result<Self, Self::Err> { let ebd = match ExtendedBigDecimal::extended_parse(input) { - Ok(ebd) => ebd, + Ok(ebd) => match ebd { + // Handle special values + ExtendedBigDecimal::BigDecimal(_) | ExtendedBigDecimal::MinusZero => { + // TODO: GNU `seq` treats small numbers < 1e-4950 as 0, we could do the same + // to avoid printing senselessly small numbers. + ebd + } + ExtendedBigDecimal::Infinity | ExtendedBigDecimal::MinusInfinity => { + return Ok(PreciseNumber { + number: ebd, + num_integral_digits: 0, + num_fractional_digits: Some(0), + }); + } + ExtendedBigDecimal::Nan | ExtendedBigDecimal::MinusNan => { + return Err(ParseNumberError::Nan); + } + }, Err(ExtendedParserError::Underflow(ebd)) => ebd, // Treat underflow as 0 Err(_) => return Err(ParseNumberError::Float), }; - // Handle special values, get a BigDecimal to help digit-counting. 
- let bd = match ebd { - ExtendedBigDecimal::Infinity | ExtendedBigDecimal::MinusInfinity => { - return Ok(PreciseNumber { - number: ebd, - num_integral_digits: 0, - num_fractional_digits: Some(0), - }); - } - ExtendedBigDecimal::Nan | ExtendedBigDecimal::MinusNan => { - return Err(ParseNumberError::Nan); - } - ExtendedBigDecimal::BigDecimal(ref bd) => { - // TODO: `seq` treats small numbers < 1e-4950 as 0, we could do the same - // to avoid printing senselessly small numbers. - bd.clone() - } - ExtendedBigDecimal::MinusZero => BigDecimal::zero(), - }; - - Ok(PreciseNumber { - number: ebd, - num_integral_digits: compute_num_integral_digits(input, &bd), - num_fractional_digits: hexadecimalfloat::parse_precision(input), - }) + Ok(compute_num_digits(input, ebd)) } } diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 3c8a275d4..2ef829a6d 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -15,7 +15,6 @@ use uucore::format::{ExtendedBigDecimal, Format, num_format}; use uucore::{format_usage, help_about, help_usage}; mod error; -mod hexadecimalfloat; // public to allow fuzzing #[cfg(fuzzing)]