diff --git a/src/uu/seq/src/error.rs b/src/uu/seq/src/error.rs index 90b1a8416..8c951240f 100644 --- a/src/uu/seq/src/error.rs +++ b/src/uu/seq/src/error.rs @@ -34,7 +34,6 @@ fn parse_error_type(e: &ParseNumberError) -> &'static str { match e { ParseNumberError::Float => "floating point", ParseNumberError::Nan => "'not-a-number'", - ParseNumberError::Hex => "hexadecimal", } } diff --git a/src/uu/seq/src/hexadecimalfloat.rs b/src/uu/seq/src/hexadecimalfloat.rs deleted file mode 100644 index 1624fb183..000000000 --- a/src/uu/seq/src/hexadecimalfloat.rs +++ /dev/null @@ -1,404 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. -// spell-checker:ignore extendedbigdecimal bigdecimal hexdigit numberparse -use crate::number::PreciseNumber; -use crate::numberparse::ParseNumberError; -use bigdecimal::BigDecimal; -use num_traits::FromPrimitive; -use uucore::format::ExtendedBigDecimal; - -/// The base of the hex number system -const HEX_RADIX: u32 = 16; - -/// Parse a number from a floating-point hexadecimal exponent notation. -/// -/// # Errors -/// Returns [`Err`] if: -/// - the input string is not a valid hexadecimal string -/// - the input data can't be interpreted as ['f64'] or ['BigDecimal'] -/// -/// # Examples -/// -/// ```rust,ignore -/// let input = "0x1.4p-2"; -/// let expected = 0.3125; -/// match input.parse_number::().unwrap().number { -/// ExtendedBigDecimal::BigDecimal(bd) => assert_eq!(bd.to_f64().unwrap(),expected), -/// _ => unreachable!() -/// }; -/// ``` -pub fn parse_number(s: &str) -> Result { - // Parse floating point parts - let (sign, remain) = parse_sign_multiplier(s.trim())?; - let remain = parse_hex_prefix(remain)?; - let (integral_part, remain) = parse_integral_part(remain)?; - let (fractional_part, remain) = parse_fractional_part(remain)?; - let (exponent_part, remain) = parse_exponent_part(remain)?; - - // Check parts. Rise error if: - // - The input string is not fully consumed - // - Only integral part is presented - // - Only exponent part is presented - // - All 3 parts are empty - match ( - integral_part, - fractional_part, - exponent_part, - remain.is_empty(), - ) { - (_, _, _, false) - | (Some(_), None, None, _) - | (None, None, Some(_), _) - | (None, None, None, _) => return Err(ParseNumberError::Float), - _ => (), - }; - - // Build a number from parts - let integral_value = integral_part.unwrap_or(0.0); - let fractional_value = fractional_part.unwrap_or(0.0); - let exponent_value = (2.0_f64).powi(exponent_part.unwrap_or(0)); - let value = sign * (integral_value + fractional_value) * exponent_value; - - // Build a PreciseNumber - let number = BigDecimal::from_f64(value).ok_or(ParseNumberError::Float)?; - let num_fractional_digits = number.fractional_digit_count().max(0) as u64; - let num_integral_digits = if value.abs() < 1.0 { - 0 - } else { - number.digits() - num_fractional_digits - }; - let num_integral_digits = num_integral_digits + if sign < 0.0 { 1 } else { 0 }; - - Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(number), - num_integral_digits as usize, - num_fractional_digits as usize, - )) -} - -// Detect number precision similar to GNU coreutils. Refer to scan_arg in seq.c. There are still -// some differences from the GNU version, but this should be sufficient to test the idea. -pub fn parse_precision(s: &str) -> Option { - let hex_index = s.find(['x', 'X']); - let point_index = s.find('.'); - - if hex_index.is_some() { - // Hex value. Returns: - // - 0 for a hexadecimal integer (filled above) - // - None for a hexadecimal floating-point number (the default value of precision) - let power_index = s.find(['p', 'P']); - if point_index.is_none() && power_index.is_none() { - // No decimal point and no 'p' (power) => integer => precision = 0 - return Some(0); - } else { - return None; - } - } - - // This is a decimal floating point. The precision depends on two parameters: - // - the number of fractional digits - // - the exponent - // Let's detect the number of fractional digits - let fractional_length = if let Some(point_index) = point_index { - s[point_index + 1..] - .chars() - .take_while(|c| c.is_ascii_digit()) - .count() - } else { - 0 - }; - - let mut precision = Some(fractional_length); - - // Let's update the precision if exponent is present - if let Some(exponent_index) = s.find(['e', 'E']) { - let exponent_value: i32 = s[exponent_index + 1..].parse().unwrap_or(0); - if exponent_value < 0 { - precision = precision.map(|p| p + exponent_value.unsigned_abs() as usize); - } else { - precision = precision.map(|p| p - p.min(exponent_value as usize)); - } - } - precision -} - -/// Parse the sign multiplier. -/// -/// If a sign is present, the function reads and converts it into a multiplier. -/// If no sign is present, a multiplier of 1.0 is used. -/// -/// # Errors -/// -/// Returns [`Err`] if the input string does not start with a recognized sign or '0' symbol. -fn parse_sign_multiplier(s: &str) -> Result<(f64, &str), ParseNumberError> { - if let Some(remain) = s.strip_prefix('-') { - Ok((-1.0, remain)) - } else if let Some(remain) = s.strip_prefix('+') { - Ok((1.0, remain)) - } else if s.starts_with('0') { - Ok((1.0, s)) - } else { - Err(ParseNumberError::Float) - } -} - -/// Parses the `0x` prefix in a case-insensitive manner. -/// -/// # Errors -/// -/// Returns [`Err`] if the input string does not contain the required prefix. -fn parse_hex_prefix(s: &str) -> Result<&str, ParseNumberError> { - if !(s.starts_with("0x") || s.starts_with("0X")) { - return Err(ParseNumberError::Float); - } - Ok(&s[2..]) -} - -/// Parse the integral part in hexadecimal notation. -/// -/// The integral part is hexadecimal number located after the '0x' prefix and before '.' or 'p' -/// symbols. For example, the number 0x1.234p2 has an integral part 1. -/// -/// This part is optional. -/// -/// # Errors -/// -/// Returns [`Err`] if the integral part is present but a hexadecimal number cannot be parsed from the input string. -fn parse_integral_part(s: &str) -> Result<(Option, &str), ParseNumberError> { - // This part is optional. Skip parsing if symbol is not a hex digit. - let length = s.chars().take_while(|c| c.is_ascii_hexdigit()).count(); - if length > 0 { - let integer = - u64::from_str_radix(&s[..length], HEX_RADIX).map_err(|_| ParseNumberError::Float)?; - Ok((Some(integer as f64), &s[length..])) - } else { - Ok((None, s)) - } -} - -/// Parse the fractional part in hexadecimal notation. -/// -/// The function calculates the sum of the digits after the '.' (dot) sign. Each Nth digit is -/// interpreted as digit / 16^n, where n represents the position after the dot starting from 1. -/// -/// For example, the number 0x1.234p2 has a fractional part 234, which can be interpreted as -/// 2/16^1 + 3/16^2 + 4/16^3, where 16 is the radix of the hexadecimal number system. This equals -/// 0.125 + 0.01171875 + 0.0009765625 = 0.1376953125 in decimal. And this is exactly what the -/// function does. -/// -/// This part is optional. -/// -/// # Errors -/// -/// Returns [`Err`] if the fractional part is present but a hexadecimal number cannot be parsed from the input string. -fn parse_fractional_part(s: &str) -> Result<(Option, &str), ParseNumberError> { - // This part is optional and follows after the '.' symbol. Skip parsing if the dot is not present. - if !s.starts_with('.') { - return Ok((None, s)); - } - - let s = &s[1..]; - let mut multiplier = 1.0 / HEX_RADIX as f64; - let mut total = 0.0; - let mut length = 0; - - for c in s.chars().take_while(|c| c.is_ascii_hexdigit()) { - let digit = c - .to_digit(HEX_RADIX) - .map(|x| x as u8) - .ok_or(ParseNumberError::Float)?; - total += (digit as f64) * multiplier; - multiplier /= HEX_RADIX as f64; - length += 1; - } - - if length == 0 { - return Err(ParseNumberError::Float); - } - Ok((Some(total), &s[length..])) -} - -/// Parse the exponent part in hexadecimal notation. -/// -/// The exponent part is a decimal number located after the 'p' symbol. -/// For example, the number 0x1.234p2 has an exponent part 2. -/// -/// This part is optional. -/// -/// # Errors -/// -/// Returns [`Err`] if the exponent part is presented but a decimal number cannot be parsed from -/// the input string. -fn parse_exponent_part(s: &str) -> Result<(Option, &str), ParseNumberError> { - // This part is optional and follows after 'p' or 'P' symbols. Skip parsing if the symbols are not present - if !(s.starts_with('p') || s.starts_with('P')) { - return Ok((None, s)); - } - - let s = &s[1..]; - let length = s - .chars() - .take_while(|c| c.is_ascii_digit() || *c == '-' || *c == '+') - .count(); - - if length == 0 { - return Err(ParseNumberError::Float); - } - - let value = s[..length].parse().map_err(|_| ParseNumberError::Float)?; - Ok((Some(value), &s[length..])) -} - -#[cfg(test)] -mod tests { - - use super::{parse_number, parse_precision}; - use crate::{ExtendedBigDecimal, numberparse::ParseNumberError}; - use bigdecimal::BigDecimal; - use num_traits::ToPrimitive; - - fn parse_big_decimal(s: &str) -> Result { - match parse_number(s)?.number { - ExtendedBigDecimal::BigDecimal(bd) => Ok(bd), - _ => Err(ParseNumberError::Float), - } - } - - fn parse_f64(s: &str) -> Result { - parse_big_decimal(s)? - .to_f64() - .ok_or(ParseNumberError::Float) - } - - #[test] - fn test_parse_precise_number_case_insensitive() { - assert_eq!(parse_f64("0x1P1").unwrap(), 2.0); - assert_eq!(parse_f64("0x1p1").unwrap(), 2.0); - } - - #[test] - fn test_parse_precise_number_plus_minus_prefixes() { - assert_eq!(parse_f64("+0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("-0x1p1").unwrap(), -2.0); - } - - #[test] - fn test_parse_precise_number_power_signs() { - assert_eq!(parse_f64("0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("0x1p+1").unwrap(), 2.0); - assert_eq!(parse_f64("0x1p-1").unwrap(), 0.5); - } - - #[test] - fn test_parse_precise_number_hex() { - assert_eq!(parse_f64("0xd.dp-1").unwrap(), 6.90625); - } - - #[test] - fn test_parse_precise_number_no_power() { - assert_eq!(parse_f64("0x123.a").unwrap(), 291.625); - } - - #[test] - fn test_parse_precise_number_no_fractional() { - assert_eq!(parse_f64("0x333p-4").unwrap(), 51.1875); - } - - #[test] - fn test_parse_precise_number_no_integral() { - assert_eq!(parse_f64("0x.9").unwrap(), 0.5625); - assert_eq!(parse_f64("0x.9p2").unwrap(), 2.25); - } - - #[test] - fn test_parse_precise_number_from_valid_values() { - assert_eq!(parse_f64("0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("+0x1p1").unwrap(), 2.0); - assert_eq!(parse_f64("-0x1p1").unwrap(), -2.0); - assert_eq!(parse_f64("0x1p-1").unwrap(), 0.5); - assert_eq!(parse_f64("0x1.8").unwrap(), 1.5); - assert_eq!(parse_f64("-0x1.8").unwrap(), -1.5); - assert_eq!(parse_f64("0x1.8p2").unwrap(), 6.0); - assert_eq!(parse_f64("0x1.8p+2").unwrap(), 6.0); - assert_eq!(parse_f64("0x1.8p-2").unwrap(), 0.375); - assert_eq!(parse_f64("0x.8").unwrap(), 0.5); - assert_eq!(parse_f64("0x10p0").unwrap(), 16.0); - assert_eq!(parse_f64("0x0.0").unwrap(), 0.0); - assert_eq!(parse_f64("0x0p0").unwrap(), 0.0); - assert_eq!(parse_f64("0x0.0p0").unwrap(), 0.0); - assert_eq!(parse_f64("-0x.1p-3").unwrap(), -0.0078125); - assert_eq!(parse_f64("-0x.ep-3").unwrap(), -0.109375); - } - - #[test] - fn test_parse_float_from_invalid_values() { - let expected_error = ParseNumberError::Float; - assert_eq!(parse_f64("").unwrap_err(), expected_error); - assert_eq!(parse_f64("1").unwrap_err(), expected_error); - assert_eq!(parse_f64("1p").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x").unwrap_err(), expected_error); - assert_eq!(parse_f64("0xG").unwrap_err(), expected_error); - assert_eq!(parse_f64("0xp").unwrap_err(), expected_error); - assert_eq!(parse_f64("0xp3").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1p").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1p+").unwrap_err(), expected_error); - assert_eq!(parse_f64("-0xx1p1").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.k").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1").unwrap_err(), expected_error); - assert_eq!(parse_f64("-0x1pa").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.1pk").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1.8p2z").unwrap_err(), expected_error); - assert_eq!(parse_f64("0x1p3.2").unwrap_err(), expected_error); - assert_eq!(parse_f64("-0x.ep-3z").unwrap_err(), expected_error); - } - - #[test] - fn test_parse_precise_number_count_digits() { - let precise_num = parse_number("0x1.2").unwrap(); // 1.125 decimal - assert_eq!(precise_num.num_integral_digits, 1); - assert_eq!(precise_num.num_fractional_digits, 3); - - let precise_num = parse_number("-0x1.2").unwrap(); // -1.125 decimal - assert_eq!(precise_num.num_integral_digits, 2); - assert_eq!(precise_num.num_fractional_digits, 3); - - let precise_num = parse_number("0x123.8").unwrap(); // 291.5 decimal - assert_eq!(precise_num.num_integral_digits, 3); - assert_eq!(precise_num.num_fractional_digits, 1); - - let precise_num = parse_number("-0x123.8").unwrap(); // -291.5 decimal - assert_eq!(precise_num.num_integral_digits, 4); - assert_eq!(precise_num.num_fractional_digits, 1); - } - - #[test] - fn test_parse_precision_valid_values() { - assert_eq!(parse_precision("1"), Some(0)); - assert_eq!(parse_precision("0x1"), Some(0)); - assert_eq!(parse_precision("0x1.1"), None); - assert_eq!(parse_precision("0x1.1p2"), None); - assert_eq!(parse_precision("0x1.1p-2"), None); - assert_eq!(parse_precision(".1"), Some(1)); - assert_eq!(parse_precision("1.1"), Some(1)); - assert_eq!(parse_precision("1.12"), Some(2)); - assert_eq!(parse_precision("1.12345678"), Some(8)); - assert_eq!(parse_precision("1.12345678e-3"), Some(11)); - assert_eq!(parse_precision("1.1e-1"), Some(2)); - assert_eq!(parse_precision("1.1e-3"), Some(4)); - } - - #[test] - fn test_parse_precision_invalid_values() { - // Just to make sure it doesn't crash on incomplete values/bad format - // Good enough for now. - assert_eq!(parse_precision("1."), Some(0)); - assert_eq!(parse_precision("1e"), Some(0)); - assert_eq!(parse_precision("1e-"), Some(0)); - assert_eq!(parse_precision("1e+"), Some(0)); - assert_eq!(parse_precision("1em"), Some(0)); - } -} diff --git a/src/uu/seq/src/number.rs b/src/uu/seq/src/number.rs index bbd5a9564..b70ba446e 100644 --- a/src/uu/seq/src/number.rs +++ b/src/uu/seq/src/number.rs @@ -13,22 +13,26 @@ use uucore::format::ExtendedBigDecimal; /// on how many significant digits to use when displaying the number. /// The [`PreciseNumber::num_integral_digits`] field also includes the width needed to /// display the "-" character for a negative number. +/// [`PreciseNumber::num_fractional_digits`] provides the number of decimal digits after +/// the decimal point (a.k.a. precision), or None if that number cannot intuitively be +/// obtained (i.e. hexadecimal floats). +/// Note: Those 2 fields should not necessarily be interpreted literally, but as matching +/// GNU `seq` behavior: the exact way of guessing desired precision from user input is a +/// matter of interpretation. /// /// You can get an instance of this struct by calling [`str::parse`]. #[derive(Debug)] pub struct PreciseNumber { pub number: ExtendedBigDecimal, pub num_integral_digits: usize, - - #[allow(dead_code)] - pub num_fractional_digits: usize, + pub num_fractional_digits: Option, } impl PreciseNumber { pub fn new( number: ExtendedBigDecimal, num_integral_digits: usize, - num_fractional_digits: usize, + num_fractional_digits: Option, ) -> Self { Self { number, @@ -42,7 +46,7 @@ impl PreciseNumber { // We would like to implement `num_traits::One`, but it requires // a multiplication implementation, and we don't want to // implement that here. - Self::new(ExtendedBigDecimal::one(), 1, 0) + Self::new(ExtendedBigDecimal::one(), 1, Some(0)) } /// Decide whether this number is zero (either positive or negative). diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 47a9d130d..11a9df076 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -9,13 +9,8 @@ //! [`PreciseNumber`] struct. use std::str::FromStr; -use bigdecimal::BigDecimal; -use num_bigint::BigInt; -use num_bigint::Sign; -use num_traits::Num; -use num_traits::Zero; +use uucore::format::num_parser::{ExtendedParser, ExtendedParserError}; -use crate::hexadecimalfloat; use crate::number::PreciseNumber; use uucore::format::ExtendedBigDecimal; @@ -24,357 +19,107 @@ use uucore::format::ExtendedBigDecimal; pub enum ParseNumberError { Float, Nan, - Hex, } -/// Decide whether a given string and its parsed `BigInt` is negative zero. -fn is_minus_zero_int(s: &str, n: &BigDecimal) -> bool { - s.starts_with('-') && n == &BigDecimal::zero() -} +// Compute the number of integral and fractional digits in input string, +// and wrap the result in a PreciseNumber. +// We know that the string has already been parsed correctly, so we don't +// need to be too careful. +fn compute_num_digits(input: &str, ebd: ExtendedBigDecimal) -> PreciseNumber { + let input = input.to_lowercase(); -/// Decide whether a given string and its parsed `BigDecimal` is negative zero. -fn is_minus_zero_float(s: &str, x: &BigDecimal) -> bool { - s.starts_with('-') && x == &BigDecimal::zero() -} + // Leading + is ignored for this. + let input = input.trim_start().strip_prefix('+').unwrap_or(&input); -/// Parse a number with neither a decimal point nor an exponent. -/// -/// # Errors -/// -/// This function returns an error if the input string is a variant of -/// "NaN" or if no [`BigInt`] could be parsed from the string. -/// -/// # Examples -/// -/// ```rust,ignore -/// let actual = "0".parse::().unwrap().number; -/// let expected = Number::BigInt(BigInt::zero()); -/// assert_eq!(actual, expected); -/// ``` -fn parse_no_decimal_no_exponent(s: &str) -> Result { - match s.parse::() { - Ok(n) => { - // If `s` is '-0', then `parse()` returns `BigInt::zero()`, - // but we need to return `Number::MinusZeroInt` instead. - if is_minus_zero_int(s, &n) { - Ok(PreciseNumber::new( - ExtendedBigDecimal::MinusZero, - s.len(), - 0, - )) + // Integral digits for any hex number is ill-defined (0 is fine as an output) + // Fractional digits for an floating hex number is ill-defined, return None + // as we'll totally ignore that number for precision computations. + // Still return 0 for hex integers though. + if input.starts_with("0x") || input.starts_with("-0x") { + return PreciseNumber { + number: ebd, + num_integral_digits: 0, + num_fractional_digits: if input.contains(".") || input.contains("p") { + None } else { - Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(n), - s.len(), - 0, - )) - } - } - Err(_) => { - // Possibly "NaN" or "inf". - let float_val = match s.to_ascii_lowercase().as_str() { - "inf" | "infinity" => ExtendedBigDecimal::Infinity, - "-inf" | "-infinity" => ExtendedBigDecimal::MinusInfinity, - "nan" | "-nan" => return Err(ParseNumberError::Nan), - _ => return Err(ParseNumberError::Float), + Some(0) + }, + }; + } + + // Split the exponent part, if any + let parts: Vec<&str> = input.split("e").collect(); + debug_assert!(parts.len() <= 2); + + // Count all the digits up to `.`, `-` sign is included. + let (mut int_digits, mut frac_digits) = match parts[0].find(".") { + Some(i) => { + // Cover special case .X and -.X where we behave as if there was a leading 0: + // 0.X, -0.X. + let int_digits = match i { + 0 => 1, + 1 if parts[0].starts_with("-") => 2, + _ => i, }; - Ok(PreciseNumber::new(float_val, 0, 0)) + + (int_digits, parts[0].len() - i - 1) } - } -} - -/// Parse a number with an exponent but no decimal point. -/// -/// # Errors -/// -/// This function returns an error if `s` is not a valid number. -/// -/// # Examples -/// -/// ```rust,ignore -/// let actual = "1e2".parse::().unwrap().number; -/// let expected = "100".parse::().unwrap(); -/// assert_eq!(actual, expected); -/// ``` -fn parse_exponent_no_decimal(s: &str, j: usize) -> Result { - let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?; - // If the exponent is strictly less than zero, then the number - // should be treated as a floating point number that will be - // displayed in decimal notation. For example, "1e-2" will be - // displayed as "0.01", but "1e2" will be displayed as "100", - // without a decimal point. - - // In ['BigDecimal'], a positive scale represents a negative power of 10. - // This means the exponent value from the number must be inverted. However, - // since the |i64::MIN| > |i64::MAX| (i.e. |−2^63| > |2^63−1|) inverting a - // valid negative value could result in an overflow. To prevent this, we - // limit the minimal value with i64::MIN + 1. - let exponent = exponent.max(i64::MIN + 1); - let base: BigInt = s[..j].parse().map_err(|_| ParseNumberError::Float)?; - let x = if base.is_zero() { - BigDecimal::zero() - } else { - BigDecimal::from_bigint(base, -exponent) + None => (parts[0].len(), 0), }; - let num_integral_digits = if is_minus_zero_float(s, &x) { - if exponent > 0 { - (2usize) - .checked_add(exponent as usize) - .ok_or(ParseNumberError::Float)? - } else { - 2usize - } - } else { - let total = (j as i64) - .checked_add(exponent) - .ok_or(ParseNumberError::Float)?; - let result = if total < 1 { - 1 - } else { - total.try_into().map_err(|_| ParseNumberError::Float)? + // If there is an exponent, reparse that (yes this is not optimal, + // but we can't necessarily exactly recover that from the parsed number). + if parts.len() == 2 { + let exp = parts[1].parse::().unwrap_or(0); + // For positive exponents, effectively expand the number. Ignore negative exponents. + // Also ignore overflowed exponents (unwrap_or(0)). + if exp > 0 { + int_digits += exp.try_into().unwrap_or(0) }; - if x.sign() == Sign::Minus { - result + 1 + frac_digits = if exp < frac_digits as i64 { + // Subtract from i128 to avoid any overflow + (frac_digits as i128 - exp as i128).try_into().unwrap_or(0) } else { - result + 0 } - }; - let num_fractional_digits = if exponent < 0 { -exponent as usize } else { 0 }; + } - if is_minus_zero_float(s, &x) { - Ok(PreciseNumber::new( - ExtendedBigDecimal::MinusZero, - num_integral_digits, - num_fractional_digits, - )) - } else { - Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(x), - num_integral_digits, - num_fractional_digits, - )) - } -} - -/// Parse a number with a decimal point but no exponent. -/// -/// # Errors -/// -/// This function returns an error if `s` is not a valid number. -/// -/// # Examples -/// -/// ```rust,ignore -/// let actual = "1.2".parse::().unwrap().number; -/// let expected = "1.2".parse::().unwrap(); -/// assert_eq!(actual, expected); -/// ``` -fn parse_decimal_no_exponent(s: &str, i: usize) -> Result { - let x: BigDecimal = s.parse().map_err(|_| ParseNumberError::Float)?; - - // The number of integral digits is the number of chars until the period. - // - // This includes the negative sign if there is one. Also, it is - // possible that a number is expressed as "-.123" instead of - // "-0.123", but when we display the number we want it to include - // the leading 0. - let num_integral_digits = if s.starts_with("-.") { i + 1 } else { i }; - let num_fractional_digits = s.len() - (i + 1); - if is_minus_zero_float(s, &x) { - Ok(PreciseNumber::new( - ExtendedBigDecimal::MinusZero, - num_integral_digits, - num_fractional_digits, - )) - } else { - Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(x), - num_integral_digits, - num_fractional_digits, - )) - } -} - -/// Parse a number with both a decimal point and an exponent. -/// -/// # Errors -/// -/// This function returns an error if `s` is not a valid number. -/// -/// # Examples -/// -/// ```rust,ignore -/// let actual = "1.2e3".parse::().unwrap().number; -/// let expected = "1200".parse::().unwrap(); -/// assert_eq!(actual, expected); -/// ``` -fn parse_decimal_and_exponent( - s: &str, - i: usize, - j: usize, -) -> Result { - // Because of the match guard, this subtraction will not underflow. - let num_digits_between_decimal_point_and_e = (j - (i + 1)) as i64; - let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?; - let val: BigDecimal = { - let parsed_decimal = s - .parse::() - .map_err(|_| ParseNumberError::Float)?; - if parsed_decimal == BigDecimal::zero() { - BigDecimal::zero() - } else { - parsed_decimal - } - }; - - let num_integral_digits = { - let minimum: usize = { - let integral_part: f64 = s[..j].parse().map_err(|_| ParseNumberError::Float)?; - if integral_part.is_sign_negative() { - if exponent > 0 { - 2usize - .checked_add(exponent as usize) - .ok_or(ParseNumberError::Float)? - } else { - 2usize - } - } else { - 1 - } - }; - // Special case: if the string is "-.1e2", we need to treat it - // as if it were "-0.1e2". - let total = { - let total = (i as i64) - .checked_add(exponent) - .ok_or(ParseNumberError::Float)?; - if s.starts_with("-.") { - total.checked_add(1).ok_or(ParseNumberError::Float)? - } else { - total - } - }; - if total < minimum as i64 { - minimum - } else { - total.try_into().map_err(|_| ParseNumberError::Float)? - } - }; - - let num_fractional_digits = if num_digits_between_decimal_point_and_e < exponent { - 0 - } else { - (num_digits_between_decimal_point_and_e - exponent) - .try_into() - .unwrap() - }; - - if is_minus_zero_float(s, &val) { - Ok(PreciseNumber::new( - ExtendedBigDecimal::MinusZero, - num_integral_digits, - num_fractional_digits, - )) - } else { - Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(val), - num_integral_digits, - num_fractional_digits, - )) - } -} - -/// Parse a hexadecimal integer from a string. -/// -/// # Errors -/// -/// This function returns an error if no [`BigInt`] could be parsed from -/// the string. -/// -/// # Examples -/// -/// ```rust,ignore -/// let actual = "0x0".parse::().unwrap().number; -/// let expected = Number::BigInt(BigInt::zero()); -/// assert_eq!(actual, expected); -/// ``` -fn parse_hexadecimal(s: &str) -> Result { - if s.find(['.', 'p', 'P']).is_some() { - hexadecimalfloat::parse_number(s) - } else { - parse_hexadecimal_integer(s) - } -} - -fn parse_hexadecimal_integer(s: &str) -> Result { - let (is_neg, s) = if s.starts_with('-') { - (true, &s[3..]) - } else { - (false, &s[2..]) - }; - - if s.starts_with('-') || s.starts_with('+') { - // Even though this is more like an invalid hexadecimal number, - // GNU reports this as an invalid floating point number, so we - // use `ParseNumberError::Float` to match that behavior. - return Err(ParseNumberError::Float); - } - - let num = BigInt::from_str_radix(s, 16).map_err(|_| ParseNumberError::Hex)?; - let num = BigDecimal::from(num); - - match (is_neg, num == BigDecimal::zero()) { - (true, true) => Ok(PreciseNumber::new(ExtendedBigDecimal::MinusZero, 2, 0)), - (true, false) => Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(-num), - 0, - 0, - )), - (false, _) => Ok(PreciseNumber::new( - ExtendedBigDecimal::BigDecimal(num), - 0, - 0, - )), + PreciseNumber { + number: ebd, + num_integral_digits: int_digits, + num_fractional_digits: Some(frac_digits), } } +// Note: We could also have provided an `ExtendedParser` implementation for +// PreciseNumber, but we want a simpler custom error. impl FromStr for PreciseNumber { type Err = ParseNumberError; - fn from_str(mut s: &str) -> Result { - // Trim leading whitespace. - s = s.trim_start(); + fn from_str(input: &str) -> Result { + let ebd = match ExtendedBigDecimal::extended_parse(input) { + Ok(ebd) => match ebd { + // Handle special values + ExtendedBigDecimal::BigDecimal(_) | ExtendedBigDecimal::MinusZero => { + // TODO: GNU `seq` treats small numbers < 1e-4950 as 0, we could do the same + // to avoid printing senselessly small numbers. + ebd + } + ExtendedBigDecimal::Infinity | ExtendedBigDecimal::MinusInfinity => { + return Ok(PreciseNumber { + number: ebd, + num_integral_digits: 0, + num_fractional_digits: Some(0), + }); + } + ExtendedBigDecimal::Nan | ExtendedBigDecimal::MinusNan => { + return Err(ParseNumberError::Nan); + } + }, + Err(ExtendedParserError::Underflow(ebd)) => ebd, // Treat underflow as 0 + Err(_) => return Err(ParseNumberError::Float), + }; - // Trim a single leading "+" character. - if s.starts_with('+') { - s = &s[1..]; - } - - // Check if the string seems to be in hexadecimal format. - // - // May be 0x123 or -0x123, so the index `i` may be either 0 or 1. - if let Some(i) = s.find("0x").or_else(|| s.find("0X")) { - if i <= 1 { - return parse_hexadecimal(s); - } - } - - // Find the decimal point and the exponent symbol. Parse the - // number differently depending on its form. This is important - // because the form of the input dictates how the output will be - // presented. - match (s.find('.'), s.find(['e', 'E'])) { - // For example, "123456" or "inf". - (None, None) => parse_no_decimal_no_exponent(s), - // For example, "123e456" or "1e-2". - (None, Some(j)) => parse_exponent_no_decimal(s, j), - // For example, "123.456". - (Some(i), None) => parse_decimal_no_exponent(s, i), - // For example, "123.456e789". - (Some(i), Some(j)) if i < j => parse_decimal_and_exponent(s, i, j), - // For example, "1e2.3" or "1.2.3". - _ => Err(ParseNumberError::Float), - } + Ok(compute_num_digits(input, ebd)) } } @@ -398,7 +143,18 @@ mod tests { /// Convenience function for getting the number of fractional digits. fn num_fractional_digits(s: &str) -> usize { - s.parse::().unwrap().num_fractional_digits + s.parse::() + .unwrap() + .num_fractional_digits + .unwrap() + } + + /// Convenience function for making sure the number of fractional digits is "None" + fn num_fractional_digits_is_none(s: &str) -> bool { + s.parse::() + .unwrap() + .num_fractional_digits + .is_none() } #[test] @@ -496,7 +252,7 @@ mod tests { fn test_parse_invalid_hex() { assert_eq!( "0xg".parse::().unwrap_err(), - ParseNumberError::Hex + ParseNumberError::Float ); } @@ -535,12 +291,12 @@ mod tests { assert_eq!(num_integral_digits("-.1"), 2); // exponent, no decimal assert_eq!(num_integral_digits("123e4"), 3 + 4); - assert_eq!(num_integral_digits("123e-4"), 1); + assert_eq!(num_integral_digits("123e-4"), 3); assert_eq!(num_integral_digits("-1e-3"), 2); // decimal and exponent assert_eq!(num_integral_digits("123.45e6"), 3 + 6); - assert_eq!(num_integral_digits("123.45e-6"), 1); - assert_eq!(num_integral_digits("123.45e-1"), 2); + assert_eq!(num_integral_digits("123.45e-6"), 3); + assert_eq!(num_integral_digits("123.45e-1"), 3); assert_eq!(num_integral_digits("-0.1e0"), 2); assert_eq!(num_integral_digits("-0.1e2"), 4); assert_eq!(num_integral_digits("-.1e0"), 2); @@ -601,19 +357,23 @@ mod tests { assert_eq!(num_fractional_digits("-0.0"), 1); assert_eq!(num_fractional_digits("-0e-1"), 1); assert_eq!(num_fractional_digits("-0.0e-1"), 2); + // Hexadecimal numbers + assert_eq!(num_fractional_digits("0xff"), 0); + assert!(num_fractional_digits_is_none("0xff.1")); } #[test] fn test_parse_min_exponents() { - // Make sure exponents <= i64::MIN do not cause errors + // Make sure exponents < i64::MIN do not cause errors assert!("1e-9223372036854775807".parse::().is_ok()); assert!("1e-9223372036854775808".parse::().is_ok()); + assert!("1e-92233720368547758080".parse::().is_ok()); } #[test] fn test_parse_max_exponents() { - // Make sure exponents >= i64::MAX cause errors - assert!("1e9223372036854775807".parse::().is_err()); - assert!("1e9223372036854775808".parse::().is_err()); + // Make sure exponents much bigger than i64::MAX cause errors + assert!("1e9223372036854775807".parse::().is_ok()); + assert!("1e92233720368547758070".parse::().is_err()); } } diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 827a8335e..2ef829a6d 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -15,7 +15,6 @@ use uucore::format::{ExtendedBigDecimal, Format, num_format}; use uucore::{format_usage, help_about, help_usage}; mod error; -mod hexadecimalfloat; // public to allow fuzzing #[cfg(fuzzing)] @@ -74,11 +73,15 @@ fn split_short_args_with_value(args: impl uucore::Args) -> impl uucore::Args { } fn select_precision( - first: Option, - increment: Option, - last: Option, + first: &PreciseNumber, + increment: &PreciseNumber, + last: &PreciseNumber, ) -> Option { - match (first, increment, last) { + match ( + first.num_fractional_digits, + increment.num_fractional_digits, + last.num_fractional_digits, + ) { (Some(0), Some(0), Some(0)) => Some(0), (Some(f), Some(i), Some(_)) => Some(f.max(i)), _ => None, @@ -111,37 +114,37 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { format: matches.get_one::(OPT_FORMAT).map(|s| s.as_str()), }; - let (first, first_precision) = if numbers.len() > 1 { + let first = if numbers.len() > 1 { match numbers[0].parse() { - Ok(num) => (num, hexadecimalfloat::parse_precision(numbers[0])), + Ok(num) => num, Err(e) => return Err(SeqError::ParseError(numbers[0].to_string(), e).into()), } } else { - (PreciseNumber::one(), Some(0)) + PreciseNumber::one() }; - let (increment, increment_precision) = if numbers.len() > 2 { + let increment = if numbers.len() > 2 { match numbers[1].parse() { - Ok(num) => (num, hexadecimalfloat::parse_precision(numbers[1])), + Ok(num) => num, Err(e) => return Err(SeqError::ParseError(numbers[1].to_string(), e).into()), } } else { - (PreciseNumber::one(), Some(0)) + PreciseNumber::one() }; if increment.is_zero() { return Err(SeqError::ZeroIncrement(numbers[1].to_string()).into()); } - let (last, last_precision): (PreciseNumber, Option) = { + let last: PreciseNumber = { // We are guaranteed that `numbers.len()` is greater than zero // and at most three because of the argument specification in // `uu_app()`. let n: usize = numbers.len(); match numbers[n - 1].parse() { - Ok(num) => (num, hexadecimalfloat::parse_precision(numbers[n - 1])), + Ok(num) => num, Err(e) => return Err(SeqError::ParseError(numbers[n - 1].to_string(), e).into()), } }; - let precision = select_precision(first_precision, increment_precision, last_precision); + let precision = select_precision(&first, &increment, &last); // If a format was passed on the command line, use that. // If not, use some default format based on parameters precision. diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index 83bdb7a82..b112c75d8 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -752,21 +752,23 @@ fn test_undefined() { #[test] fn test_invalid_float_point_fail_properly() { + // Note that we support arguments that are much bigger than what GNU coreutils supports. + // Tests below use exponents larger than we support (i64) new_ucmd!() - .args(&["66000e000000000000000000000000000000000000000000000000000009223372036854775807"]) + .args(&["66000e0000000000000000000000000000000000000000000000000000092233720368547758070"]) .fails() .no_stdout() - .usage_error("invalid floating point argument: '66000e000000000000000000000000000000000000000000000000000009223372036854775807'"); + .usage_error("invalid floating point argument: '66000e0000000000000000000000000000000000000000000000000000092233720368547758070'"); new_ucmd!() - .args(&["-1.1e9223372036854775807"]) + .args(&["-1.1e92233720368547758070"]) .fails() .no_stdout() - .usage_error("invalid floating point argument: '-1.1e9223372036854775807'"); + .usage_error("invalid floating point argument: '-1.1e92233720368547758070'"); new_ucmd!() - .args(&["-.1e9223372036854775807"]) + .args(&["-.1e92233720368547758070"]) .fails() .no_stdout() - .usage_error("invalid floating point argument: '-.1e9223372036854775807'"); + .usage_error("invalid floating point argument: '-.1e92233720368547758070'"); } #[test] @@ -909,6 +911,18 @@ fn test_parse_out_of_bounds_exponents() { .args(&["1e-9223372036854775808"]) .succeeds() .stdout_only(""); + + // GNU seq supports arbitrarily small exponents (and treats the value as 0). + new_ucmd!() + .args(&["1e-922337203685477580800000000", "1"]) + .succeeds() + .stdout_only("0\n1\n"); + + // Check we can also underflow to -0.0. + new_ucmd!() + .args(&["-1e-922337203685477580800000000", "1"]) + .succeeds() + .stdout_only("-0\n1\n"); } #[ignore]