uucore: format: num_parser: Parse exponent part of floating point numbers

Parse numbers like 123.15e15 and 0xfp-2, and add some tests for that. `parse` is becoming more and more of a monster: we should consider splitting it into multiple parts. Fixes #7474.
2025-09-13 18:47:58 +00:00 · 2025-03-19 21:13:52 +01:00 · 2025-03-19 21:13:52 +01:00 · bd68eb8beb
commit bd68eb8beb
parent 55773e9d35
1 changed files with 89 additions and 14 deletions
--- a/src/uucore/src/lib/features/format/num_parser.rs
+++ b/src/uucore/src/lib/features/format/num_parser.rs
@ -210,6 +210,8 @@ fn parse_special_value(
    }
 }

+// TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps
+// around between integer and float parsing, and should be split in multiple parts.
 #[allow(clippy::cognitive_complexity)]
 fn parse(
    input: &str,
@ -267,21 +269,51 @@ fn parse(
    let mut chars = rest.chars().enumerate().fuse().peekable();
    let mut digits = BigUint::zero();
    let mut scale = 0i64;
+    let mut exponent = 0i64;
    while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
        chars.next();
        digits = digits * base as u8 + d;
    }

-    // Parse the fractional part of the number if there can be one and the input contains
-    // a '.' decimal separator.
-    if matches!(chars.peek(), Some(&(_, '.')))
-        && matches!(base, Base::Decimal | Base::Hexadecimal)
-        && !integral_only
-    {
-        chars.next();
-        while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
+    // Parse fractional/exponent part of the number for supported bases.
+    if matches!(base, Base::Decimal | Base::Hexadecimal) && !integral_only {
+        // Parse the fractional part of the number if there can be one and the input contains
+        // a '.' decimal separator.
+        if matches!(chars.peek(), Some(&(_, '.'))) {
            chars.next();
-            (digits, scale) = (digits * base as u8 + d, scale + 1);
+            while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
+                chars.next();
+                (digits, scale) = (digits * base as u8 + d, scale + 1);
+            }
+        }
+
+        let exp_char = match base {
+            Base::Decimal => 'e',
+            Base::Hexadecimal => 'p',
+            _ => unreachable!(),
+        };
+
+        // Parse the exponent part, only decimal numbers are allowed.
+        if chars.peek().is_some_and(|&(_, c)| c == exp_char) {
+            chars.next();
+            let exp_negative = match chars.peek() {
+                Some((_, '-')) => {
+                    chars.next();
+                    true
+                }
+                Some((_, '+')) => {
+                    chars.next();
+                    false
+                }
+                _ => false, // Something else, or nothing at all: keep going.
+            };
+            while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) {
+                chars.next();
+                exponent = exponent * 10 + d as i64;
+            }
+            if exp_negative {
+                exponent = -exponent;
+            }
        }
    }

@ -300,14 +332,23 @@ fn parse(
    } else {
        let sign = if negative { Sign::Minus } else { Sign::Plus };
        let signed_digits = BigInt::from_biguint(sign, digits);
-        let bd = if scale == 0 {
+        let bd = if scale == 0 && exponent == 0 {
            BigDecimal::from_bigint(signed_digits, 0)
        } else if base == Base::Decimal {
-            BigDecimal::from_bigint(signed_digits, scale)
+            BigDecimal::from_bigint(signed_digits, scale - exponent)
+        } else if base == Base::Hexadecimal {
+            // Base is 16, init at scale 0 then divide by base**scale.
+            let bd = BigDecimal::from_bigint(signed_digits, 0)
+                / BigDecimal::from_bigint(BigInt::from(16).pow(scale as u32), 0);
+            // Confusingly, exponent is in base 2 for hex floating point numbers.
+            if exponent >= 0 {
+                bd * 2u64.pow(exponent as u32)
+            } else {
+                bd / 2u64.pow(-exponent as u32)
+            }
        } else {
-            // Base is not 10, init at scale 0 then divide by base**scale.
-            BigDecimal::from_bigint(signed_digits, 0)
-                / BigDecimal::from_bigint(BigInt::from(base as u32).pow(scale as u32), 0)
+            // scale != 0, which means that integral_only is not set, so only base 10 and 16 are allowed.
+            unreachable!();
        };
        ExtendedBigDecimal::BigDecimal(bd)
    };
@ -350,6 +391,10 @@ mod tests {
            u64::extended_parse("123.15"),
            Err(ExtendedParserError::PartialMatch(123, ".15"))
        ));
+        assert!(matches!(
+            u64::extended_parse("123e10"),
+            Err(ExtendedParserError::PartialMatch(123, "e10"))
+        ));
    }

    #[test]
@ -371,6 +416,10 @@ mod tests {
            i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
            Err(ExtendedParserError::Overflow)
        ));
+        assert!(matches!(
+            i64::extended_parse("-123e10"),
+            Err(ExtendedParserError::PartialMatch(-123, "e10"))
+        ));
    }

    #[test]
@ -397,12 +446,18 @@ mod tests {
        assert_eq!(Ok(123.15), f64::extended_parse("0123.15"));
        assert_eq!(Ok(123.15), f64::extended_parse("+0123.15"));
        assert_eq!(Ok(-123.15), f64::extended_parse("-0123.15"));
+        assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e5"));
+        assert_eq!(Ok(-12315000.0), f64::extended_parse("-123.15e5"));
+        assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e+5"));
+        assert_eq!(Ok(0.0012315), f64::extended_parse("123.15e-5"));
        assert_eq!(
            Ok(0.15),
            f64::extended_parse(".150000000000000000000000000231313")
        );
        assert!(matches!(f64::extended_parse("1.2.3"),
                         Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
+        assert!(matches!(f64::extended_parse("123.15p5"),
+                        Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15));
        // Minus zero. 0.0 == -0.0 so we explicitly check the sign.
        assert_eq!(Ok(0.0), f64::extended_parse("-0.0"));
        assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative());
@ -444,6 +499,20 @@ mod tests {
            )),
            ExtendedBigDecimal::extended_parse("123.15")
        );
+        assert_eq!(
+            Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint(
+                12315.into(),
+                -98
+            ))),
+            ExtendedBigDecimal::extended_parse("123.15e100")
+        );
+        assert_eq!(
+            Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint(
+                12315.into(),
+                102
+            ))),
+            ExtendedBigDecimal::extended_parse("123.15e-100")
+        );
        // Very high precision that would not fit in a f64.
        assert_eq!(
            Ok(ExtendedBigDecimal::BigDecimal(
@ -488,6 +557,12 @@ mod tests {
        assert_eq!(Ok(0.5), f64::extended_parse("0x.8"));
        assert_eq!(Ok(0.0625), f64::extended_parse("0x.1"));
        assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02"));
+        assert_eq!(Ok(16.0), f64::extended_parse("0x0.8p5"));
+        assert_eq!(Ok(0.0625), f64::extended_parse("0x1p-4"));
+
+        // We cannot really check that 'e' is not a valid exponent indicator for hex floats...
+        // but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3
+        assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5"));

        assert_eq!(
            Ok(ExtendedBigDecimal::BigDecimal(