1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #7624 from drinkcat/parse-bigdecimal-seq

seq: Move to uucore/format common number parsing code
This commit is contained in:
Daniel Hofstetter 2025-04-03 18:55:55 +02:00 committed by GitHub
commit 10a4dcb04d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 153 additions and 777 deletions

View file

@ -34,7 +34,6 @@ fn parse_error_type(e: &ParseNumberError) -> &'static str {
match e {
ParseNumberError::Float => "floating point",
ParseNumberError::Nan => "'not-a-number'",
ParseNumberError::Hex => "hexadecimal",
}
}

View file

@ -1,404 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore extendedbigdecimal bigdecimal hexdigit numberparse
use crate::number::PreciseNumber;
use crate::numberparse::ParseNumberError;
use bigdecimal::BigDecimal;
use num_traits::FromPrimitive;
use uucore::format::ExtendedBigDecimal;
/// The base of the hex number system.
///
/// Used both to convert individual hexadecimal digits and as the divisor
/// that scales each successive fractional digit.
const HEX_RADIX: u32 = 16;
/// Parse a hexadecimal floating-point literal (e.g. `0x1.4p-2`) into a [`PreciseNumber`].
///
/// The input is trimmed, then consumed left to right as: optional sign, mandatory
/// `0x`/`0X` prefix, optional integral hex digits, optional `.` followed by fractional
/// hex digits, optional `p`/`P` followed by a decimal power-of-two exponent.
///
/// The value is computed in `f64` arithmetic and then converted to a [`BigDecimal`].
/// The reported number of integral digits includes one extra position for the `-`
/// sign of a negative input, and is 0 (plus the sign position) when the absolute
/// value is below 1.
///
/// # Errors
///
/// Returns [`ParseNumberError::Float`] if:
/// - the input is not a valid hexadecimal floating-point string,
/// - the input is not fully consumed by the parser,
/// - only an integral part, only an exponent part, or no part at all is present,
/// - the computed [`f64`] cannot be converted to a [`BigDecimal`].
///
/// # Examples
///
/// ```rust,ignore
/// let parsed = parse_number("0x1.4p-2").unwrap();
/// match parsed.number {
///     ExtendedBigDecimal::BigDecimal(bd) => assert_eq!(bd.to_f64().unwrap(), 0.3125),
///     _ => unreachable!(),
/// };
/// ```
pub fn parse_number(s: &str) -> Result<PreciseNumber, ParseNumberError> {
    // Peel the literal apart, one component at a time.
    let (sign, rest) = parse_sign_multiplier(s.trim())?;
    let rest = parse_hex_prefix(rest)?;
    let (integral_part, rest) = parse_integral_part(rest)?;
    let (fractional_part, rest) = parse_fractional_part(rest)?;
    let (exponent_part, rest) = parse_exponent_part(rest)?;

    // Raise an error on trailing garbage.
    if !rest.is_empty() {
        return Err(ParseNumberError::Float);
    }
    // Without a fractional part, the literal is only a float when it has BOTH an
    // integral part and an exponent. This rejects "integral only", "exponent only"
    // and "nothing at all".
    if fractional_part.is_none() && (integral_part.is_none() || exponent_part.is_none()) {
        return Err(ParseNumberError::Float);
    }

    // Assemble the value: sign * (integral + fractional) * 2^exponent.
    let mantissa = integral_part.unwrap_or(0.0) + fractional_part.unwrap_or(0.0);
    let scale = (2.0_f64).powi(exponent_part.unwrap_or(0));
    let value = sign * mantissa * scale;

    // Convert to an arbitrary-precision decimal and count the digits on each side
    // of the decimal point.
    let number = BigDecimal::from_f64(value).ok_or(ParseNumberError::Float)?;
    let num_fractional_digits = number.fractional_digit_count().max(0) as u64;
    let magnitude_digits = if value.abs() < 1.0 {
        0
    } else {
        number.digits() - num_fractional_digits
    };
    // Reserve one more position for the '-' character of a negative number.
    let num_integral_digits = magnitude_digits + u64::from(sign < 0.0);

    Ok(PreciseNumber::new(
        ExtendedBigDecimal::BigDecimal(number),
        num_integral_digits as usize,
        num_fractional_digits as usize,
    ))
}
// Guess the display precision of a number the way GNU coreutils does (see `scan_arg`
// in seq.c). Some differences from the GNU version remain, but this is sufficient
// to test the idea.
//
// Returns:
// - `Some(0)` for a hexadecimal integer (contains 'x'/'X' but neither '.' nor 'p'/'P'),
// - `None` for a hexadecimal floating-point number (the default precision applies),
// - `Some(n)` for decimal input, where `n` is the count of digits after the decimal
//   point, increased by the magnitude of a negative exponent or reduced (down to 0)
//   by a non-negative one. An exponent that does not parse as `i32` counts as 0.
pub fn parse_precision(s: &str) -> Option<usize> {
    let dot = s.find('.');

    // Hexadecimal input: only plain integers have a well-defined precision.
    if s.contains(['x', 'X']) {
        let is_integer = dot.is_none() && !s.contains(['p', 'P']);
        return is_integer.then_some(0);
    }

    // Decimal input: count the run of digits that directly follows the point.
    let fraction_digits = dot.map_or(0, |at| {
        s[at + 1..]
            .bytes()
            .take_while(|b| b.is_ascii_digit())
            .count()
    });

    // A missing or malformed exponent behaves like an exponent of zero.
    let exponent: i32 = s
        .find(['e', 'E'])
        .and_then(|at| s[at + 1..].parse().ok())
        .unwrap_or(0);

    let precision = if exponent < 0 {
        // A negative exponent shifts the point left, adding fractional digits.
        fraction_digits + exponent.unsigned_abs() as usize
    } else {
        // A positive exponent consumes fractional digits, never going below zero.
        fraction_digits.saturating_sub(exponent as usize)
    };
    Some(precision)
}
/// Read the optional leading sign and turn it into a multiplier.
///
/// A leading `-` yields `-1.0` and a leading `+` yields `1.0`; in both cases the
/// sign character is consumed. If the input starts with `0` instead, the
/// multiplier is `1.0` and nothing is consumed.
///
/// # Errors
///
/// Returns [`ParseNumberError::Float`] if the input is empty or starts with any
/// character other than `-`, `+` or `0`.
fn parse_sign_multiplier(s: &str) -> Result<(f64, &str), ParseNumberError> {
    // All three accepted leading characters are ASCII, so slicing at byte 1 is
    // always on a character boundary.
    match s.as_bytes().first() {
        Some(b'-') => Ok((-1.0, &s[1..])),
        Some(b'+') => Ok((1.0, &s[1..])),
        Some(b'0') => Ok((1.0, s)),
        _ => Err(ParseNumberError::Float),
    }
}
/// Consume the mandatory hexadecimal prefix, accepting both `0x` and `0X`.
///
/// On success, returns the input with the two prefix characters removed.
///
/// # Errors
///
/// Returns [`ParseNumberError::Float`] if the input does not start with the prefix.
fn parse_hex_prefix(s: &str) -> Result<&str, ParseNumberError> {
    s.strip_prefix("0x")
        .or_else(|| s.strip_prefix("0X"))
        .ok_or(ParseNumberError::Float)
}
/// Read the integral part of a hexadecimal float.
///
/// The integral part is the run of hexadecimal digits that follows the `0x` prefix
/// and precedes the `.` or `p` symbols. For example, `0x1.234p2` has the integral
/// part `1`.
///
/// The part is optional: when the input does not start with a hexadecimal digit,
/// `None` is returned and nothing is consumed.
///
/// # Errors
///
/// Returns [`ParseNumberError::Float`] if digits are present but cannot be parsed
/// as a `u64` (for instance because the value does not fit).
fn parse_integral_part(s: &str) -> Result<(Option<f64>, &str), ParseNumberError> {
    // Hex digits are ASCII, so the index of the first non-digit is also the byte
    // length of the digit run.
    let digits_end = s
        .find(|c: char| !c.is_ascii_hexdigit())
        .unwrap_or(s.len());
    if digits_end == 0 {
        return Ok((None, s));
    }

    let (digits, rest) = s.split_at(digits_end);
    let integer = u64::from_str_radix(digits, HEX_RADIX).map_err(|_| ParseNumberError::Float)?;
    Ok((Some(integer as f64), rest))
}
/// Read the fractional part of a hexadecimal float.
///
/// The fractional part is the run of hexadecimal digits after the `.` symbol. The
/// n-th digit after the dot (counting from 1) contributes `digit / 16^n` to the
/// result.
///
/// For example, `0x1.234p2` has the fractional digits `234`, which evaluate to
/// 2/16^1 + 3/16^2 + 4/16^3 = 0.125 + 0.01171875 + 0.0009765625 = 0.1376953125.
///
/// The part is optional: when the input does not start with `.`, `None` is
/// returned and nothing is consumed.
///
/// # Errors
///
/// Returns [`ParseNumberError::Float`] if the dot is present but is not followed
/// by at least one hexadecimal digit.
fn parse_fractional_part(s: &str) -> Result<(Option<f64>, &str), ParseNumberError> {
    let Some(after_dot) = s.strip_prefix('.') else {
        return Ok((None, s));
    };

    // Hex digits are ASCII, so counting bytes gives the byte length of the run.
    let digit_count = after_dot
        .bytes()
        .take_while(|b| b.is_ascii_hexdigit())
        .count();
    if digit_count == 0 {
        return Err(ParseNumberError::Float);
    }
    let (digits, rest) = after_dot.split_at(digit_count);

    // Accumulate digit * 16^-n, dividing the weight by the radix after each digit.
    let mut weight = 1.0 / HEX_RADIX as f64;
    let mut sum = 0.0;
    for c in digits.chars() {
        let digit = c.to_digit(HEX_RADIX).ok_or(ParseNumberError::Float)?;
        sum += f64::from(digit) * weight;
        weight /= HEX_RADIX as f64;
    }
    Ok((Some(sum), rest))
}
/// Read the exponent part of a hexadecimal float.
///
/// The exponent part is a decimal integer that follows the `p` (or `P`) symbol.
/// For example, `0x1.234p2` has the exponent part `2`.
///
/// The part is optional: when the input does not start with `p`/`P`, `None` is
/// returned and nothing is consumed.
///
/// # Errors
///
/// Returns [`ParseNumberError::Float`] if the marker is present but the characters
/// after it (digits, `+`, `-`) do not form a valid `i32`.
fn parse_exponent_part(s: &str) -> Result<(Option<i32>, &str), ParseNumberError> {
    let Some(after_marker) = s.strip_prefix(['p', 'P']) else {
        return Ok((None, s));
    };

    // Take every leading digit or sign character. Whether they form a valid number
    // (e.g. not empty, not "1-2") is decided by the integer parser below.
    let exponent_end = after_marker
        .find(|c: char| !matches!(c, '0'..='9' | '-' | '+'))
        .unwrap_or(after_marker.len());
    let (digits, rest) = after_marker.split_at(exponent_end);

    let exponent = digits
        .parse::<i32>()
        .map_err(|_| ParseNumberError::Float)?;
    Ok((Some(exponent), rest))
}
#[cfg(test)]
mod tests {
    use super::{parse_number, parse_precision};
    use crate::{ExtendedBigDecimal, numberparse::ParseNumberError};
    use bigdecimal::BigDecimal;
    use num_traits::ToPrimitive;

    /// Parse `s` and extract the `BigDecimal` payload; any other variant is an error.
    fn parse_big_decimal(s: &str) -> Result<BigDecimal, ParseNumberError> {
        match parse_number(s)?.number {
            ExtendedBigDecimal::BigDecimal(bd) => Ok(bd),
            _ => Err(ParseNumberError::Float),
        }
    }

    /// Parse `s` and convert the result to `f64` for easy comparison.
    fn parse_f64(s: &str) -> Result<f64, ParseNumberError> {
        parse_big_decimal(s)?
            .to_f64()
            .ok_or(ParseNumberError::Float)
    }

    /// Assert that every input parses to the paired expected value.
    fn assert_all_parse(cases: &[(&str, f64)]) {
        for &(input, expected) in cases {
            assert_eq!(parse_f64(input).unwrap(), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_parse_precise_number_case_insensitive() {
        assert_all_parse(&[("0x1P1", 2.0), ("0x1p1", 2.0)]);
    }

    #[test]
    fn test_parse_precise_number_plus_minus_prefixes() {
        assert_all_parse(&[("+0x1p1", 2.0), ("-0x1p1", -2.0)]);
    }

    #[test]
    fn test_parse_precise_number_power_signs() {
        assert_all_parse(&[("0x1p1", 2.0), ("0x1p+1", 2.0), ("0x1p-1", 0.5)]);
    }

    #[test]
    fn test_parse_precise_number_hex() {
        assert_all_parse(&[("0xd.dp-1", 6.90625)]);
    }

    #[test]
    fn test_parse_precise_number_no_power() {
        assert_all_parse(&[("0x123.a", 291.625)]);
    }

    #[test]
    fn test_parse_precise_number_no_fractional() {
        assert_all_parse(&[("0x333p-4", 51.1875)]);
    }

    #[test]
    fn test_parse_precise_number_no_integral() {
        assert_all_parse(&[("0x.9", 0.5625), ("0x.9p2", 2.25)]);
    }

    #[test]
    fn test_parse_precise_number_from_valid_values() {
        assert_all_parse(&[
            ("0x1p1", 2.0),
            ("+0x1p1", 2.0),
            ("-0x1p1", -2.0),
            ("0x1p-1", 0.5),
            ("0x1.8", 1.5),
            ("-0x1.8", -1.5),
            ("0x1.8p2", 6.0),
            ("0x1.8p+2", 6.0),
            ("0x1.8p-2", 0.375),
            ("0x.8", 0.5),
            ("0x10p0", 16.0),
            ("0x0.0", 0.0),
            ("0x0p0", 0.0),
            ("0x0.0p0", 0.0),
            ("-0x.1p-3", -0.0078125),
            ("-0x.ep-3", -0.109375),
        ]);
    }

    #[test]
    fn test_parse_float_from_invalid_values() {
        let invalid_inputs = [
            "",
            "1",
            "1p",
            "0x",
            "0xG",
            "0xp",
            "0xp3",
            "0x1",
            "0x1.",
            "0x1p",
            "0x1p+",
            "-0xx1p1",
            "0x1.k",
            "0x1",
            "-0x1pa",
            "0x1.1pk",
            "0x1.8p2z",
            "0x1p3.2",
            "-0x.ep-3z",
        ];
        for input in invalid_inputs {
            assert_eq!(
                parse_f64(input).unwrap_err(),
                ParseNumberError::Float,
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn test_parse_precise_number_count_digits() {
        // (input, expected integral digits, expected fractional digits)
        let cases = [
            ("0x1.2", 1, 3),    // 1.125 decimal
            ("-0x1.2", 2, 3),   // -1.125 decimal
            ("0x123.8", 3, 1),  // 291.5 decimal
            ("-0x123.8", 4, 1), // -291.5 decimal
        ];
        for (input, integral, fractional) in cases {
            let precise_num = parse_number(input).unwrap();
            assert_eq!(precise_num.num_integral_digits, integral, "input: {input:?}");
            assert_eq!(
                precise_num.num_fractional_digits, fractional,
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn test_parse_precision_valid_values() {
        let cases = [
            ("1", Some(0)),
            ("0x1", Some(0)),
            ("0x1.1", None),
            ("0x1.1p2", None),
            ("0x1.1p-2", None),
            (".1", Some(1)),
            ("1.1", Some(1)),
            ("1.12", Some(2)),
            ("1.12345678", Some(8)),
            ("1.12345678e-3", Some(11)),
            ("1.1e-1", Some(2)),
            ("1.1e-3", Some(4)),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_precision(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_parse_precision_invalid_values() {
        // Just to make sure it doesn't crash on incomplete values/bad format.
        // Good enough for now.
        for input in ["1.", "1e", "1e-", "1e+", "1em"] {
            assert_eq!(parse_precision(input), Some(0), "input: {input:?}");
        }
    }
}

View file

@ -13,22 +13,26 @@ use uucore::format::ExtendedBigDecimal;
/// on how many significant digits to use when displaying the number.
/// The [`PreciseNumber::num_integral_digits`] field also includes the width needed to
/// display the "-" character for a negative number.
/// [`PreciseNumber::num_fractional_digits`] provides the number of decimal digits after
/// the decimal point (a.k.a. precision), or None if that number cannot intuitively be
/// obtained (i.e. hexadecimal floats).
/// Note: Those 2 fields should not necessarily be interpreted literally, but as matching
/// GNU `seq` behavior: the exact way of guessing desired precision from user input is a
/// matter of interpretation.
///
/// You can get an instance of this struct by calling [`str::parse`].
#[derive(Debug)]
pub struct PreciseNumber {
pub number: ExtendedBigDecimal,
pub num_integral_digits: usize,
#[allow(dead_code)]
pub num_fractional_digits: usize,
pub num_fractional_digits: Option<usize>,
}
impl PreciseNumber {
pub fn new(
number: ExtendedBigDecimal,
num_integral_digits: usize,
num_fractional_digits: usize,
num_fractional_digits: Option<usize>,
) -> Self {
Self {
number,
@ -42,7 +46,7 @@ impl PreciseNumber {
// We would like to implement `num_traits::One`, but it requires
// a multiplication implementation, and we don't want to
// implement that here.
Self::new(ExtendedBigDecimal::one(), 1, 0)
Self::new(ExtendedBigDecimal::one(), 1, Some(0))
}
/// Decide whether this number is zero (either positive or negative).

View file

@ -9,13 +9,8 @@
//! [`PreciseNumber`] struct.
use std::str::FromStr;
use bigdecimal::BigDecimal;
use num_bigint::BigInt;
use num_bigint::Sign;
use num_traits::Num;
use num_traits::Zero;
use uucore::format::num_parser::{ExtendedParser, ExtendedParserError};
use crate::hexadecimalfloat;
use crate::number::PreciseNumber;
use uucore::format::ExtendedBigDecimal;
@ -24,357 +19,107 @@ use uucore::format::ExtendedBigDecimal;
pub enum ParseNumberError {
Float,
Nan,
Hex,
}
/// Decide whether a given string and its parsed `BigInt` is negative zero.
fn is_minus_zero_int(s: &str, n: &BigDecimal) -> bool {
s.starts_with('-') && n == &BigDecimal::zero()
}
// Compute the number of integral and fractional digits in input string,
// and wrap the result in a PreciseNumber.
// We know that the string has already been parsed correctly, so we don't
// need to be too careful.
fn compute_num_digits(input: &str, ebd: ExtendedBigDecimal) -> PreciseNumber {
let input = input.to_lowercase();
/// Decide whether a given string and its parsed `BigDecimal` is negative zero.
fn is_minus_zero_float(s: &str, x: &BigDecimal) -> bool {
s.starts_with('-') && x == &BigDecimal::zero()
}
// Leading + is ignored for this.
let input = input.trim_start().strip_prefix('+').unwrap_or(&input);
/// Parse a number with neither a decimal point nor an exponent.
///
/// # Errors
///
/// This function returns an error if the input string is a variant of
/// "NaN" or if no [`BigInt`] could be parsed from the string.
///
/// # Examples
///
/// ```rust,ignore
/// let actual = "0".parse::<Number>().unwrap().number;
/// let expected = Number::BigInt(BigInt::zero());
/// assert_eq!(actual, expected);
/// ```
fn parse_no_decimal_no_exponent(s: &str) -> Result<PreciseNumber, ParseNumberError> {
match s.parse::<BigDecimal>() {
Ok(n) => {
// If `s` is '-0', then `parse()` returns `BigInt::zero()`,
// but we need to return `Number::MinusZeroInt` instead.
if is_minus_zero_int(s, &n) {
Ok(PreciseNumber::new(
ExtendedBigDecimal::MinusZero,
s.len(),
0,
))
// Integral digits for any hex number is ill-defined (0 is fine as an output)
// Fractional digits for a floating hex number is ill-defined, return None
// as we'll totally ignore that number for precision computations.
// Still return 0 for hex integers though.
if input.starts_with("0x") || input.starts_with("-0x") {
return PreciseNumber {
number: ebd,
num_integral_digits: 0,
num_fractional_digits: if input.contains(".") || input.contains("p") {
None
} else {
Ok(PreciseNumber::new(
ExtendedBigDecimal::BigDecimal(n),
s.len(),
0,
))
}
}
Err(_) => {
// Possibly "NaN" or "inf".
let float_val = match s.to_ascii_lowercase().as_str() {
"inf" | "infinity" => ExtendedBigDecimal::Infinity,
"-inf" | "-infinity" => ExtendedBigDecimal::MinusInfinity,
"nan" | "-nan" => return Err(ParseNumberError::Nan),
_ => return Err(ParseNumberError::Float),
Some(0)
},
};
}
// Split the exponent part, if any
let parts: Vec<&str> = input.split("e").collect();
debug_assert!(parts.len() <= 2);
// Count all the digits up to `.`, `-` sign is included.
let (mut int_digits, mut frac_digits) = match parts[0].find(".") {
Some(i) => {
// Cover special case .X and -.X where we behave as if there was a leading 0:
// 0.X, -0.X.
let int_digits = match i {
0 => 1,
1 if parts[0].starts_with("-") => 2,
_ => i,
};
Ok(PreciseNumber::new(float_val, 0, 0))
(int_digits, parts[0].len() - i - 1)
}
}
}
/// Parse a number with an exponent but no decimal point.
///
/// # Errors
///
/// This function returns an error if `s` is not a valid number.
///
/// # Examples
///
/// ```rust,ignore
/// let actual = "1e2".parse::<Number>().unwrap().number;
/// let expected = "100".parse::<BigInt>().unwrap();
/// assert_eq!(actual, expected);
/// ```
fn parse_exponent_no_decimal(s: &str, j: usize) -> Result<PreciseNumber, ParseNumberError> {
let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?;
// If the exponent is strictly less than zero, then the number
// should be treated as a floating point number that will be
// displayed in decimal notation. For example, "1e-2" will be
// displayed as "0.01", but "1e2" will be displayed as "100",
// without a decimal point.
// In ['BigDecimal'], a positive scale represents a negative power of 10.
// This means the exponent value from the number must be inverted. However,
// since the |i64::MIN| > |i64::MAX| (i.e. |-2^63| > |2^63 - 1|) inverting a
// valid negative value could result in an overflow. To prevent this, we
// limit the minimal value with i64::MIN + 1.
let exponent = exponent.max(i64::MIN + 1);
let base: BigInt = s[..j].parse().map_err(|_| ParseNumberError::Float)?;
let x = if base.is_zero() {
BigDecimal::zero()
} else {
BigDecimal::from_bigint(base, -exponent)
None => (parts[0].len(), 0),
};
let num_integral_digits = if is_minus_zero_float(s, &x) {
if exponent > 0 {
(2usize)
.checked_add(exponent as usize)
.ok_or(ParseNumberError::Float)?
} else {
2usize
}
} else {
let total = (j as i64)
.checked_add(exponent)
.ok_or(ParseNumberError::Float)?;
let result = if total < 1 {
1
} else {
total.try_into().map_err(|_| ParseNumberError::Float)?
// If there is an exponent, reparse that (yes this is not optimal,
// but we can't necessarily exactly recover that from the parsed number).
if parts.len() == 2 {
let exp = parts[1].parse::<i64>().unwrap_or(0);
// For positive exponents, effectively expand the number. Ignore negative exponents.
// Also ignore overflowed exponents (unwrap_or(0)).
if exp > 0 {
int_digits += exp.try_into().unwrap_or(0)
};
if x.sign() == Sign::Minus {
result + 1
frac_digits = if exp < frac_digits as i64 {
// Subtract from i128 to avoid any overflow
(frac_digits as i128 - exp as i128).try_into().unwrap_or(0)
} else {
result
0
}
};
let num_fractional_digits = if exponent < 0 { -exponent as usize } else { 0 };
}
if is_minus_zero_float(s, &x) {
Ok(PreciseNumber::new(
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
} else {
Ok(PreciseNumber::new(
ExtendedBigDecimal::BigDecimal(x),
num_integral_digits,
num_fractional_digits,
))
}
}
/// Parse a number with a decimal point but no exponent.
///
/// # Errors
///
/// This function returns an error if `s` is not a valid number.
///
/// # Examples
///
/// ```rust,ignore
/// let actual = "1.2".parse::<Number>().unwrap().number;
/// let expected = "1.2".parse::<BigDecimal>().unwrap();
/// assert_eq!(actual, expected);
/// ```
fn parse_decimal_no_exponent(s: &str, i: usize) -> Result<PreciseNumber, ParseNumberError> {
let x: BigDecimal = s.parse().map_err(|_| ParseNumberError::Float)?;
// The number of integral digits is the number of chars until the period.
//
// This includes the negative sign if there is one. Also, it is
// possible that a number is expressed as "-.123" instead of
// "-0.123", but when we display the number we want it to include
// the leading 0.
let num_integral_digits = if s.starts_with("-.") { i + 1 } else { i };
let num_fractional_digits = s.len() - (i + 1);
if is_minus_zero_float(s, &x) {
Ok(PreciseNumber::new(
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
} else {
Ok(PreciseNumber::new(
ExtendedBigDecimal::BigDecimal(x),
num_integral_digits,
num_fractional_digits,
))
}
}
/// Parse a number with both a decimal point and an exponent.
///
/// # Errors
///
/// This function returns an error if `s` is not a valid number.
///
/// # Examples
///
/// ```rust,ignore
/// let actual = "1.2e3".parse::<Number>().unwrap().number;
/// let expected = "1200".parse::<BigInt>().unwrap();
/// assert_eq!(actual, expected);
/// ```
fn parse_decimal_and_exponent(
s: &str,
i: usize,
j: usize,
) -> Result<PreciseNumber, ParseNumberError> {
// Because of the match guard, this subtraction will not underflow.
let num_digits_between_decimal_point_and_e = (j - (i + 1)) as i64;
let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?;
let val: BigDecimal = {
let parsed_decimal = s
.parse::<BigDecimal>()
.map_err(|_| ParseNumberError::Float)?;
if parsed_decimal == BigDecimal::zero() {
BigDecimal::zero()
} else {
parsed_decimal
}
};
let num_integral_digits = {
let minimum: usize = {
let integral_part: f64 = s[..j].parse().map_err(|_| ParseNumberError::Float)?;
if integral_part.is_sign_negative() {
if exponent > 0 {
2usize
.checked_add(exponent as usize)
.ok_or(ParseNumberError::Float)?
} else {
2usize
}
} else {
1
}
};
// Special case: if the string is "-.1e2", we need to treat it
// as if it were "-0.1e2".
let total = {
let total = (i as i64)
.checked_add(exponent)
.ok_or(ParseNumberError::Float)?;
if s.starts_with("-.") {
total.checked_add(1).ok_or(ParseNumberError::Float)?
} else {
total
}
};
if total < minimum as i64 {
minimum
} else {
total.try_into().map_err(|_| ParseNumberError::Float)?
}
};
let num_fractional_digits = if num_digits_between_decimal_point_and_e < exponent {
0
} else {
(num_digits_between_decimal_point_and_e - exponent)
.try_into()
.unwrap()
};
if is_minus_zero_float(s, &val) {
Ok(PreciseNumber::new(
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
} else {
Ok(PreciseNumber::new(
ExtendedBigDecimal::BigDecimal(val),
num_integral_digits,
num_fractional_digits,
))
}
}
/// Parse a hexadecimal integer from a string.
///
/// # Errors
///
/// This function returns an error if no [`BigInt`] could be parsed from
/// the string.
///
/// # Examples
///
/// ```rust,ignore
/// let actual = "0x0".parse::<Number>().unwrap().number;
/// let expected = Number::BigInt(BigInt::zero());
/// assert_eq!(actual, expected);
/// ```
fn parse_hexadecimal(s: &str) -> Result<PreciseNumber, ParseNumberError> {
if s.find(['.', 'p', 'P']).is_some() {
hexadecimalfloat::parse_number(s)
} else {
parse_hexadecimal_integer(s)
}
}
fn parse_hexadecimal_integer(s: &str) -> Result<PreciseNumber, ParseNumberError> {
let (is_neg, s) = if s.starts_with('-') {
(true, &s[3..])
} else {
(false, &s[2..])
};
if s.starts_with('-') || s.starts_with('+') {
// Even though this is more like an invalid hexadecimal number,
// GNU reports this as an invalid floating point number, so we
// use `ParseNumberError::Float` to match that behavior.
return Err(ParseNumberError::Float);
}
let num = BigInt::from_str_radix(s, 16).map_err(|_| ParseNumberError::Hex)?;
let num = BigDecimal::from(num);
match (is_neg, num == BigDecimal::zero()) {
(true, true) => Ok(PreciseNumber::new(ExtendedBigDecimal::MinusZero, 2, 0)),
(true, false) => Ok(PreciseNumber::new(
ExtendedBigDecimal::BigDecimal(-num),
0,
0,
)),
(false, _) => Ok(PreciseNumber::new(
ExtendedBigDecimal::BigDecimal(num),
0,
0,
)),
PreciseNumber {
number: ebd,
num_integral_digits: int_digits,
num_fractional_digits: Some(frac_digits),
}
}
// Note: We could also have provided an `ExtendedParser` implementation for
// PreciseNumber, but we want a simpler custom error.
impl FromStr for PreciseNumber {
type Err = ParseNumberError;
fn from_str(mut s: &str) -> Result<Self, Self::Err> {
// Trim leading whitespace.
s = s.trim_start();
fn from_str(input: &str) -> Result<Self, Self::Err> {
let ebd = match ExtendedBigDecimal::extended_parse(input) {
Ok(ebd) => match ebd {
// Handle special values
ExtendedBigDecimal::BigDecimal(_) | ExtendedBigDecimal::MinusZero => {
// TODO: GNU `seq` treats small numbers < 1e-4950 as 0, we could do the same
// to avoid printing senselessly small numbers.
ebd
}
ExtendedBigDecimal::Infinity | ExtendedBigDecimal::MinusInfinity => {
return Ok(PreciseNumber {
number: ebd,
num_integral_digits: 0,
num_fractional_digits: Some(0),
});
}
ExtendedBigDecimal::Nan | ExtendedBigDecimal::MinusNan => {
return Err(ParseNumberError::Nan);
}
},
Err(ExtendedParserError::Underflow(ebd)) => ebd, // Treat underflow as 0
Err(_) => return Err(ParseNumberError::Float),
};
// Trim a single leading "+" character.
if s.starts_with('+') {
s = &s[1..];
}
// Check if the string seems to be in hexadecimal format.
//
// May be 0x123 or -0x123, so the index `i` may be either 0 or 1.
if let Some(i) = s.find("0x").or_else(|| s.find("0X")) {
if i <= 1 {
return parse_hexadecimal(s);
}
}
// Find the decimal point and the exponent symbol. Parse the
// number differently depending on its form. This is important
// because the form of the input dictates how the output will be
// presented.
match (s.find('.'), s.find(['e', 'E'])) {
// For example, "123456" or "inf".
(None, None) => parse_no_decimal_no_exponent(s),
// For example, "123e456" or "1e-2".
(None, Some(j)) => parse_exponent_no_decimal(s, j),
// For example, "123.456".
(Some(i), None) => parse_decimal_no_exponent(s, i),
// For example, "123.456e789".
(Some(i), Some(j)) if i < j => parse_decimal_and_exponent(s, i, j),
// For example, "1e2.3" or "1.2.3".
_ => Err(ParseNumberError::Float),
}
Ok(compute_num_digits(input, ebd))
}
}
@ -398,7 +143,18 @@ mod tests {
/// Convenience function for getting the number of fractional digits.
fn num_fractional_digits(s: &str) -> usize {
s.parse::<PreciseNumber>().unwrap().num_fractional_digits
s.parse::<PreciseNumber>()
.unwrap()
.num_fractional_digits
.unwrap()
}
/// Convenience function for making sure the number of fractional digits is "None"
fn num_fractional_digits_is_none(s: &str) -> bool {
s.parse::<PreciseNumber>()
.unwrap()
.num_fractional_digits
.is_none()
}
#[test]
@ -496,7 +252,7 @@ mod tests {
fn test_parse_invalid_hex() {
assert_eq!(
"0xg".parse::<PreciseNumber>().unwrap_err(),
ParseNumberError::Hex
ParseNumberError::Float
);
}
@ -535,12 +291,12 @@ mod tests {
assert_eq!(num_integral_digits("-.1"), 2);
// exponent, no decimal
assert_eq!(num_integral_digits("123e4"), 3 + 4);
assert_eq!(num_integral_digits("123e-4"), 1);
assert_eq!(num_integral_digits("123e-4"), 3);
assert_eq!(num_integral_digits("-1e-3"), 2);
// decimal and exponent
assert_eq!(num_integral_digits("123.45e6"), 3 + 6);
assert_eq!(num_integral_digits("123.45e-6"), 1);
assert_eq!(num_integral_digits("123.45e-1"), 2);
assert_eq!(num_integral_digits("123.45e-6"), 3);
assert_eq!(num_integral_digits("123.45e-1"), 3);
assert_eq!(num_integral_digits("-0.1e0"), 2);
assert_eq!(num_integral_digits("-0.1e2"), 4);
assert_eq!(num_integral_digits("-.1e0"), 2);
@ -601,19 +357,23 @@ mod tests {
assert_eq!(num_fractional_digits("-0.0"), 1);
assert_eq!(num_fractional_digits("-0e-1"), 1);
assert_eq!(num_fractional_digits("-0.0e-1"), 2);
// Hexadecimal numbers
assert_eq!(num_fractional_digits("0xff"), 0);
assert!(num_fractional_digits_is_none("0xff.1"));
}
#[test]
fn test_parse_min_exponents() {
// Make sure exponents <= i64::MIN do not cause errors
// Make sure exponents < i64::MIN do not cause errors
assert!("1e-9223372036854775807".parse::<PreciseNumber>().is_ok());
assert!("1e-9223372036854775808".parse::<PreciseNumber>().is_ok());
assert!("1e-92233720368547758080".parse::<PreciseNumber>().is_ok());
}
#[test]
fn test_parse_max_exponents() {
// Make sure exponents >= i64::MAX cause errors
assert!("1e9223372036854775807".parse::<PreciseNumber>().is_err());
assert!("1e9223372036854775808".parse::<PreciseNumber>().is_err());
// Make sure exponents much bigger than i64::MAX cause errors
assert!("1e9223372036854775807".parse::<PreciseNumber>().is_ok());
assert!("1e92233720368547758070".parse::<PreciseNumber>().is_err());
}
}

View file

@ -15,7 +15,6 @@ use uucore::format::{ExtendedBigDecimal, Format, num_format};
use uucore::{format_usage, help_about, help_usage};
mod error;
mod hexadecimalfloat;
// public to allow fuzzing
#[cfg(fuzzing)]
@ -74,11 +73,15 @@ fn split_short_args_with_value(args: impl uucore::Args) -> impl uucore::Args {
}
fn select_precision(
first: Option<usize>,
increment: Option<usize>,
last: Option<usize>,
first: &PreciseNumber,
increment: &PreciseNumber,
last: &PreciseNumber,
) -> Option<usize> {
match (first, increment, last) {
match (
first.num_fractional_digits,
increment.num_fractional_digits,
last.num_fractional_digits,
) {
(Some(0), Some(0), Some(0)) => Some(0),
(Some(f), Some(i), Some(_)) => Some(f.max(i)),
_ => None,
@ -111,37 +114,37 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
format: matches.get_one::<String>(OPT_FORMAT).map(|s| s.as_str()),
};
let (first, first_precision) = if numbers.len() > 1 {
let first = if numbers.len() > 1 {
match numbers[0].parse() {
Ok(num) => (num, hexadecimalfloat::parse_precision(numbers[0])),
Ok(num) => num,
Err(e) => return Err(SeqError::ParseError(numbers[0].to_string(), e).into()),
}
} else {
(PreciseNumber::one(), Some(0))
PreciseNumber::one()
};
let (increment, increment_precision) = if numbers.len() > 2 {
let increment = if numbers.len() > 2 {
match numbers[1].parse() {
Ok(num) => (num, hexadecimalfloat::parse_precision(numbers[1])),
Ok(num) => num,
Err(e) => return Err(SeqError::ParseError(numbers[1].to_string(), e).into()),
}
} else {
(PreciseNumber::one(), Some(0))
PreciseNumber::one()
};
if increment.is_zero() {
return Err(SeqError::ZeroIncrement(numbers[1].to_string()).into());
}
let (last, last_precision): (PreciseNumber, Option<usize>) = {
let last: PreciseNumber = {
// We are guaranteed that `numbers.len()` is greater than zero
// and at most three because of the argument specification in
// `uu_app()`.
let n: usize = numbers.len();
match numbers[n - 1].parse() {
Ok(num) => (num, hexadecimalfloat::parse_precision(numbers[n - 1])),
Ok(num) => num,
Err(e) => return Err(SeqError::ParseError(numbers[n - 1].to_string(), e).into()),
}
};
let precision = select_precision(first_precision, increment_precision, last_precision);
let precision = select_precision(&first, &increment, &last);
// If a format was passed on the command line, use that.
// If not, use some default format based on parameters precision.

View file

@ -752,21 +752,23 @@ fn test_undefined() {
#[test]
fn test_invalid_float_point_fail_properly() {
// Note that we support arguments that are much bigger than what GNU coreutils supports.
// Tests below use exponents larger than we support (i64)
new_ucmd!()
.args(&["66000e000000000000000000000000000000000000000000000000000009223372036854775807"])
.args(&["66000e0000000000000000000000000000000000000000000000000000092233720368547758070"])
.fails()
.no_stdout()
.usage_error("invalid floating point argument: '66000e000000000000000000000000000000000000000000000000000009223372036854775807'");
.usage_error("invalid floating point argument: '66000e0000000000000000000000000000000000000000000000000000092233720368547758070'");
new_ucmd!()
.args(&["-1.1e9223372036854775807"])
.args(&["-1.1e92233720368547758070"])
.fails()
.no_stdout()
.usage_error("invalid floating point argument: '-1.1e9223372036854775807'");
.usage_error("invalid floating point argument: '-1.1e92233720368547758070'");
new_ucmd!()
.args(&["-.1e9223372036854775807"])
.args(&["-.1e92233720368547758070"])
.fails()
.no_stdout()
.usage_error("invalid floating point argument: '-.1e9223372036854775807'");
.usage_error("invalid floating point argument: '-.1e92233720368547758070'");
}
#[test]
@ -909,6 +911,18 @@ fn test_parse_out_of_bounds_exponents() {
.args(&["1e-9223372036854775808"])
.succeeds()
.stdout_only("");
// GNU seq supports arbitrarily small exponents (and treats the value as 0).
new_ucmd!()
.args(&["1e-922337203685477580800000000", "1"])
.succeeds()
.stdout_only("0\n1\n");
// Check we can also underflow to -0.0.
new_ucmd!()
.args(&["-1e-922337203685477580800000000", "1"])
.succeeds()
.stdout_only("-0\n1\n");
}
#[ignore]