1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-30 12:37:49 +00:00

uucore: format: num_parser: Turn parser into a trait

We call the function extended_parse, so that we do not clash
with other parsing functions in other traits.

- Also implement parser for ExtendedBigDecimal (straightforward).
- Base doesn't need to be public anymore.
- Rename the error to ExtendedParserError.
This commit is contained in:
Nicolas Boichat 2025-03-18 10:29:44 +01:00
parent 8bbec16115
commit 40a7c65980
2 changed files with 293 additions and 288 deletions

View file

@ -5,7 +5,7 @@
use crate::{
error::set_exit_code,
features::format::num_parser::{ParseError, ParsedNumber},
features::format::num_parser::{ExtendedParser, ExtendedParserError},
quoting_style::{Quotes, QuotingStyle, escape_name},
show_error, show_warning,
};
@ -56,7 +56,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
};
match next {
FormatArgument::UnsignedInt(n) => *n,
FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s),
FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s),
_ => 0,
}
}
@ -67,7 +67,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
};
match next {
FormatArgument::SignedInt(n) => *n,
FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_i64(s), s),
FormatArgument::Unparsed(s) => extract_value(i64::extended_parse(s), s),
_ => 0,
}
}
@ -78,7 +78,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
};
match next {
FormatArgument::Float(n) => *n,
FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_f64(s), s),
FormatArgument::Unparsed(s) => extract_value(f64::extended_parse(s), s),
_ => 0.0,
}
}
@ -91,7 +91,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
}
}
fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T {
fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &str) -> T {
match p {
Ok(v) => v,
Err(e) => {
@ -103,15 +103,15 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
},
);
match e {
ParseError::Overflow => {
ExtendedParserError::Overflow => {
show_error!("{}: Numerical result out of range", input.quote());
Default::default()
}
ParseError::NotNumeric => {
ExtendedParserError::NotNumeric => {
show_error!("{}: expected a numeric value", input.quote());
Default::default()
}
ParseError::PartialMatch(v, rest) => {
ExtendedParserError::PartialMatch(v, rest) => {
let bytes = input.as_encoded_bytes();
if !bytes.is_empty() && bytes[0] == b'\'' {
show_warning!(

View file

@ -8,8 +8,8 @@
// spell-checker:ignore powf copysign prec inity bigdecimal extendedbigdecimal biguint
use bigdecimal::{
num_bigint::{BigInt, BigUint, Sign},
BigDecimal,
num_bigint::{BigInt, BigUint, Sign},
};
use num_traits::ToPrimitive;
use num_traits::Zero;
@ -18,7 +18,7 @@ use crate::format::extendedbigdecimal::ExtendedBigDecimal;
/// Base for number parsing
#[derive(Clone, Copy, PartialEq)]
pub enum Base {
enum Base {
/// Binary base
Binary = 2,
@ -53,7 +53,7 @@ impl Base {
/// Type returned if a number could not be parsed in its entirety
#[derive(Debug, PartialEq)]
pub enum ParseError<'a, T> {
pub enum ExtendedParserError<'a, T> {
/// The input as a whole makes no sense
NotNumeric,
/// The beginning of the input made sense and has been parsed,
@ -65,11 +65,14 @@ pub enum ParseError<'a, T> {
Overflow,
}
impl<'a, T> ParseError<'a, T> {
fn map<U>(self, f: impl FnOnce(T, &'a str) -> ParseError<'a, U>) -> ParseError<'a, U> {
impl<'a, T> ExtendedParserError<'a, T> {
fn map<U>(
self,
f: impl FnOnce(T, &'a str) -> ExtendedParserError<'a, U>,
) -> ExtendedParserError<'a, U> {
match self {
Self::NotNumeric => ParseError::NotNumeric,
Self::Overflow => ParseError::Overflow,
Self::NotNumeric => ExtendedParserError::NotNumeric,
Self::Overflow => ExtendedParserError::Overflow,
Self::PartialMatch(v, s) => f(v, s),
}
}
@ -77,18 +80,20 @@ impl<'a, T> ParseError<'a, T> {
/// A number parser for binary, octal, decimal, hexadecimal and single characters.
///
/// TODO: we just keep an ExtendedBigDecimal internally, so we don't really need this
/// struct actually.
///
/// If the fractional part cannot be represented on a `u64`, parsing continues
/// silently by ignoring non-significant digits.
pub struct ParsedNumber {
number: ExtendedBigDecimal,
/// It is implemented for `u64`/`i64`, where no fractional part is parsed,
/// and `f64` float, where octal is not allowed.
pub trait ExtendedParser {
// We pick a hopefully different name for our parser, to avoid clash with standard traits.
/// Parse `input` into a value of the implementing type.
///
/// Returns `Ok` when the input is parsed in its entirety. Otherwise returns an
/// `ExtendedParserError`:
/// - `NotNumeric` when the input as a whole makes no sense,
/// - `PartialMatch` when only the beginning of the input made sense; it carries
///   the value parsed so far together with the unparsed remainder of `input`,
/// - `Overflow` when the parsed number cannot be converted to the target type
///   (this is how the `i64`/`u64` implementations report a failed conversion).
fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<'_, Self>>
where
Self: Sized;
}
impl ParsedNumber {
fn into_i64(self) -> Option<i64> {
match self.number {
impl ExtendedParser for i64 {
/// Parse a number as i64. No fractional part is allowed.
fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<'_, i64>> {
fn into_i64(ebd: ExtendedBigDecimal) -> Option<i64> {
match ebd {
ExtendedBigDecimal::BigDecimal(bd) => {
let (digits, scale) = bd.into_bigint_and_scale();
if scale == 0 {
@ -102,20 +107,22 @@ impl ParsedNumber {
}
}
/// Parse a number as i64. No fractional part is allowed.
pub fn parse_i64(input: &str) -> Result<i64, ParseError<'_, i64>> {
match Self::parse(input, true) {
Ok(v) => v.into_i64().ok_or(ParseError::Overflow),
match parse(input, true) {
Ok(v) => into_i64(v).ok_or(ExtendedParserError::Overflow),
Err(e) => Err(e.map(|v, rest| {
v.into_i64()
.map(|v| ParseError::PartialMatch(v, rest))
.unwrap_or(ParseError::Overflow)
into_i64(v)
.map(|v| ExtendedParserError::PartialMatch(v, rest))
.unwrap_or(ExtendedParserError::Overflow)
})),
}
}
}
fn into_u64(self) -> Option<u64> {
match self.number {
impl ExtendedParser for u64 {
/// Parse a number as u64. No fractional part is allowed.
fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<'_, u64>> {
fn into_u64(ebd: ExtendedBigDecimal) -> Option<u64> {
match ebd {
ExtendedBigDecimal::BigDecimal(bd) => {
let (digits, scale) = bd.into_bigint_and_scale();
if scale == 0 {
@ -128,20 +135,23 @@ impl ParsedNumber {
}
}
/// Parse a number as u64. No fractional part is allowed.
pub fn parse_u64(input: &str) -> Result<u64, ParseError<'_, u64>> {
match Self::parse(input, true) {
Ok(v) => v.into_u64().ok_or(ParseError::Overflow),
match parse(input, true) {
Ok(v) => into_u64(v).ok_or(ExtendedParserError::Overflow),
Err(e) => Err(e.map(|v, rest| {
v.into_u64()
.map(|v| ParseError::PartialMatch(v, rest))
.unwrap_or(ParseError::Overflow)
into_u64(v)
.map(|v| ExtendedParserError::PartialMatch(v, rest))
.unwrap_or(ExtendedParserError::Overflow)
})),
}
}
}
fn into_f64(self) -> f64 {
match self.number {
impl ExtendedParser for f64 {
/// Parse a number as f64
fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<'_, f64>> {
// TODO: This is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive).
fn into_f64(ebd: ExtendedBigDecimal) -> f64 {
match ebd {
ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(),
ExtendedBigDecimal::MinusZero => -0.0,
ExtendedBigDecimal::Nan => f64::NAN,
@ -151,22 +161,23 @@ impl ParsedNumber {
}
}
/// Parse a number as f64
pub fn parse_f64(input: &str) -> Result<f64, ParseError<'_, f64>> {
match Self::parse(input, false) {
Ok(v) => Ok(v.into_f64()),
Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))),
match parse(input, false) {
Ok(v) => Ok(into_f64(v)),
Err(e) => Err(e.map(|v, rest| ExtendedParserError::PartialMatch(into_f64(v), rest))),
}
}
}
fn parse_special_value(input: &str, negative: bool) -> Result<Self, ParseError<'_, Self>> {
fn parse_special_value(
input: &str,
negative: bool,
) -> Result<ExtendedBigDecimal, ExtendedParserError<'_, ExtendedBigDecimal>> {
let prefix = input
.chars()
.take(3)
.map(|c| c.to_ascii_lowercase())
.collect::<String>();
let special = Self {
number: match prefix.as_str() {
let special = match prefix.as_str() {
"inf" => {
if negative {
ExtendedBigDecimal::MinusInfinity
@ -181,28 +192,30 @@ impl ParsedNumber {
ExtendedBigDecimal::Nan
}
}
_ => return Err(ParseError::NotNumeric),
},
_ => return Err(ExtendedParserError::NotNumeric),
};
if input.len() == 3 {
Ok(special)
} else {
Err(ParseError::PartialMatch(special, &input[3..]))
}
Err(ExtendedParserError::PartialMatch(special, &input[3..]))
}
}
#[allow(clippy::cognitive_complexity)]
fn parse(input: &str, integral_only: bool) -> Result<Self, ParseError<'_, Self>> {
#[allow(clippy::cognitive_complexity)]
fn parse(
input: &str,
integral_only: bool,
) -> Result<ExtendedBigDecimal, ExtendedParserError<'_, ExtendedBigDecimal>> {
// Parse the "'" prefix separately
if let Some(rest) = input.strip_prefix('\'') {
let mut chars = rest.char_indices().fuse();
let v = chars.next().map(|(_, c)| Self {
number: ExtendedBigDecimal::BigDecimal(u32::from(c).into()),
});
let v = chars
.next()
.map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into()));
return match (v, chars.next()) {
(Some(v), None) => Ok(v),
(Some(v), Some((i, _))) => Err(ParseError::PartialMatch(v, &rest[i..])),
(None, _) => Err(ParseError::NotNumeric),
(Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])),
(None, _) => Err(ExtendedParserError::NotNumeric),
};
}
@ -233,7 +246,7 @@ impl ParsedNumber {
(Base::Decimal, unsigned)
};
if rest.is_empty() {
return Err(ParseError::NotNumeric);
return Err(ExtendedParserError::NotNumeric);
}
// Parse the integral part of the number
@ -261,9 +274,9 @@ impl ParsedNumber {
// If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful
if let Some((0, _)) = chars.peek() {
if integral_only {
return Err(ParseError::NotNumeric);
return Err(ExtendedParserError::NotNumeric);
} else {
return Self::parse_special_value(unsigned, negative);
return parse_special_value(unsigned, negative);
}
}
@ -286,166 +299,158 @@ impl ParsedNumber {
// Return what has been parsed so far. If there are extra characters, mark the
// parsing as a partial match.
let parsed = Self { number: ebd };
if let Some((first_unparsed, _)) = chars.next() {
Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..]))
Err(ExtendedParserError::PartialMatch(
ebd,
&rest[first_unparsed..],
))
} else {
Ok(parsed)
}
Ok(ebd)
}
}
#[cfg(test)]
mod tests {
use super::{ParseError, ParsedNumber};
use super::{ExtendedParser, ExtendedParserError};
#[test]
fn test_decimal_u64() {
assert_eq!(Ok(123), ParsedNumber::parse_u64("123"));
assert_eq!(
Ok(u64::MAX),
ParsedNumber::parse_u64(&format!("{}", u64::MAX))
);
assert_eq!(Ok(123), u64::extended_parse("123"));
assert_eq!(Ok(u64::MAX), u64::extended_parse(&format!("{}", u64::MAX)));
assert!(matches!(
ParsedNumber::parse_u64("-123"),
Err(ParseError::Overflow)
u64::extended_parse("-123"),
Err(ExtendedParserError::Overflow)
));
assert!(matches!(
ParsedNumber::parse_u64(""),
Err(ParseError::NotNumeric)
u64::extended_parse(""),
Err(ExtendedParserError::NotNumeric)
));
assert!(matches!(
ParsedNumber::parse_u64("123.15"),
Err(ParseError::PartialMatch(123, ".15"))
u64::extended_parse("123.15"),
Err(ExtendedParserError::PartialMatch(123, ".15"))
));
}
#[test]
fn test_decimal_i64() {
assert_eq!(Ok(123), ParsedNumber::parse_i64("123"));
assert_eq!(Ok(-123), ParsedNumber::parse_i64("-123"));
assert_eq!(Ok(123), i64::extended_parse("123"));
assert_eq!(Ok(-123), i64::extended_parse("-123"));
assert!(matches!(
ParsedNumber::parse_i64("--123"),
Err(ParseError::NotNumeric)
i64::extended_parse("--123"),
Err(ExtendedParserError::NotNumeric)
));
assert_eq!(
Ok(i64::MAX),
ParsedNumber::parse_i64(&format!("{}", i64::MAX))
);
assert_eq!(
Ok(i64::MIN),
ParsedNumber::parse_i64(&format!("{}", i64::MIN))
);
assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX)));
assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN)));
assert!(matches!(
ParsedNumber::parse_i64(&format!("{}", u64::MAX)),
Err(ParseError::Overflow)
i64::extended_parse(&format!("{}", u64::MAX)),
Err(ExtendedParserError::Overflow)
));
assert!(matches!(
ParsedNumber::parse_i64(&format!("{}", i64::MAX as u64 + 1)),
Err(ParseError::Overflow)
i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
Err(ExtendedParserError::Overflow)
));
}
#[test]
fn test_decimal_f64() {
assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123"));
assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123"));
assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123."));
assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123."));
assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.0"));
assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.0"));
assert_eq!(Ok(123.15), ParsedNumber::parse_f64("123.15"));
assert_eq!(Ok(-123.15), ParsedNumber::parse_f64("-123.15"));
assert_eq!(Ok(0.15), ParsedNumber::parse_f64(".15"));
assert_eq!(Ok(-0.15), ParsedNumber::parse_f64("-.15"));
assert_eq!(Ok(123.0), f64::extended_parse("123"));
assert_eq!(Ok(-123.0), f64::extended_parse("-123"));
assert_eq!(Ok(123.0), f64::extended_parse("123."));
assert_eq!(Ok(-123.0), f64::extended_parse("-123."));
assert_eq!(Ok(123.0), f64::extended_parse("123.0"));
assert_eq!(Ok(-123.0), f64::extended_parse("-123.0"));
assert_eq!(Ok(123.15), f64::extended_parse("123.15"));
assert_eq!(Ok(-123.15), f64::extended_parse("-123.15"));
assert_eq!(Ok(0.15), f64::extended_parse(".15"));
assert_eq!(Ok(-0.15), f64::extended_parse("-.15"));
assert_eq!(
Ok(0.15),
ParsedNumber::parse_f64(".150000000000000000000000000231313")
f64::extended_parse(".150000000000000000000000000231313")
);
assert!(matches!(ParsedNumber::parse_f64("1.2.3"),
Err(ParseError::PartialMatch(f, ".3")) if f == 1.2));
assert_eq!(Ok(f64::INFINITY), ParsedNumber::parse_f64("inf"));
assert_eq!(Ok(f64::NEG_INFINITY), ParsedNumber::parse_f64("-inf"));
assert!(ParsedNumber::parse_f64("NaN").unwrap().is_nan());
assert!(ParsedNumber::parse_f64("NaN").unwrap().is_sign_positive());
assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_nan());
assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_sign_negative());
assert!(matches!(ParsedNumber::parse_f64("-infinity"),
Err(ParseError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY));
assert!(ParsedNumber::parse_f64(&format!("{}", u64::MAX)).is_ok());
assert!(ParsedNumber::parse_f64(&format!("{}", i64::MIN)).is_ok());
assert!(matches!(f64::extended_parse("1.2.3"),
Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
assert_eq!(Ok(f64::INFINITY), f64::extended_parse("inf"));
assert_eq!(Ok(f64::NEG_INFINITY), f64::extended_parse("-inf"));
assert!(f64::extended_parse("NaN").unwrap().is_nan());
assert!(f64::extended_parse("NaN").unwrap().is_sign_positive());
assert!(f64::extended_parse("-NaN").unwrap().is_nan());
assert!(f64::extended_parse("-NaN").unwrap().is_sign_negative());
assert!(matches!(f64::extended_parse("-infinity"),
Err(ExtendedParserError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY));
assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok());
assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok());
}
#[test]
fn test_hexadecimal() {
assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0x123"));
assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0X123"));
assert_eq!(Ok(0xfe), ParsedNumber::parse_u64("0xfE"));
assert_eq!(Ok(-0x123), ParsedNumber::parse_i64("-0x123"));
assert_eq!(Ok(0x123), u64::extended_parse("0x123"));
assert_eq!(Ok(0x123), u64::extended_parse("0X123"));
assert_eq!(Ok(0xfe), u64::extended_parse("0xfE"));
assert_eq!(Ok(-0x123), i64::extended_parse("-0x123"));
assert_eq!(Ok(0.5), ParsedNumber::parse_f64("0x.8"));
assert_eq!(Ok(0.0625), ParsedNumber::parse_f64("0x.1"));
assert_eq!(Ok(15.007_812_5), ParsedNumber::parse_f64("0xf.02"));
assert_eq!(Ok(0.5), f64::extended_parse("0x.8"));
assert_eq!(Ok(0.0625), f64::extended_parse("0x.1"));
assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02"));
}
#[test]
fn test_octal() {
assert_eq!(Ok(0), ParsedNumber::parse_u64("0"));
assert_eq!(Ok(0o123), ParsedNumber::parse_u64("0123"));
assert_eq!(Ok(0o123), ParsedNumber::parse_u64("00123"));
assert_eq!(Ok(0), ParsedNumber::parse_u64("00"));
assert_eq!(Ok(0), u64::extended_parse("0"));
assert_eq!(Ok(0o123), u64::extended_parse("0123"));
assert_eq!(Ok(0o123), u64::extended_parse("00123"));
assert_eq!(Ok(0), u64::extended_parse("00"));
assert!(matches!(
ParsedNumber::parse_u64("008"),
Err(ParseError::PartialMatch(0, "8"))
u64::extended_parse("008"),
Err(ExtendedParserError::PartialMatch(0, "8"))
));
assert!(matches!(
ParsedNumber::parse_u64("08"),
Err(ParseError::PartialMatch(0, "8"))
u64::extended_parse("08"),
Err(ExtendedParserError::PartialMatch(0, "8"))
));
assert!(matches!(
ParsedNumber::parse_u64("0."),
Err(ParseError::PartialMatch(0, "."))
u64::extended_parse("0."),
Err(ExtendedParserError::PartialMatch(0, "."))
));
}
#[test]
fn test_binary() {
assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0b1011"));
assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0B1011"));
assert_eq!(Ok(0b1011), u64::extended_parse("0b1011"));
assert_eq!(Ok(0b1011), u64::extended_parse("0B1011"));
}
#[test]
fn test_parsing_with_leading_whitespace() {
assert_eq!(Ok(1), ParsedNumber::parse_u64(" 0x1"));
assert_eq!(Ok(-2), ParsedNumber::parse_i64(" -0x2"));
assert_eq!(Ok(-3), ParsedNumber::parse_i64(" \t-0x3"));
assert_eq!(Ok(-4), ParsedNumber::parse_i64(" \n-0x4"));
assert_eq!(Ok(-5), ParsedNumber::parse_i64(" \n\t\u{000d}-0x5"));
assert_eq!(Ok(1), u64::extended_parse(" 0x1"));
assert_eq!(Ok(-2), i64::extended_parse(" -0x2"));
assert_eq!(Ok(-3), i64::extended_parse(" \t-0x3"));
assert_eq!(Ok(-4), i64::extended_parse(" \n-0x4"));
assert_eq!(Ok(-5), i64::extended_parse(" \n\t\u{000d}-0x5"));
// Ensure that trailing whitespace is still a partial match
assert_eq!(
Err(ParseError::PartialMatch(6, " ")),
ParsedNumber::parse_u64("0x6 ")
Err(ExtendedParserError::PartialMatch(6, " ")),
u64::extended_parse("0x6 ")
);
assert_eq!(
Err(ParseError::PartialMatch(7, "\t")),
ParsedNumber::parse_u64("0x7\t")
Err(ExtendedParserError::PartialMatch(7, "\t")),
u64::extended_parse("0x7\t")
);
assert_eq!(
Err(ParseError::PartialMatch(8, "\n")),
ParsedNumber::parse_u64("0x8\n")
Err(ExtendedParserError::PartialMatch(8, "\n")),
u64::extended_parse("0x8\n")
);
// Ensure that unicode non-ascii whitespace is not skipped as leading whitespace (the input is not numeric)
assert_eq!(
Err(ParseError::NotNumeric),
ParsedNumber::parse_i64("\u{2029}-0x9")
Err(ExtendedParserError::NotNumeric),
i64::extended_parse("\u{2029}-0x9")
);
// Ensure that whitespace after the number has "started" is not allowed
assert_eq!(
Err(ParseError::NotNumeric),
ParsedNumber::parse_i64("- 0x9")
Err(ExtendedParserError::NotNumeric),
i64::extended_parse("- 0x9")
);
}
}