From e5eb004793157134defa2316956e9bedba777c32 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 8 Apr 2025 14:21:12 +0200 Subject: [PATCH 1/4] uucore: parser: num_parser: Return error if no digit has been parsed This is mostly important when parsing digits like `.` and `0x.` where we should return an error. Fixes #7684. --- .../src/lib/features/parser/num_parser.rs | 58 +++++++++++++++++-- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/uucore/src/lib/features/parser/num_parser.rs b/src/uucore/src/lib/features/parser/num_parser.rs index 8e0968509..3b520a977 100644 --- a/src/uucore/src/lib/features/parser/num_parser.rs +++ b/src/uucore/src/lib/features/parser/num_parser.rs @@ -456,12 +456,12 @@ pub(crate) fn parse<'a>( // Parse the integral part of the number let mut chars = rest.chars().enumerate().fuse().peekable(); - let mut digits = BigUint::zero(); + let mut digits: Option = None; let mut scale = 0u64; let mut exponent = BigInt::zero(); while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); - digits = digits * base as u8 + d; + digits = Some(digits.unwrap_or_default() * base as u8 + d); } // Parse fractional/exponent part of the number for supported bases. @@ -472,7 +472,7 @@ pub(crate) fn parse<'a>( chars.next(); while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); - (digits, scale) = (digits * base as u8 + d, scale + 1); + (digits, scale) = (Some(digits.unwrap_or_default() * base as u8 + d), scale + 1); } } @@ -509,8 +509,8 @@ pub(crate) fn parse<'a>( } } - // If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful - if let Some((0, _)) = chars.peek() { + // If no digit has been parsed, check if this is a special value, or declare the parsing unsuccessful + if digits.is_none() { return if target == ParseTarget::Integral { Err(ExtendedParserError::NotNumeric) } else { @@ -518,6 +518,8 @@ pub(crate) fn parse<'a>( }; } + let mut digits = digits.unwrap(); + if let Some((_, ch)) = chars.peek() { if let Some(times) = allowed_suffixes .iter() @@ -529,7 +531,8 @@ pub(crate) fn parse<'a>( } } - let ebd_result = construct_extended_big_decimal(digits, negative, base, scale, exponent); + let ebd_result = + construct_extended_big_decimal(digits, negative, base, scale, exponent); // Return what has been parsed so far. If there are extra characters, mark the // parsing as a partial match. @@ -625,6 +628,15 @@ mod tests { i64::extended_parse(&format!("{}", i64::MIN as i128 - 1)), Err(ExtendedParserError::Overflow(i64::MIN)) )); + + assert!(matches!( + i64::extended_parse(""), + Err(ExtendedParserError::NotNumeric) + )); + assert!(matches!( + i64::extended_parse("."), + Err(ExtendedParserError::NotNumeric) + )); } #[test] @@ -811,6 +823,16 @@ mod tests { ExtendedBigDecimal::extended_parse(&format!("-0e{}", i64::MIN + 2)), Ok(ExtendedBigDecimal::MinusZero) ); + + /* Invalid numbers */ + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse(".") + ); } #[test] @@ -887,6 +909,16 @@ mod tests { ExtendedBigDecimal::MinusZero )) )); + + // TODO: GNU coreutils treats these 2 as partial match. + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("0x") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("0x.") + ); } #[test] @@ -935,6 +967,20 @@ mod tests { ebd == ExtendedBigDecimal::zero(), _ => false, }); + + assert!(match ExtendedBigDecimal::extended_parse("0b") { + Err(ExtendedParserError::PartialMatch(ebd, "b")) => ebd == ExtendedBigDecimal::zero(), + _ => false, + }); + assert!(match ExtendedBigDecimal::extended_parse("0b.") { + Err(ExtendedParserError::PartialMatch(ebd, "b.")) => ebd == ExtendedBigDecimal::zero(), + _ => false, + }); + // TODO: GNU coreutils treats this as partial match. + assert_eq!( + Err(ExtendedParserError::NotNumeric), + u64::extended_parse("0b") + ); } #[test] From 8363274f751f6bda812e5c38eea0c0ff71886ae3 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 8 Apr 2025 14:52:24 +0200 Subject: [PATCH 2/4] uucore: parser: num_parser: Ignore empty exponents Numbers like 123.15e or 123.15e- should return PartialMatch. Numbers like `e`, `.e` are not valid. Fixes #7685. --- .../src/lib/features/parser/num_parser.rs | 78 +++++++++++++++++-- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/src/uucore/src/lib/features/parser/num_parser.rs b/src/uucore/src/lib/features/parser/num_parser.rs index 3b520a977..77c6aa82d 100644 --- a/src/uucore/src/lib/features/parser/num_parser.rs +++ b/src/uucore/src/lib/features/parser/num_parser.rs @@ -458,7 +458,7 @@ pub(crate) fn parse<'a>( let mut chars = rest.chars().enumerate().fuse().peekable(); let mut digits: Option = None; let mut scale = 0u64; - let mut exponent = BigInt::zero(); + let mut exponent: Option = None; while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { chars.next(); digits = Some(digits.unwrap_or_default() * base as u8 + d); @@ -487,6 +487,8 @@ pub(crate) fn parse<'a>( .peek() .is_some_and(|&(_, c)| c.to_ascii_lowercase() == exp_char) { + // Save the iterator position in case we do not parse any exponent. + let save_chars = chars.clone(); chars.next(); let exp_negative = match chars.peek() { Some((_, '-')) => { @@ -501,10 +503,15 @@ pub(crate) fn parse<'a>( }; while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) { chars.next(); - exponent = exponent * 10 + d as i64; + exponent = Some(exponent.unwrap_or_default() * 10 + d as i64); } - if exp_negative { - exponent = -exponent; + if let Some(exp) = &exponent { + if exp_negative { + exponent = Some(-exp); + } + } else { + // No exponent actually parsed, reset iterator to return partial match. + chars = save_chars; } } } @@ -532,7 +539,7 @@ pub(crate) fn parse<'a>( } let ebd_result = - construct_extended_big_decimal(digits, negative, base, scale, exponent); + construct_extended_big_decimal(digits, negative, base, scale, exponent.unwrap_or_default()); // Return what has been parsed so far. If there are extra characters, mark the // parsing as a partial match. @@ -671,6 +678,16 @@ mod tests { Ok(0.15), f64::extended_parse(".150000000000000000000000000231313") ); + assert!(matches!(f64::extended_parse("123.15e"), + Err(ExtendedParserError::PartialMatch(f, "e")) if f == 123.15)); + assert!(matches!(f64::extended_parse("123.15E"), + Err(ExtendedParserError::PartialMatch(f, "E")) if f == 123.15)); + assert!(matches!(f64::extended_parse("123.15e-"), + Err(ExtendedParserError::PartialMatch(f, "e-")) if f == 123.15)); + assert!(matches!(f64::extended_parse("123.15e+"), + Err(ExtendedParserError::PartialMatch(f, "e+")) if f == 123.15)); + assert!(matches!(f64::extended_parse("123.15e."), + Err(ExtendedParserError::PartialMatch(f, "e.")) if f == 123.15)); assert!(matches!(f64::extended_parse("1.2.3"), Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); assert!(matches!(f64::extended_parse("123.15p5"), @@ -833,6 +850,38 @@ mod tests { Err(ExtendedParserError::NotNumeric), ExtendedBigDecimal::extended_parse(".") ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("e") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse(".e") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("-e") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("+.e") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("e10") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("e-10") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("-e10") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("+e10") + ); } #[test] @@ -853,6 +902,15 @@ mod tests { // but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3 assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5")); + assert!(matches!(f64::extended_parse("0x0.1p"), + Err(ExtendedParserError::PartialMatch(f, "p")) if f == 0.0625)); + assert!(matches!(f64::extended_parse("0x0.1p-"), + Err(ExtendedParserError::PartialMatch(f, "p-")) if f == 0.0625)); + assert!(matches!(f64::extended_parse("0x.1p+"), + Err(ExtendedParserError::PartialMatch(f, "p+")) if f == 0.0625)); + assert!(matches!(f64::extended_parse("0x.1p."), + Err(ExtendedParserError::PartialMatch(f, "p.")) if f == 0.0625)); + assert_eq!( Ok(ExtendedBigDecimal::BigDecimal( BigDecimal::from_str("0.0625").unwrap() @@ -910,7 +968,7 @@ mod tests { )) )); - // TODO: GNU coreutils treats these 2 as partial match. + // TODO: GNU coreutils treats these as partial matches. assert_eq!( Err(ExtendedParserError::NotNumeric), ExtendedBigDecimal::extended_parse("0x") @@ -919,6 +977,14 @@ mod tests { Err(ExtendedParserError::NotNumeric), ExtendedBigDecimal::extended_parse("0x.") ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("0xp") + ); + assert_eq!( + Err(ExtendedParserError::NotNumeric), + ExtendedBigDecimal::extended_parse("0xp-2") + ); } #[test] From a4c56fbcee218e8b083dc15bfef769639ad3099a Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 8 Apr 2025 15:20:29 +0200 Subject: [PATCH 3/4] uucore: parser: num_parser: Parse "0x"/"0b" as PartialMatch printf treats "0x" as a partial match of 0 and "x". --- .../src/lib/features/parser/num_parser.rs | 91 ++++++++++++++----- 1 file changed, 66 insertions(+), 25 deletions(-) diff --git a/src/uucore/src/lib/features/parser/num_parser.rs b/src/uucore/src/lib/features/parser/num_parser.rs index 77c6aa82d..303f48534 100644 --- a/src/uucore/src/lib/features/parser/num_parser.rs +++ b/src/uucore/src/lib/features/parser/num_parser.rs @@ -450,9 +450,6 @@ pub(crate) fn parse<'a>( } else { (Base::Decimal, unsigned) }; - if rest.is_empty() { - return Err(ExtendedParserError::NotNumeric); - } // Parse the integral part of the number let mut chars = rest.chars().enumerate().fuse().peekable(); @@ -518,6 +515,16 @@ pub(crate) fn parse<'a>( // If no digit has been parsed, check if this is a special value, or declare the parsing unsuccessful if digits.is_none() { + // If we trimmed an initial `0x`/`0b`, return a partial match. + if rest != unsigned { + let ebd = if negative { + ExtendedBigDecimal::MinusZero + } else { + ExtendedBigDecimal::zero() + }; + return Err(ExtendedParserError::PartialMatch(ebd, &unsigned[1..])); + } + return if target == ParseTarget::Integral { Err(ExtendedParserError::NotNumeric) } else { @@ -968,23 +975,41 @@ mod tests { )) )); - // TODO: GNU coreutils treats these as partial matches. - assert_eq!( - Err(ExtendedParserError::NotNumeric), - ExtendedBigDecimal::extended_parse("0x") - ); - assert_eq!( - Err(ExtendedParserError::NotNumeric), - ExtendedBigDecimal::extended_parse("0x.") - ); - assert_eq!( - Err(ExtendedParserError::NotNumeric), - ExtendedBigDecimal::extended_parse("0xp") - ); - assert_eq!( - Err(ExtendedParserError::NotNumeric), - ExtendedBigDecimal::extended_parse("0xp-2") - ); + // Not actually hex numbers, but the prefixes look like it. + assert!(matches!(f64::extended_parse("0x"), + Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0)); + assert!(matches!(f64::extended_parse("0x."), + Err(ExtendedParserError::PartialMatch(f, "x.")) if f == 0.0)); + assert!(matches!(f64::extended_parse("0xp"), + Err(ExtendedParserError::PartialMatch(f, "xp")) if f == 0.0)); + assert!(matches!(f64::extended_parse("0xp-2"), + Err(ExtendedParserError::PartialMatch(f, "xp-2")) if f == 0.0)); + assert!(matches!(f64::extended_parse("0x.p-2"), + Err(ExtendedParserError::PartialMatch(f, "x.p-2")) if f == 0.0)); + assert!(matches!(f64::extended_parse("0X"), + Err(ExtendedParserError::PartialMatch(f, "X")) if f == 0.0)); + assert!(matches!(f64::extended_parse("-0x"), + Err(ExtendedParserError::PartialMatch(f, "x")) if f == -0.0)); + assert!(matches!(f64::extended_parse("+0x"), + Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0)); + assert!(matches!(f64::extended_parse("-0x."), + Err(ExtendedParserError::PartialMatch(f, "x.")) if f == -0.0)); + assert!(matches!( + u64::extended_parse("0x"), + Err(ExtendedParserError::PartialMatch(0, "x")) + )); + assert!(matches!( + u64::extended_parse("-0x"), + Err(ExtendedParserError::PartialMatch(0, "x")) + )); + assert!(matches!( + i64::extended_parse("0x"), + Err(ExtendedParserError::PartialMatch(0, "x")) + )); + assert!(matches!( + i64::extended_parse("-0x"), + Err(ExtendedParserError::PartialMatch(0, "x")) + )); } #[test] @@ -1018,6 +1043,27 @@ mod tests { assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011")); assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011")); + assert!(matches!( + u64::extended_parse("0b"), + Err(ExtendedParserError::PartialMatch(0, "b")) + )); + assert!(matches!( + u64::extended_parse("0b."), + Err(ExtendedParserError::PartialMatch(0, "b.")) + )); + assert!(matches!( + u64::extended_parse("-0b"), + Err(ExtendedParserError::PartialMatch(0, "b")) + )); + assert!(matches!( + i64::extended_parse("0b"), + Err(ExtendedParserError::PartialMatch(0, "b")) + )); + assert!(matches!( + i64::extended_parse("-0b"), + Err(ExtendedParserError::PartialMatch(0, "b")) + )); + // Binary not allowed for floats assert!(matches!( f64::extended_parse("0b100"), @@ -1042,11 +1088,6 @@ mod tests { Err(ExtendedParserError::PartialMatch(ebd, "b.")) => ebd == ExtendedBigDecimal::zero(), _ => false, }); - // TODO: GNU coreutils treats this as partial match. - assert_eq!( - Err(ExtendedParserError::NotNumeric), - u64::extended_parse("0b") - ); } #[test] From fce9f7961802efee77abb58a8bf11c6506fe036d Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 15 Apr 2025 20:55:56 +0200 Subject: [PATCH 4/4] tests: printf: Add more cases around 0, missing digits, etc. --- tests/by-util/test_printf.rs | 80 ++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 4c638986a..2d059c0fa 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -826,6 +826,12 @@ fn partial_integer() { .fails_with_code(1) .stdout_is("42 is a lot") .stderr_is("printf: '42x23': value not completely converted\n"); + + new_ucmd!() + .args(&["%d is not %s", "0xwa", "a lot"]) + .fails_with_code(1) + .stdout_is("0 is not a lot") + .stderr_is("printf: '0xwa': value not completely converted\n"); } #[test] @@ -1280,6 +1286,80 @@ fn float_switch_switch_decimal_scientific() { .stdout_only("1e-05"); } +#[test] +fn float_arg_zero() { + new_ucmd!() + .args(&["%f", "0."]) + .succeeds() + .stdout_only("0.000000"); + + new_ucmd!() + .args(&["%f", ".0"]) + .succeeds() + .stdout_only("0.000000"); + + new_ucmd!() + .args(&["%f", ".0e100000"]) + .succeeds() + .stdout_only("0.000000"); +} + +#[test] +fn float_arg_invalid() { + // Just a dot fails. + new_ucmd!() + .args(&["%f", "."]) + .fails() + .stdout_is("0.000000") + .stderr_contains("expected a numeric value"); + + new_ucmd!() + .args(&["%f", "-."]) + .fails() + .stdout_is("0.000000") + .stderr_contains("expected a numeric value"); + + // Just an exponent indicator fails. + new_ucmd!() + .args(&["%f", "e"]) + .fails() + .stdout_is("0.000000") + .stderr_contains("expected a numeric value"); + + // No digit but only exponent fails + new_ucmd!() + .args(&["%f", ".e12"]) + .fails() + .stdout_is("0.000000") + .stderr_contains("expected a numeric value"); + + // No exponent partially fails + new_ucmd!() + .args(&["%f", "123e"]) + .fails() + .stdout_is("123.000000") + .stderr_contains("value not completely converted"); + + // Nothing past `0x` parses as zero + new_ucmd!() + .args(&["%f", "0x"]) + .fails() + .stdout_is("0.000000") + .stderr_contains("value not completely converted"); + + new_ucmd!() + .args(&["%f", "0x."]) + .fails() + .stdout_is("0.000000") + .stderr_contains("value not completely converted"); + + new_ucmd!() + .args(&["%f", "0xp12"]) + .fails() + .stdout_is("0.000000") + .stderr_contains("value not completely converted"); +} + #[test] fn float_arg_with_whitespace() { new_ucmd!()