Merge pull request #5783 from samueltardieu/printf-compatibility

printf compatibility
2025-07-28 11:37:44 +00:00 · 2024-01-10 16:33:37 +01:00 · 2024-01-10 16:33:37 +01:00 · 0071442cba
commit 0071442cba
parent cd9863dd29 a85a792c88
4 changed files with 475 additions and 72 deletions
--- a/src/uucore/src/lib/features/format/argument.rs
+++ b/src/uucore/src/lib/features/format/argument.rs
@ -3,9 +3,14 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 use crate::{
    error::set_exit_code,
    features::format::num_parser::{ParseError, ParsedNumber},
    quoting_style::{escape_name, Quotes, QuotingStyle},
    show_error, show_warning,
 };
 use os_display::Quotable;
-
+use std::ffi::OsStr;
 use crate::{error::set_exit_code, show_warning};
 /// An argument for formatting
 ///
@ -40,16 +45,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
        };
        match next {
            FormatArgument::Char(c) => *c,
-            FormatArgument::Unparsed(s) => {
+            FormatArgument::Unparsed(s) => s.chars().next().unwrap_or('\0'),
                let mut chars = s.chars();
                let Some(c) = chars.next() else {
                    return '\0';
                };
                let None = chars.next() else {
                    return '\0';
                };
                c
            }
            _ => '\0',
        }
    }
@ -60,25 +56,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
        };
        match next {
            FormatArgument::UnsignedInt(n) => *n,
-            FormatArgument::Unparsed(s) => {
+            FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s),
                let opt = if let Some(s) = s.strip_prefix("0x") {
                    u64::from_str_radix(s, 16).ok()
                } else if let Some(s) = s.strip_prefix('0') {
                    u64::from_str_radix(s, 8).ok()
                } else if let Some(s) = s.strip_prefix('\'') {
                    s.chars().next().map(|c| c as u64)
                } else {
                    s.parse().ok()
                };
                match opt {
                    Some(n) => n,
                    None => {
                        show_warning!("{}: expected a numeric value", s.quote());
                        set_exit_code(1);
                        0
                    }
                }
            }
            _ => 0,
        }
    }
@ -89,29 +67,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
        };
        match next {
            FormatArgument::SignedInt(n) => *n,
-            FormatArgument::Unparsed(s) => {
+            FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_i64(s), s),
                // For hex, we parse `u64` because we do not allow another
                // minus sign. We might need to do more precise parsing here.
                let opt = if let Some(s) = s.strip_prefix("-0x") {
                    u64::from_str_radix(s, 16).ok().map(|x| -(x as i64))
                } else if let Some(s) = s.strip_prefix("0x") {
                    u64::from_str_radix(s, 16).ok().map(|x| x as i64)
                } else if s.starts_with("-0") || s.starts_with('0') {
                    i64::from_str_radix(s, 8).ok()
                } else if let Some(s) = s.strip_prefix('\'') {
                    s.chars().next().map(|x| x as i64)
                } else {
                    s.parse().ok()
                };
                match opt {
                    Some(n) => n,
                    None => {
                        show_warning!("{}: expected a numeric value", s.quote());
                        set_exit_code(1);
                        0
                    }
                }
            }
            _ => 0,
        }
    }
@ -122,23 +78,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
        };
        match next {
            FormatArgument::Float(n) => *n,
-            FormatArgument::Unparsed(s) => {
+            FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_f64(s), s),
                let opt = if s.starts_with("0x") || s.starts_with("-0x") {
                    unimplemented!("Hexadecimal floats are unimplemented!")
                } else if let Some(s) = s.strip_prefix('\'') {
                    s.chars().next().map(|x| x as u64 as f64)
                } else {
                    s.parse().ok()
                };
                match opt {
                    Some(n) => n,
                    None => {
                        show_warning!("{}: expected a numeric value", s.quote());
                        set_exit_code(1);
                        0.0
                    }
                }
            }
            _ => 0.0,
        }
    }
@ -150,3 +90,39 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
        }
    }
 }
 /// Unwrap the outcome of a numeric parse, reporting any failure to the user.
 ///
 /// On success the parsed value is returned untouched. On failure the exit
 /// code is set to 1 and a diagnostic is emitted that mentions `input`,
 /// escaped with the C quoting style and no surrounding quotes:
 ///
 /// - a partial match yields the value parsed so far; a warning is shown when
 ///   the escaped input starts with `'`, an error otherwise;
 /// - a non-numeric input or an overflow yields `T::default()`.
 fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T {
    let failure = match p {
        Ok(value) => return value,
        Err(failure) => failure,
    };
    // Every failure, even a partial match, makes the invocation unsuccessful.
    set_exit_code(1);
    let style = QuotingStyle::C {
        quotes: Quotes::None,
    };
    let input = escape_name(OsStr::new(input), &style);
    match failure {
        ParseError::PartialMatch(value, rest) => {
            if input.starts_with('\'') {
                show_warning!(
                    "{}: character(s) following character constant have been ignored",
                    &rest,
                );
            } else {
                show_error!("{}: value not completely converted", input.quote());
            }
            value
        }
        ParseError::NotNumeric => {
            show_error!("{}: expected a numeric value", input.quote());
            T::default()
        }
        ParseError::Overflow => {
            show_error!("{}: Numerical result out of range", input.quote());
            T::default()
        }
    }
 }
--- a/src/uucore/src/lib/features/format/mod.rs
+++ b/src/uucore/src/lib/features/format/mod.rs
@ -33,6 +33,7 @@
 mod argument;
 mod escape;
 pub mod num_format;
 pub mod num_parser;
 mod spec;
 pub use argument::*;
--- a/src/uucore/src/lib/features/format/num_parser.rs
+++ b/src/uucore/src/lib/features/format/num_parser.rs
@ -0,0 +1,378 @@
 // This file is part of the uutils coreutils package.
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 //! Utilities for parsing numbers in various formats
 // spell-checker:ignore powf copysign prec inity
 /// Numeric base of a parsed number. Each variant's discriminant is its radix.
 #[derive(Clone, Copy, PartialEq)]
 pub enum Base {
    /// Radix 2, digits `0` and `1`.
    Binary = 2,
    /// Radix 8, digits `0` through `7`.
    Octal = 8,
    /// Radix 10, digits `0` through `9`.
    Decimal = 10,
    /// Radix 16, digits `0` through `9` and `a` through `f` in either case.
    Hexadecimal = 16,
 }
 impl Base {
    /// Return the value of `c` as a digit in this base, or `None` if `c` is
    /// not a valid digit for it.
    pub fn digit(&self, c: char) -> Option<u64> {
        // The discriminant is the radix, and `char::to_digit` accepts exactly
        // the ASCII digits (and case-insensitive letters) below that radix.
        let radix = *self as u32;
        c.to_digit(radix).map(u64::from)
    }
 }
 /// Reason why an input could not be parsed as a complete number.
 #[derive(Debug, PartialEq)]
 pub enum ParseError<'a, T> {
    /// No number could be read from the input at all.
    NotNumeric,
    /// A number was read from the start of the input; the second field is
    /// the trailing text that could not be parsed.
    PartialMatch(T, &'a str),
    /// The integral part does not fit in the requested type, or does not
    /// fit in the internal `u64` storage used while parsing the integral
    /// part of a floating point number.
    Overflow,
 }
 impl<'a, T> ParseError<'a, T> {
    /// Change the value type of the error.
    ///
    /// `f` is invoked only for a partial match and decides what it becomes;
    /// the two value-less variants are carried over unchanged.
    fn map<U>(self, f: impl FnOnce(T, &'a str) -> ParseError<'a, U>) -> ParseError<'a, U> {
        match self {
            Self::PartialMatch(value, remainder) => f(value, remainder),
            Self::Overflow => ParseError::Overflow,
            Self::NotNumeric => ParseError::NotNumeric,
        }
    }
 }
 /// A number parser for binary, octal, decimal, hexadecimal and single characters.
 ///
 /// Internally, in order to get the maximum possible precision and cover the full
 /// range of u64 and i64 without losing precision for f64, the returned number is
 /// decomposed into:
 ///   - A `base` value
 ///   - A `negative` sign flag
 ///   - An `integral` positive part
 ///   - A `fractional` positive part
 ///   - A `precision` representing the number of digits in the fractional part
 ///
 /// If the fractional part cannot be represented on a `u64`, parsing continues
 /// silently by ignoring non-significant digits.
 pub struct ParsedNumber {
    // Base the digits were written in; also the radix of the fractional digits.
    base: Base,
    // Whether the input started with a minus sign.
    negative: bool,
    // Magnitude of the part before the radix point.
    integral: u64,
    // Digits after the radix point, accumulated as an integer in `base`.
    fractional: u64,
    // Number of fractional digits accumulated into `fractional`.
    precision: usize,
 }
 impl ParsedNumber {
    fn into_i64(self) -> Option<i64> {
        if self.negative {
            i64::try_from(-i128::from(self.integral)).ok()
        } else {
            i64::try_from(self.integral).ok()
        }
    }
    /// Parse a number as i64. No fractional part is allowed.
    pub fn parse_i64(input: &str) -> Result<i64, ParseError<'_, i64>> {
        match Self::parse(input, true) {
            Ok(v) => v.into_i64().ok_or(ParseError::Overflow),
            Err(e) => Err(e.map(|v, rest| {
                v.into_i64()
                    .map(|v| ParseError::PartialMatch(v, rest))
                    .unwrap_or(ParseError::Overflow)
            })),
        }
    }
    /// Parse a number as u64. No fractional part is allowed.
    pub fn parse_u64(input: &str) -> Result<u64, ParseError<'_, u64>> {
        match Self::parse(input, true) {
            Ok(v) | Err(ParseError::PartialMatch(v, _)) if v.negative => {
                Err(ParseError::NotNumeric)
            }
            Ok(v) => Ok(v.integral),
            Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.integral, rest))),
        }
    }
    fn into_f64(self) -> f64 {
        let n = self.integral as f64
            + (self.fractional as f64) / (self.base as u8 as f64).powf(self.precision as f64);
        if self.negative {
            -n
        } else {
            n
        }
    }
    /// Parse a number as f64
    pub fn parse_f64(input: &str) -> Result<f64, ParseError<'_, f64>> {
        match Self::parse(input, false) {
            Ok(v) => Ok(v.into_f64()),
            Err(ParseError::NotNumeric) => Self::parse_f64_special_values(input),
            Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))),
        }
    }
    fn parse_f64_special_values(input: &str) -> Result<f64, ParseError<'_, f64>> {
        let (sign, rest) = if let Some(input) = input.strip_prefix('-') {
            (-1.0, input)
        } else {
            (1.0, input)
        };
        let prefix = rest
            .chars()
            .take(3)
            .map(|c| c.to_ascii_lowercase())
            .collect::<String>();
        let special = match prefix.as_str() {
            "inf" => f64::INFINITY,
            "nan" => f64::NAN,
            _ => return Err(ParseError::NotNumeric),
        }
        .copysign(sign);
        if rest.len() == 3 {
            Ok(special)
        } else {
            Err(ParseError::PartialMatch(special, &rest[3..]))
        }
    }
    #[allow(clippy::cognitive_complexity)]
    fn parse(input: &str, integral_only: bool) -> Result<Self, ParseError<'_, Self>> {
        // Parse the "'" prefix separately
        if let Some(rest) = input.strip_prefix('\'') {
            let mut chars = rest.char_indices().fuse();
            let v = chars.next().map(|(_, c)| Self {
                base: Base::Decimal,
                negative: false,
                integral: u64::from(c),
                fractional: 0,
                precision: 0,
            });
            return match (v, chars.next()) {
                (Some(v), None) => Ok(v),
                (Some(v), Some((i, _))) => Err(ParseError::PartialMatch(v, &rest[i..])),
                (None, _) => Err(ParseError::NotNumeric),
            };
        }
        // Initial minus sign
        let (negative, unsigned) = if let Some(input) = input.strip_prefix('-') {
            (true, input)
        } else {
            (false, input)
        };
        // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a
        // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix
        // will not be consumed in case the parsable string contains only "0": the leading extra "0"
        // will have no influence on the result.
        let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') {
            if let Some(rest) = rest.strip_prefix(['b', 'B']) {
                (Base::Binary, rest)
            } else if let Some(rest) = rest.strip_prefix(['x', 'X']) {
                (Base::Hexadecimal, rest)
            } else if integral_only {
                (Base::Octal, unsigned)
            } else {
                (Base::Decimal, unsigned)
            }
        } else {
            (Base::Decimal, unsigned)
        };
        if rest.is_empty() {
            return Err(ParseError::NotNumeric);
        }
        // Parse the integral part of the number
        let mut chars = rest.chars().enumerate().fuse().peekable();
        let mut integral = 0u64;
        while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
            chars.next();
            integral = integral
                .checked_mul(base as u64)
                .and_then(|n| n.checked_add(d))
                .ok_or(ParseError::Overflow)?;
        }
        // Parse the fractional part of the number if there can be one and the input contains
        // a '.' decimal separator.
        let (mut fractional, mut precision) = (0u64, 0);
        if matches!(chars.peek(), Some(&(_, '.')))
            && matches!(base, Base::Decimal | Base::Hexadecimal)
            && !integral_only
        {
            chars.next();
            let mut ended = false;
            while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
                chars.next();
                if !ended {
                    if let Some(f) = fractional
                        .checked_mul(base as u64)
                        .and_then(|n| n.checked_add(d))
                    {
                        (fractional, precision) = (f, precision + 1);
                    } else {
                        ended = true;
                    }
                }
            }
        }
        // If nothing has been parsed, declare the parsing unsuccessful
        if let Some((0, _)) = chars.peek() {
            return Err(ParseError::NotNumeric);
        }
        // Return what has been parsed so far. It there are extra characters, mark the
        // parsing as a partial match.
        let parsed = Self {
            base,
            negative,
            integral,
            fractional,
            precision,
        };
        if let Some((first_unparsed, _)) = chars.next() {
            Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..]))
        } else {
            Ok(parsed)
        }
    }
 }
 // Unit tests for the three public entry points of `ParsedNumber`
 // (`parse_u64`, `parse_i64`, `parse_f64`), grouped by input notation.
 #[cfg(test)]
 mod tests {
    use super::{ParseError, ParsedNumber};
    // Decimal input parsed as `u64`: full range, rejection of negative and
    // empty input, and a partial match when a fractional part follows.
    #[test]
    fn test_decimal_u64() {
        assert_eq!(Ok(123), ParsedNumber::parse_u64("123"));
        assert_eq!(
            Ok(u64::MAX),
            ParsedNumber::parse_u64(&format!("{}", u64::MAX))
        );
        assert!(matches!(
            ParsedNumber::parse_u64("-123"),
            Err(ParseError::NotNumeric)
        ));
        assert!(matches!(
            ParsedNumber::parse_u64(""),
            Err(ParseError::NotNumeric)
        ));
        assert!(matches!(
            ParsedNumber::parse_u64("123.15"),
            Err(ParseError::PartialMatch(123, ".15"))
        ));
    }
    // Decimal input parsed as `i64`: sign handling, both range limits, and
    // overflow for magnitudes just past `i64::MAX`.
    #[test]
    fn test_decimal_i64() {
        assert_eq!(Ok(123), ParsedNumber::parse_i64("123"));
        assert_eq!(Ok(-123), ParsedNumber::parse_i64("-123"));
        assert!(matches!(
            ParsedNumber::parse_i64("--123"),
            Err(ParseError::NotNumeric)
        ));
        assert_eq!(
            Ok(i64::MAX),
            ParsedNumber::parse_i64(&format!("{}", i64::MAX))
        );
        assert_eq!(
            Ok(i64::MIN),
            ParsedNumber::parse_i64(&format!("{}", i64::MIN))
        );
        assert!(matches!(
            ParsedNumber::parse_i64(&format!("{}", u64::MAX)),
            Err(ParseError::Overflow)
        ));
        assert!(matches!(
            ParsedNumber::parse_i64(&format!("{}", i64::MAX as u64 + 1)),
            Err(ParseError::Overflow)
        ));
    }
    // Decimal input parsed as `f64`: optional integral or fractional part,
    // overlong fractional digits, trailing garbage, and `inf` / `nan`.
    #[test]
    fn test_decimal_f64() {
        assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123"));
        assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123"));
        assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123."));
        assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123."));
        assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.0"));
        assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.0"));
        assert_eq!(Ok(123.15), ParsedNumber::parse_f64("123.15"));
        assert_eq!(Ok(-123.15), ParsedNumber::parse_f64("-123.15"));
        assert_eq!(Ok(0.15), ParsedNumber::parse_f64(".15"));
        assert_eq!(Ok(-0.15), ParsedNumber::parse_f64("-.15"));
        assert_eq!(
            Ok(0.15),
            ParsedNumber::parse_f64(".150000000000000000000000000231313")
        );
        assert!(matches!(ParsedNumber::parse_f64("1.2.3"),
                         Err(ParseError::PartialMatch(f, ".3")) if f == 1.2));
        assert_eq!(Ok(f64::INFINITY), ParsedNumber::parse_f64("inf"));
        assert_eq!(Ok(f64::NEG_INFINITY), ParsedNumber::parse_f64("-inf"));
        assert!(ParsedNumber::parse_f64("NaN").unwrap().is_nan());
        assert!(ParsedNumber::parse_f64("NaN").unwrap().is_sign_positive());
        assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_nan());
        assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_sign_negative());
        assert!(matches!(ParsedNumber::parse_f64("-infinity"),
                         Err(ParseError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY));
        assert!(ParsedNumber::parse_f64(&format!("{}", u64::MAX)).is_ok());
        assert!(ParsedNumber::parse_f64(&format!("{}", i64::MIN)).is_ok());
    }
    // `0x` / `0X` prefixed input for all three target types, including
    // hexadecimal fractional digits.
    #[test]
    fn test_hexadecimal() {
        assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0x123"));
        assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0X123"));
        assert_eq!(Ok(0xfe), ParsedNumber::parse_u64("0xfE"));
        assert_eq!(Ok(-0x123), ParsedNumber::parse_i64("-0x123"));
        assert_eq!(Ok(0.5), ParsedNumber::parse_f64("0x.8"));
        assert_eq!(Ok(0.0625), ParsedNumber::parse_f64("0x.1"));
        assert_eq!(Ok(15.0078125), ParsedNumber::parse_f64("0xf.02"));
    }
    // A leading `0` selects octal for integers; a non-octal digit or a `.`
    // ends the number and yields a partial match.
    #[test]
    fn test_octal() {
        assert_eq!(Ok(0), ParsedNumber::parse_u64("0"));
        assert_eq!(Ok(0o123), ParsedNumber::parse_u64("0123"));
        assert_eq!(Ok(0o123), ParsedNumber::parse_u64("00123"));
        assert_eq!(Ok(0), ParsedNumber::parse_u64("00"));
        assert!(matches!(
            ParsedNumber::parse_u64("008"),
            Err(ParseError::PartialMatch(0, "8"))
        ));
        assert!(matches!(
            ParsedNumber::parse_u64("08"),
            Err(ParseError::PartialMatch(0, "8"))
        ));
        assert!(matches!(
            ParsedNumber::parse_u64("0."),
            Err(ParseError::PartialMatch(0, "."))
        ));
    }
    // `0b` / `0B` prefixed input.
    #[test]
    fn test_binary() {
        assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0b1011"));
        assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0B1011"));
    }
 }
--- a/tests/by-util/test_printf.rs
+++ b/tests/by-util/test_printf.rs
@ -162,6 +162,14 @@ fn sub_char() {
        .stdout_only("the letter A");
 }
 // `%c` given a multi-character argument prints only its first character.
 #[test]
 fn sub_char_from_string() {
    new_ucmd!()
        .args(&["%c%c%c", "five", "%", "oval"])
        .succeeds()
        .stdout_only("f%o");
 }
 #[test]
 fn sub_num_int() {
    new_ucmd!()
@ -427,7 +435,6 @@ fn sub_float_dec_places() {
 }
 #[test]
 #[ignore = "hexadecimal floats are unimplemented"]
 fn sub_float_hex_in() {
    new_ucmd!()
        .args(&["%f", "0xF1.1F"])
@ -591,3 +598,44 @@ fn sub_general_round_float_leading_zeroes() {
        .succeeds()
        .stdout_only("1.00001");
 }
 // A float followed by trailing garbage is still formatted from its numeric
 // prefix, but an error is reported on stderr and the exit code is 1.
 #[test]
 fn partial_float() {
    new_ucmd!()
        .args(&["%.2f is %s", "42.03x", "a lot"])
        .fails()
        .code_is(1)
        .stdout_is("42.03 is a lot")
        .stderr_is("printf: '42.03x': value not completely converted\n");
 }
 // An integer followed by trailing garbage is still formatted from its
 // numeric prefix, but an error is reported on stderr and the exit code is 1.
 #[test]
 fn partial_integer() {
    new_ucmd!()
        .args(&["%d is %s", "42x23", "a lot"])
        .fails()
        .code_is(1)
        .stdout_is("42 is a lot")
        .stderr_is("printf: '42x23': value not completely converted\n");
 }
 // An integer argument too large for 64 bits (here 2^65) is reported as out
 // of range with exit code 1.
 #[test]
 fn test_overflow() {
    new_ucmd!()
        .args(&["%d", "36893488147419103232"])
        .fails()
        .code_is(1)
        .stderr_is("printf: '36893488147419103232': Numerical result out of range\n");
 }
 // A `'`-prefixed argument yields the code of its first character (`a` is
 // 97); extra characters produce a warning and exit code 1.
 #[test]
 fn partial_char() {
    new_ucmd!()
        .args(&["%d", "'abc"])
        .fails()
        .code_is(1)
        .stdout_is("97")
        .stderr_is(
            "printf: warning: bc: character(s) following character constant have been ignored\n",
        );
 }