From 7f98d98472218c1a18cf0001f5e50d552b2dc6ca Mon Sep 17 00:00:00 2001 From: Joseph Jon Booker Date: Thu, 24 Apr 2025 21:37:28 -0500 Subject: [PATCH] printf: Add indexing to format strings --- src/uu/printf/src/printf.rs | 13 +- .../src/lib/features/format/argument.rs | 398 ++++++++++++++++-- src/uucore/src/lib/features/format/mod.rs | 7 +- .../src/lib/features/format/num_format.rs | 19 +- src/uucore/src/lib/features/format/spec.rs | 212 +++++++--- tests/by-util/test_printf.rs | 46 +- 6 files changed, 568 insertions(+), 127 deletions(-) diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index b8b0b6f4a..887ad4107 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -6,7 +6,7 @@ use clap::{Arg, ArgAction, Command}; use std::io::stdout; use std::ops::ControlFlow; use uucore::error::{UResult, UUsageError}; -use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape}; +use uucore::format::{FormatArgument, FormatArguments, FormatItem, parse_spec_and_escape}; use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show_warning}; const VERSION: &str = "version"; @@ -39,9 +39,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }; let mut format_seen = false; - let mut args = values.iter().peekable(); - // Parse and process the format string + let mut args = FormatArguments::new(&values); for item in parse_spec_and_escape(format) { if let Ok(FormatItem::Spec(_)) = item { format_seen = true; @@ -51,12 +50,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { ControlFlow::Break(()) => return Ok(()), }; } + args.start_next_batch(); // Without format specs in the string, the iter would not consume any args, // leading to an infinite loop. Thus, we exit early. if !format_seen { - if let Some(arg) = args.next() { - let FormatArgument::Unparsed(arg_str) = arg else { + if !args.is_exhausted() { + let Some(FormatArgument::Unparsed(arg_str)) = args.peek_arg() else { unreachable!("All args are transformed to Unparsed") }; show_warning!("ignoring excess arguments, starting with '{arg_str}'"); @@ -64,13 +64,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { return Ok(()); } - while args.peek().is_some() { + while !args.is_exhausted() { for item in parse_spec_and_escape(format) { match item?.write(stdout(), &mut args)? { ControlFlow::Continue(()) => {} ControlFlow::Break(()) => return Ok(()), }; } + args.start_next_batch(); } Ok(()) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 48d7f8f93..4ca5a4a96 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use super::ExtendedBigDecimal; +use crate::format::spec::ArgumentLocation; use crate::{ error::set_exit_code, parser::num_parser::{ExtendedParser, ExtendedParserError}, @@ -12,8 +14,6 @@ use crate::{ use os_display::Quotable; use std::ffi::OsStr; -use super::ExtendedBigDecimal; - /// An argument for formatting /// /// Each of these variants is only accepted by their respective directives. For @@ -21,7 +21,7 @@ use super::ExtendedBigDecimal; /// /// The [`FormatArgument::Unparsed`] variant contains a string that can be /// parsed into other types. This is used by the `printf` utility. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum FormatArgument { Char(char), String(String), @@ -32,33 +32,70 @@ pub enum FormatArgument { Unparsed(String), } -pub trait ArgumentIter<'a>: Iterator { - fn get_char(&mut self) -> u8; - fn get_i64(&mut self) -> i64; - fn get_u64(&mut self) -> u64; - fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal; - fn get_str(&mut self) -> &'a str; +/// A struct that holds a slice of format arguments and provides methods to access them +#[derive(Debug, PartialEq)] +pub struct FormatArguments<'a> { + args: &'a [FormatArgument], + next_arg_position: usize, + highest_arg_position: Option, + current_offset: usize, } -impl<'a, T: Iterator> ArgumentIter<'a> for T { - fn get_char(&mut self) -> u8 { - let Some(next) = self.next() else { - return b'\0'; - }; - match next { - FormatArgument::Char(c) => *c as u8, - FormatArgument::Unparsed(s) => s.bytes().next().unwrap_or(b'\0'), +impl<'a> FormatArguments<'a> { + /// Create a new FormatArguments from a slice of FormatArgument + pub fn new(args: &'a [FormatArgument]) -> Self { + Self { + args, + next_arg_position: 0, + highest_arg_position: None, + current_offset: 0, + } + } + + /// Get the next argument that would be used + pub fn peek_arg(&self) -> Option<&'a FormatArgument> { + self.args.get(self.next_arg_position) + } + + /// Check if all arguments have been consumed + pub fn is_exhausted(&self) -> bool { + self.current_offset >= self.args.len() + } + + pub fn start_next_batch(&mut self) { + self.current_offset = self + .next_arg_position + .max(self.highest_arg_position.map_or(0, |x| x.saturating_add(1))); + self.next_arg_position = self.current_offset; + } + + pub fn next_char(&mut self, position: &ArgumentLocation) -> u8 { + match self.next_arg(position) { + Some(FormatArgument::Char(c)) => *c as u8, + Some(FormatArgument::Unparsed(s)) => s.bytes().next().unwrap_or(b'\0'), _ => b'\0', } } - fn get_u64(&mut self) -> u64 { - let Some(next) = self.next() else { - return 0; - }; - match next { - FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => { + pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a str { + match self.next_arg(position) { + Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s, + _ => "", + } + } + + pub fn next_i64(&mut self, position: &ArgumentLocation) -> i64 { + match self.next_arg(position) { + Some(FormatArgument::SignedInt(n)) => *n, + Some(FormatArgument::Unparsed(s)) => extract_value(i64::extended_parse(s), s), + _ => 0, + } + } + + pub fn next_u64(&mut self, position: &ArgumentLocation) -> u64 { + match self.next_arg(position) { + Some(FormatArgument::UnsignedInt(n)) => *n, + Some(FormatArgument::Unparsed(s)) => { // Check if the string is a character literal enclosed in quotes if s.starts_with(['"', '\'']) { // Extract the content between the quotes safely using chars @@ -82,32 +119,30 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { } } - fn get_i64(&mut self) -> i64 { - let Some(next) = self.next() else { - return 0; - }; - match next { - FormatArgument::SignedInt(n) => *n, - FormatArgument::Unparsed(s) => extract_value(i64::extended_parse(s), s), - _ => 0, - } - } - - fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal { - let Some(next) = self.next() else { - return ExtendedBigDecimal::zero(); - }; - match next { - FormatArgument::Float(n) => n.clone(), - FormatArgument::Unparsed(s) => extract_value(ExtendedBigDecimal::extended_parse(s), s), + pub fn next_extended_big_decimal(&mut self, position: &ArgumentLocation) -> ExtendedBigDecimal { + match self.next_arg(position) { + Some(FormatArgument::Float(n)) => n.clone(), + Some(FormatArgument::Unparsed(s)) => { + extract_value(ExtendedBigDecimal::extended_parse(s), s) + } _ => ExtendedBigDecimal::zero(), } } - fn get_str(&mut self) -> &'a str { - match self.next() { - Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s, - _ => "", + fn get_at_relative_position(&mut self, pos: usize) -> Option<&'a FormatArgument> { + let pos = pos.saturating_sub(1).saturating_add(self.current_offset); + self.highest_arg_position = Some(self.highest_arg_position.map_or(pos, |x| x.max(pos))); + self.args.get(pos) + } + + fn next_arg(&mut self, position: &ArgumentLocation) -> Option<&'a FormatArgument> { + match position { + ArgumentLocation::NextArgument => { + let arg = self.args.get(self.next_arg_position); + self.next_arg_position += 1; + arg + } + ArgumentLocation::Position(pos) => self.get_at_relative_position(*pos), } } } @@ -151,3 +186,274 @@ fn extract_value(p: Result>, input: &s } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_format_arguments_empty() { + let args = FormatArguments::new(&[]); + assert_eq!(None, args.peek_arg()); + assert!(args.is_exhausted()); + } + + #[test] + fn test_format_arguments_single_element() { + let mut args = FormatArguments::new(&[FormatArgument::Char('a')]); + assert!(!args.is_exhausted()); + assert_eq!(Some(&FormatArgument::Char('a')), args.peek_arg()); + assert!(!args.is_exhausted()); // Peek shouldn't consume + assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(args.is_exhausted()); // After batch, exhausted with a single arg + assert_eq!(None, args.peek_arg()); + } + + #[test] + fn test_sequential_next_char() { + // Test with consistent sequential next_char calls + let mut args = FormatArguments::new(&[ + FormatArgument::Char('z'), + FormatArgument::Char('y'), + FormatArgument::Char('x'), + FormatArgument::Char('w'), + FormatArgument::Char('v'), + FormatArgument::Char('u'), + FormatArgument::Char('t'), + FormatArgument::Char('s'), + ]); + + // First batch - two sequential calls + assert_eq!(b'z', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!(b'y', args.next_char(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same pattern + assert_eq!(b'x', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!(b'w', args.next_char(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Third batch - same pattern + assert_eq!(b'v', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!(b'u', args.next_char(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Fourth batch - same pattern (last batch) + assert_eq!(b't', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!(b's', args.next_char(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_sequential_different_methods() { + // Test with different method types in sequence + let args = [ + FormatArgument::Char('a'), + FormatArgument::String("hello".to_string()), + FormatArgument::Unparsed("123".to_string()), + FormatArgument::String("world".to_string()), + FormatArgument::Char('z'), + FormatArgument::String("test".to_string()), + ]; + let mut args = FormatArguments::new(&args); + + // First batch - next_char followed by next_string + assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!("hello", args.next_string(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same pattern + assert_eq!(b'1', args.next_char(&ArgumentLocation::NextArgument)); // First byte of 123 + assert_eq!("world", args.next_string(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Third batch - same pattern (last batch) + assert_eq!(b'z', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!("test", args.next_string(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_position_access_pattern() { + // Test with consistent positional access patterns + let mut args = FormatArguments::new(&[ + FormatArgument::Char('a'), + FormatArgument::Char('b'), + FormatArgument::Char('c'), + FormatArgument::Char('d'), + FormatArgument::Char('e'), + FormatArgument::Char('f'), + FormatArgument::Char('g'), + FormatArgument::Char('h'), + FormatArgument::Char('i'), + ]); + + // First batch - positional access + assert_eq!(b'b', args.next_char(&ArgumentLocation::Position(2))); // Position 2 + assert_eq!(b'a', args.next_char(&ArgumentLocation::Position(1))); // Position 1 + assert_eq!(b'c', args.next_char(&ArgumentLocation::Position(3))); // Position 3 + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same positional pattern + assert_eq!(b'e', args.next_char(&ArgumentLocation::Position(2))); // Position 2 + assert_eq!(b'd', args.next_char(&ArgumentLocation::Position(1))); // Position 1 + assert_eq!(b'f', args.next_char(&ArgumentLocation::Position(3))); // Position 3 + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Third batch - same positional pattern (last batch) + assert_eq!(b'h', args.next_char(&ArgumentLocation::Position(2))); // Position 2 + assert_eq!(b'g', args.next_char(&ArgumentLocation::Position(1))); // Position 1 + assert_eq!(b'i', args.next_char(&ArgumentLocation::Position(3))); // Position 3 + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_mixed_access_pattern() { + // Test with mixed sequential and positional access + let mut args = FormatArguments::new(&[ + FormatArgument::Char('a'), + FormatArgument::Char('b'), + FormatArgument::Char('c'), + FormatArgument::Char('d'), + FormatArgument::Char('e'), + FormatArgument::Char('f'), + FormatArgument::Char('g'), + FormatArgument::Char('h'), + ]); + + // First batch - mix of sequential and positional + assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument)); // Sequential + assert_eq!(b'c', args.next_char(&ArgumentLocation::Position(3))); // Positional + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same mixed pattern + assert_eq!(b'd', args.next_char(&ArgumentLocation::NextArgument)); // Sequential + assert_eq!(b'f', args.next_char(&ArgumentLocation::Position(3))); // Positional + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Last batch - same mixed pattern + assert_eq!(b'g', args.next_char(&ArgumentLocation::NextArgument)); // Sequential + assert_eq!(b'\0', args.next_char(&ArgumentLocation::Position(3))); // Out of bounds + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_numeric_argument_types() { + // Test with numeric argument types + let args = [ + FormatArgument::SignedInt(10), + FormatArgument::UnsignedInt(20), + FormatArgument::Float(ExtendedBigDecimal::zero()), + FormatArgument::SignedInt(30), + FormatArgument::UnsignedInt(40), + FormatArgument::Float(ExtendedBigDecimal::zero()), + ]; + let mut args = FormatArguments::new(&args); + + // First batch - i64, u64, decimal + assert_eq!(10, args.next_i64(&ArgumentLocation::NextArgument)); + assert_eq!(20, args.next_u64(&ArgumentLocation::NextArgument)); + let result = args.next_extended_big_decimal(&ArgumentLocation::NextArgument); + assert_eq!(ExtendedBigDecimal::zero(), result); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same pattern + assert_eq!(30, args.next_i64(&ArgumentLocation::NextArgument)); + assert_eq!(40, args.next_u64(&ArgumentLocation::NextArgument)); + let result = args.next_extended_big_decimal(&ArgumentLocation::NextArgument); + assert_eq!(ExtendedBigDecimal::zero(), result); + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_unparsed_arguments() { + // Test with unparsed arguments that get coerced + let args = [ + FormatArgument::Unparsed("hello".to_string()), + FormatArgument::Unparsed("123".to_string()), + FormatArgument::Unparsed("hello".to_string()), + FormatArgument::Unparsed("456".to_string()), + ]; + let mut args = FormatArguments::new(&args); + + // First batch - string, number + assert_eq!("hello", args.next_string(&ArgumentLocation::NextArgument)); + assert_eq!(123, args.next_i64(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same pattern + assert_eq!("hello", args.next_string(&ArgumentLocation::NextArgument)); + assert_eq!(456, args.next_i64(&ArgumentLocation::NextArgument)); + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_mixed_types_with_positions() { + // Test with mixed types and positional access + let args = [ + FormatArgument::Char('a'), + FormatArgument::String("test".to_string()), + FormatArgument::UnsignedInt(42), + FormatArgument::Char('b'), + FormatArgument::String("more".to_string()), + FormatArgument::UnsignedInt(99), + ]; + let mut args = FormatArguments::new(&args); + + // First batch - positional access of different types + assert_eq!(b'a', args.next_char(&ArgumentLocation::Position(1))); + assert_eq!("test", args.next_string(&ArgumentLocation::Position(2))); + assert_eq!(42, args.next_u64(&ArgumentLocation::Position(3))); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch - same pattern + assert_eq!(b'b', args.next_char(&ArgumentLocation::Position(1))); + assert_eq!("more", args.next_string(&ArgumentLocation::Position(2))); + assert_eq!(99, args.next_u64(&ArgumentLocation::Position(3))); + args.start_next_batch(); + assert!(args.is_exhausted()); + } + + #[test] + fn test_partial_last_batch() { + // Test with a partial last batch (fewer elements than batch size) + let mut args = FormatArguments::new(&[ + FormatArgument::Char('a'), + FormatArgument::Char('b'), + FormatArgument::Char('c'), + FormatArgument::Char('d'), + FormatArgument::Char('e'), // Last batch has fewer elements + ]); + + // First batch + assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!(b'c', args.next_char(&ArgumentLocation::Position(3))); + args.start_next_batch(); + assert!(!args.is_exhausted()); + + // Second batch (partial) + assert_eq!(b'd', args.next_char(&ArgumentLocation::NextArgument)); + assert_eq!(b'\0', args.next_char(&ArgumentLocation::Position(3))); // Out of bounds + args.start_next_batch(); + assert!(args.is_exhausted()); + } +} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 2b372d3e0..0a887d075 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -161,10 +161,10 @@ impl FormatChar for EscapedChar { } impl FormatItem { - pub fn write<'a>( + pub fn write( &self, writer: impl Write, - args: &mut impl Iterator, + args: &mut FormatArguments, ) -> Result, FormatError> { match self { Self::Spec(spec) => spec.write(writer, args)?, @@ -280,7 +280,8 @@ fn printf_writer<'a>( format_string: impl AsRef<[u8]>, args: impl IntoIterator, ) -> Result<(), FormatError> { - let mut args = args.into_iter(); + let args = args.into_iter().cloned().collect::>(); + let mut args = FormatArguments::new(&args); for item in parse_spec_only(format_string.as_ref()) { item?.write(&mut writer, &mut args)?; } diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index c58ba93f3..12e19a095 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -99,6 +99,7 @@ impl Formatter for SignedInt { precision, positive_sign, alignment, + position: _position, } = s else { return Err(FormatError::WrongSpecType); @@ -107,13 +108,13 @@ impl Formatter for SignedInt { let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -146,7 +147,7 @@ impl Formatter for UnsignedInt { }; // Zeroes do not get a prefix. An octal value does also not get a - // prefix if the padded value will not start with a zero. + // prefix if the padded value does not start with a zero. let prefix = match (x, self.variant) { (1.., UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes)) => "0x", (1.., UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes)) => "0X", @@ -170,6 +171,7 @@ impl Formatter for UnsignedInt { precision, positive_sign: PositiveSign::None, alignment, + position, } = s { Spec::UnsignedInt { @@ -177,6 +179,7 @@ impl Formatter for UnsignedInt { width, precision, alignment, + position, } } else { s @@ -187,6 +190,7 @@ impl Formatter for UnsignedInt { width, precision, alignment, + position: _position, } = s else { return Err(FormatError::WrongSpecType); @@ -195,13 +199,13 @@ impl Formatter for UnsignedInt { let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), }; Ok(Self { @@ -300,6 +304,7 @@ impl Formatter<&ExtendedBigDecimal> for Float { positive_sign, alignment, precision, + position: _position, } = s else { return Err(FormatError::WrongSpecType); @@ -308,13 +313,13 @@ impl Formatter<&ExtendedBigDecimal> for Float { let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => Some(x), None => None, - Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), }; Ok(Self { diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 9bb1fb4ae..1e5d7240f 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -8,13 +8,14 @@ use crate::quoting_style::{QuotingStyle, escape_name}; use super::{ - ArgumentIter, ExtendedBigDecimal, FormatChar, FormatError, OctalParsing, + ExtendedBigDecimal, FormatChar, FormatError, OctalParsing, num_format::{ self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, }, parse_escape_only, }; +use crate::format::FormatArguments; use std::{io::Write, ops::ControlFlow}; /// A parsed specification for formatting a value @@ -24,29 +25,38 @@ use std::{io::Write, ops::ControlFlow}; #[derive(Debug)] pub enum Spec { Char { + position: ArgumentLocation, width: Option>, align_left: bool, }, String { + position: ArgumentLocation, precision: Option>, width: Option>, align_left: bool, }, - EscapedString, - QuotedString, + EscapedString { + position: ArgumentLocation, + }, + QuotedString { + position: ArgumentLocation, + }, SignedInt { + position: ArgumentLocation, width: Option>, precision: Option>, positive_sign: PositiveSign, alignment: NumberAlignment, }, UnsignedInt { + position: ArgumentLocation, variant: UnsignedIntVariant, width: Option>, precision: Option>, alignment: NumberAlignment, }, Float { + position: ArgumentLocation, variant: FloatVariant, case: Case, force_decimal: ForceDecimal, @@ -57,12 +67,18 @@ pub enum Spec { }, } +#[derive(Clone, Copy, Debug)] +pub enum ArgumentLocation { + NextArgument, + Position(usize), +} + /// Precision and width specified might use an asterisk to indicate that they are /// determined by an argument. #[derive(Clone, Copy, Debug)] pub enum CanAsterisk { Fixed(T), - Asterisk, + Asterisk(ArgumentLocation), } /// Size of the expected type (ignored) @@ -130,14 +146,18 @@ impl Flags { impl Spec { pub fn parse<'a>(rest: &mut &'a [u8]) -> Result { - // Based on the C++ reference, the spec format looks like: + // Based on the C++ reference and the Single UNIX Specification, + // the spec format looks like: // - // %[flags][width][.precision][length]specifier + // %[argumentNum$][flags][width][.precision][length]specifier // // However, we have already parsed the '%'. let mut index = 0; let start = *rest; + // Check for a positional specifier (%m$) + let position = eat_argument_position(rest, &mut index); + let flags = Flags::parse(rest, &mut index); let positive_sign = match flags { @@ -182,6 +202,7 @@ impl Spec { return Err(&start[..index]); } Self::Char { + position, width, align_left: flags.minus, } @@ -191,6 +212,7 @@ impl Spec { return Err(&start[..index]); } Self::String { + position, precision, width, align_left: flags.minus, @@ -200,19 +222,20 @@ impl Spec { if flags.any() || width.is_some() || precision.is_some() { return Err(&start[..index]); } - Self::EscapedString + Self::EscapedString { position } } b'q' => { if flags.any() || width.is_some() || precision.is_some() { return Err(&start[..index]); } - Self::QuotedString + Self::QuotedString { position } } b'd' | b'i' => { if flags.hash { return Err(&start[..index]); } Self::SignedInt { + position, width, precision, alignment, @@ -233,6 +256,7 @@ impl Spec { _ => unreachable!(), }; Self::UnsignedInt { + position, variant, precision, width, @@ -240,6 +264,7 @@ impl Spec { } } c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float { + position, width, precision, variant: match c { @@ -314,24 +339,32 @@ impl Spec { length } - pub fn write<'a>( + pub fn write( &self, mut writer: impl Write, - mut args: impl ArgumentIter<'a>, + args: &mut FormatArguments, ) -> Result<(), FormatError> { match self { - Self::Char { width, align_left } => { - let (width, neg_width) = - resolve_asterisk_width(*width, &mut args).unwrap_or_default(); - write_padded(writer, &[args.get_char()], width, *align_left || neg_width) + Self::Char { + width, + align_left, + position, + } => { + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or_default(); + write_padded( + writer, + &[args.next_char(position)], + width, + *align_left || neg_width, + ) } Self::String { width, align_left, precision, + position, } => { - let (width, neg_width) = - resolve_asterisk_width(*width, &mut args).unwrap_or_default(); + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or_default(); // GNU does do this truncation on a byte level, see for instance: // printf "%.1s" 🙃 @@ -339,8 +372,8 @@ impl Spec { // For now, we let printf panic when we truncate within a code point. // TODO: We need to not use Rust's formatting for aligning the output, // so that we can just write bytes to stdout without panicking. - let precision = resolve_asterisk_precision(*precision, &mut args); - let s = args.get_str(); + let precision = resolve_asterisk_precision(*precision, args); + let s = args.next_string(position); let truncated = match precision { Some(p) if p < s.len() => &s[..p], _ => s, @@ -352,8 +385,8 @@ impl Spec { *align_left || neg_width, ) } - Self::EscapedString => { - let s = args.get_str(); + Self::EscapedString { position } => { + let s = args.next_string(position); let mut parsed = Vec::new(); for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) { match c.write(&mut parsed)? { @@ -366,9 +399,9 @@ impl Spec { } writer.write_all(&parsed).map_err(FormatError::IoError) } - Self::QuotedString => { + Self::QuotedString { position } => { let s = escape_name( - args.get_str().as_ref(), + args.next_string(position).as_ref(), &QuotingStyle::Shell { escape: true, always_quote: false, @@ -387,12 +420,11 @@ impl Spec { precision, positive_sign, alignment, + position, } => { - let (width, neg_width) = - resolve_asterisk_width(*width, &mut args).unwrap_or((0, false)); - let precision = - resolve_asterisk_precision(*precision, &mut args).unwrap_or_default(); - let i = args.get_i64(); + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, args).unwrap_or_default(); + let i = args.next_i64(position); if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); @@ -416,12 +448,11 @@ impl Spec { width, precision, alignment, + position, } => { - let (width, neg_width) = - resolve_asterisk_width(*width, &mut args).unwrap_or((0, false)); - let precision = - resolve_asterisk_precision(*precision, &mut args).unwrap_or_default(); - let i = args.get_u64(); + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, args).unwrap_or_default(); + let i = args.next_u64(position); if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); @@ -448,11 +479,11 @@ impl Spec { positive_sign, alignment, precision, + position, } => { - let (width, neg_width) = - resolve_asterisk_width(*width, &mut args).unwrap_or((0, false)); - let precision = resolve_asterisk_precision(*precision, &mut args); - let f: ExtendedBigDecimal = args.get_extended_big_decimal(); + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, args); + let f: ExtendedBigDecimal = args.next_extended_big_decimal(position); if precision.is_some_and(|p| p as u64 > i32::MAX as u64) { return Err(FormatError::InvalidPrecision( @@ -482,14 +513,14 @@ impl Spec { /// Determine the width, potentially getting a value from args /// Returns the non-negative width and whether the value should be left-aligned. -fn resolve_asterisk_width<'a>( +fn resolve_asterisk_width( option: Option>, - mut args: impl ArgumentIter<'a>, + args: &mut FormatArguments, ) -> Option<(usize, bool)> { match option { None => None, - Some(CanAsterisk::Asterisk) => { - let nb = args.get_i64(); + Some(CanAsterisk::Asterisk(loc)) => { + let nb = args.next_i64(&loc); if nb < 0 { Some((usize::try_from(-(nb as isize)).ok().unwrap_or(0), true)) } else { @@ -502,13 +533,13 @@ fn resolve_asterisk_width<'a>( /// Determines the precision, which should (if defined) /// be a non-negative number. -fn resolve_asterisk_precision<'a>( +fn resolve_asterisk_precision( option: Option>, - mut args: impl ArgumentIter<'a>, + args: &mut FormatArguments, ) -> Option { match option { None => None, - Some(CanAsterisk::Asterisk) => match args.get_i64() { + Some(CanAsterisk::Asterisk(loc)) => match args.next_i64(&loc) { v if v >= 0 => usize::try_from(v).ok(), v if v < 0 => Some(0usize), _ => None, @@ -534,10 +565,28 @@ fn write_padded( .map_err(FormatError::IoError) } +// Check for a number ending with a '$' +fn eat_argument_position(rest: &mut &[u8], index: &mut usize) -> ArgumentLocation { + let original_index = *index; + if let Some(pos) = eat_number(rest, index) { + if let Some(&b'$') = rest.get(*index) { + *index += 1; + ArgumentLocation::Position(pos) + } else { + *index = original_index; + ArgumentLocation::NextArgument + } + } else { + *index = original_index; + ArgumentLocation::NextArgument + } +} + fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option> { if let Some(b'*') = rest.get(*index) { *index += 1; - Some(CanAsterisk::Asterisk) + // Check for a positional specifier (*m$) + Some(CanAsterisk::Asterisk(eat_argument_position(rest, index))) } else { eat_number(rest, index).map(CanAsterisk::Fixed) } @@ -569,14 +618,20 @@ mod tests { #[test] fn no_width() { - assert_eq!(None, resolve_asterisk_width(None, Vec::new().iter())); + assert_eq!( + None, + resolve_asterisk_width(None, &mut FormatArguments::new(&[])) + ); } #[test] fn fixed_width() { assert_eq!( Some((42, false)), - resolve_asterisk_width(Some(CanAsterisk::Fixed(42)), Vec::new().iter()) + resolve_asterisk_width( + Some(CanAsterisk::Fixed(42)), + &mut FormatArguments::new(&[]) + ) ); } @@ -585,30 +640,42 @@ mod tests { assert_eq!( Some((42, false)), resolve_asterisk_width( - Some(CanAsterisk::Asterisk), - [FormatArgument::SignedInt(42)].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::SignedInt(42)]), ) ); assert_eq!( Some((42, false)), resolve_asterisk_width( - Some(CanAsterisk::Asterisk), - [FormatArgument::Unparsed("42".to_string())].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]), ) ); assert_eq!( Some((42, true)), resolve_asterisk_width( - Some(CanAsterisk::Asterisk), - [FormatArgument::SignedInt(-42)].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::SignedInt(-42)]), ) ); assert_eq!( Some((42, true)), resolve_asterisk_width( - Some(CanAsterisk::Asterisk), - [FormatArgument::Unparsed("-42".to_string())].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]), + ) + ); + + assert_eq!( + Some((2, false)), + resolve_asterisk_width( + Some(CanAsterisk::Asterisk(ArgumentLocation::Position(2))), + &mut FormatArguments::new(&[ + FormatArgument::Unparsed("1".to_string()), + FormatArgument::Unparsed("2".to_string()), + FormatArgument::Unparsed("3".to_string()) + ]), ) ); } @@ -620,14 +687,20 @@ mod tests { #[test] fn no_width() { - assert_eq!(None, resolve_asterisk_precision(None, Vec::new().iter())); + assert_eq!( + None, + resolve_asterisk_precision(None, &mut FormatArguments::new(&[])) + ); } #[test] fn fixed_width() { assert_eq!( Some(42), - resolve_asterisk_precision(Some(CanAsterisk::Fixed(42)), Vec::new().iter()) + resolve_asterisk_precision( + Some(CanAsterisk::Fixed(42)), + &mut FormatArguments::new(&[]) + ) ); } @@ -636,30 +709,41 @@ mod tests { assert_eq!( Some(42), resolve_asterisk_precision( - Some(CanAsterisk::Asterisk), - [FormatArgument::SignedInt(42)].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::SignedInt(42)]), ) ); assert_eq!( Some(42), resolve_asterisk_precision( - Some(CanAsterisk::Asterisk), - [FormatArgument::Unparsed("42".to_string())].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]), ) ); assert_eq!( Some(0), resolve_asterisk_precision( - Some(CanAsterisk::Asterisk), - [FormatArgument::SignedInt(-42)].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::SignedInt(-42)]), ) ); assert_eq!( Some(0), resolve_asterisk_precision( - Some(CanAsterisk::Asterisk), - [FormatArgument::Unparsed("-42".to_string())].iter() + Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), + &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]), + ) + ); + assert_eq!( + Some(2), + resolve_asterisk_precision( + Some(CanAsterisk::Asterisk(ArgumentLocation::Position(2))), + &mut FormatArguments::new(&[ + FormatArgument::Unparsed("1".to_string()), + FormatArgument::Unparsed("2".to_string()), + FormatArgument::Unparsed("3".to_string()) + ]), ) ); } diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 2b53c10a2..2df226b8a 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -1281,7 +1281,7 @@ fn float_arg_with_whitespace() { .fails() .stderr_contains("expected a numeric value"); - // A input string with a whitespace special character that has + // An input string with a whitespace special character that has // not already been expanded should fail. new_ucmd!() .args(&["%f", "\\t0.1"]) @@ -1313,3 +1313,47 @@ fn mb_input() { .stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n")); } } + +#[test] +fn positional_format_specifiers() { + new_ucmd!() + .args(&["%1$d%d-", "5", "10", "6", "20"]) + .succeeds() + .stdout_only("55-1010-66-2020-"); + + new_ucmd!() + .args(&["%2$d%d-", "5", "10", "6", "20"]) + .succeeds() + .stdout_only("105-206-"); + + new_ucmd!() + .args(&["%3$d%d-", "5", "10", "6", "20"]) + .succeeds() + .stdout_only("65-020-"); + + new_ucmd!() + .args(&["%4$d%d-", "5", "10", "6", "20"]) + .succeeds() + .stdout_only("205-"); + + new_ucmd!() + .args(&["%5$d%d-", "5", "10", "6", "20"]) + .succeeds() + .stdout_only("05-"); + + new_ucmd!() + .args(&[ + "Octal: %6$o, Int: %1$d, Float: %4$f, String: %2$s, Hex: %7$x, Scientific: %5$e, Char: %9$c, Unsigned: %3$u, Integer: %8$i", + "42", // 1$d - Int + "hello", // 2$s - String + "100", // 3$u - Unsigned + "3.14159", // 4$f - Float + "0.00001", // 5$e - Scientific + "77", // 6$o - Octal + "255", // 7$x - Hex + "123", // 8$i - Integer + "A", // 9$c - Char + ]) + .succeeds() + .stdout_only("Octal: 115, Int: 42, Float: 3.141590, String: hello, Hex: ff, Scientific: 1.000000e-05, Char: A, Unsigned: 100, Integer: 123"); +}