1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

Merge pull request #7837 from sargas/add-indexing-to-printf

printf: Add indexing to format strings
This commit is contained in:
Sylvestre Ledru 2025-04-25 23:58:01 +02:00 committed by GitHub
commit 606c0c1f57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 568 additions and 127 deletions

View file

@ -6,7 +6,7 @@ use clap::{Arg, ArgAction, Command};
use std::io::stdout;
use std::ops::ControlFlow;
use uucore::error::{UResult, UUsageError};
use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape};
use uucore::format::{FormatArgument, FormatArguments, FormatItem, parse_spec_and_escape};
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show_warning};
const VERSION: &str = "version";
@ -39,9 +39,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
};
let mut format_seen = false;
let mut args = values.iter().peekable();
// Parse and process the format string
let mut args = FormatArguments::new(&values);
for item in parse_spec_and_escape(format) {
if let Ok(FormatItem::Spec(_)) = item {
format_seen = true;
@ -51,12 +50,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
ControlFlow::Break(()) => return Ok(()),
};
}
args.start_next_batch();
// Without format specs in the string, the iter would not consume any args,
// leading to an infinite loop. Thus, we exit early.
if !format_seen {
if let Some(arg) = args.next() {
let FormatArgument::Unparsed(arg_str) = arg else {
if !args.is_exhausted() {
let Some(FormatArgument::Unparsed(arg_str)) = args.peek_arg() else {
unreachable!("All args are transformed to Unparsed")
};
show_warning!("ignoring excess arguments, starting with '{arg_str}'");
@ -64,13 +64,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
return Ok(());
}
while args.peek().is_some() {
while !args.is_exhausted() {
for item in parse_spec_and_escape(format) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => return Ok(()),
};
}
args.start_next_batch();
}
Ok(())

View file

@ -3,6 +3,8 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use super::ExtendedBigDecimal;
use crate::format::spec::ArgumentLocation;
use crate::{
error::set_exit_code,
parser::num_parser::{ExtendedParser, ExtendedParserError},
@ -12,8 +14,6 @@ use crate::{
use os_display::Quotable;
use std::ffi::OsStr;
use super::ExtendedBigDecimal;
/// An argument for formatting
///
/// Each of these variants is only accepted by their respective directives. For
@ -21,7 +21,7 @@ use super::ExtendedBigDecimal;
///
/// The [`FormatArgument::Unparsed`] variant contains a string that can be
/// parsed into other types. This is used by the `printf` utility.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum FormatArgument {
Char(char),
String(String),
@ -32,33 +32,70 @@ pub enum FormatArgument {
Unparsed(String),
}
pub trait ArgumentIter<'a>: Iterator<Item = &'a FormatArgument> {
fn get_char(&mut self) -> u8;
fn get_i64(&mut self) -> i64;
fn get_u64(&mut self) -> u64;
fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal;
fn get_str(&mut self) -> &'a str;
/// A struct that holds a slice of format arguments and provides methods to access them
#[derive(Debug, PartialEq)]
pub struct FormatArguments<'a> {
args: &'a [FormatArgument],
next_arg_position: usize,
highest_arg_position: Option<usize>,
current_offset: usize,
}
impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
fn get_char(&mut self) -> u8 {
let Some(next) = self.next() else {
return b'\0';
};
match next {
FormatArgument::Char(c) => *c as u8,
FormatArgument::Unparsed(s) => s.bytes().next().unwrap_or(b'\0'),
impl<'a> FormatArguments<'a> {
/// Create a new FormatArguments from a slice of FormatArgument
pub fn new(args: &'a [FormatArgument]) -> Self {
Self {
args,
next_arg_position: 0,
highest_arg_position: None,
current_offset: 0,
}
}
/// Get the next argument that would be used
pub fn peek_arg(&self) -> Option<&'a FormatArgument> {
self.args.get(self.next_arg_position)
}
/// Check if all arguments have been consumed
pub fn is_exhausted(&self) -> bool {
self.current_offset >= self.args.len()
}
pub fn start_next_batch(&mut self) {
self.current_offset = self
.next_arg_position
.max(self.highest_arg_position.map_or(0, |x| x.saturating_add(1)));
self.next_arg_position = self.current_offset;
}
pub fn next_char(&mut self, position: &ArgumentLocation) -> u8 {
match self.next_arg(position) {
Some(FormatArgument::Char(c)) => *c as u8,
Some(FormatArgument::Unparsed(s)) => s.bytes().next().unwrap_or(b'\0'),
_ => b'\0',
}
}
fn get_u64(&mut self) -> u64 {
let Some(next) = self.next() else {
return 0;
};
match next {
FormatArgument::UnsignedInt(n) => *n,
FormatArgument::Unparsed(s) => {
pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a str {
match self.next_arg(position) {
Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s,
_ => "",
}
}
pub fn next_i64(&mut self, position: &ArgumentLocation) -> i64 {
match self.next_arg(position) {
Some(FormatArgument::SignedInt(n)) => *n,
Some(FormatArgument::Unparsed(s)) => extract_value(i64::extended_parse(s), s),
_ => 0,
}
}
pub fn next_u64(&mut self, position: &ArgumentLocation) -> u64 {
match self.next_arg(position) {
Some(FormatArgument::UnsignedInt(n)) => *n,
Some(FormatArgument::Unparsed(s)) => {
// Check if the string is a character literal enclosed in quotes
if s.starts_with(['"', '\'']) {
// Extract the content between the quotes safely using chars
@ -82,32 +119,30 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
}
}
fn get_i64(&mut self) -> i64 {
let Some(next) = self.next() else {
return 0;
};
match next {
FormatArgument::SignedInt(n) => *n,
FormatArgument::Unparsed(s) => extract_value(i64::extended_parse(s), s),
_ => 0,
}
}
fn get_extended_big_decimal(&mut self) -> ExtendedBigDecimal {
let Some(next) = self.next() else {
return ExtendedBigDecimal::zero();
};
match next {
FormatArgument::Float(n) => n.clone(),
FormatArgument::Unparsed(s) => extract_value(ExtendedBigDecimal::extended_parse(s), s),
pub fn next_extended_big_decimal(&mut self, position: &ArgumentLocation) -> ExtendedBigDecimal {
match self.next_arg(position) {
Some(FormatArgument::Float(n)) => n.clone(),
Some(FormatArgument::Unparsed(s)) => {
extract_value(ExtendedBigDecimal::extended_parse(s), s)
}
_ => ExtendedBigDecimal::zero(),
}
}
fn get_str(&mut self) -> &'a str {
match self.next() {
Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s,
_ => "",
fn get_at_relative_position(&mut self, pos: usize) -> Option<&'a FormatArgument> {
let pos = pos.saturating_sub(1).saturating_add(self.current_offset);
self.highest_arg_position = Some(self.highest_arg_position.map_or(pos, |x| x.max(pos)));
self.args.get(pos)
}
fn next_arg(&mut self, position: &ArgumentLocation) -> Option<&'a FormatArgument> {
match position {
ArgumentLocation::NextArgument => {
let arg = self.args.get(self.next_arg_position);
self.next_arg_position += 1;
arg
}
ArgumentLocation::Position(pos) => self.get_at_relative_position(*pos),
}
}
}
@ -151,3 +186,274 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_format_arguments_empty() {
let args = FormatArguments::new(&[]);
assert_eq!(None, args.peek_arg());
assert!(args.is_exhausted());
}
#[test]
fn test_format_arguments_single_element() {
let mut args = FormatArguments::new(&[FormatArgument::Char('a')]);
assert!(!args.is_exhausted());
assert_eq!(Some(&FormatArgument::Char('a')), args.peek_arg());
assert!(!args.is_exhausted()); // Peek shouldn't consume
assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(args.is_exhausted()); // After batch, exhausted with a single arg
assert_eq!(None, args.peek_arg());
}
#[test]
fn test_sequential_next_char() {
// Test with consistent sequential next_char calls
let mut args = FormatArguments::new(&[
FormatArgument::Char('z'),
FormatArgument::Char('y'),
FormatArgument::Char('x'),
FormatArgument::Char('w'),
FormatArgument::Char('v'),
FormatArgument::Char('u'),
FormatArgument::Char('t'),
FormatArgument::Char('s'),
]);
// First batch - two sequential calls
assert_eq!(b'z', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!(b'y', args.next_char(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same pattern
assert_eq!(b'x', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!(b'w', args.next_char(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(!args.is_exhausted());
// Third batch - same pattern
assert_eq!(b'v', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!(b'u', args.next_char(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(!args.is_exhausted());
// Fourth batch - same pattern (last batch)
assert_eq!(b't', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!(b's', args.next_char(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_sequential_different_methods() {
// Test with different method types in sequence
let args = [
FormatArgument::Char('a'),
FormatArgument::String("hello".to_string()),
FormatArgument::Unparsed("123".to_string()),
FormatArgument::String("world".to_string()),
FormatArgument::Char('z'),
FormatArgument::String("test".to_string()),
];
let mut args = FormatArguments::new(&args);
// First batch - next_char followed by next_string
assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!("hello", args.next_string(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same pattern
assert_eq!(b'1', args.next_char(&ArgumentLocation::NextArgument)); // First byte of 123
assert_eq!("world", args.next_string(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(!args.is_exhausted());
// Third batch - same pattern (last batch)
assert_eq!(b'z', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!("test", args.next_string(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_position_access_pattern() {
// Test with consistent positional access patterns
let mut args = FormatArguments::new(&[
FormatArgument::Char('a'),
FormatArgument::Char('b'),
FormatArgument::Char('c'),
FormatArgument::Char('d'),
FormatArgument::Char('e'),
FormatArgument::Char('f'),
FormatArgument::Char('g'),
FormatArgument::Char('h'),
FormatArgument::Char('i'),
]);
// First batch - positional access
assert_eq!(b'b', args.next_char(&ArgumentLocation::Position(2))); // Position 2
assert_eq!(b'a', args.next_char(&ArgumentLocation::Position(1))); // Position 1
assert_eq!(b'c', args.next_char(&ArgumentLocation::Position(3))); // Position 3
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same positional pattern
assert_eq!(b'e', args.next_char(&ArgumentLocation::Position(2))); // Position 2
assert_eq!(b'd', args.next_char(&ArgumentLocation::Position(1))); // Position 1
assert_eq!(b'f', args.next_char(&ArgumentLocation::Position(3))); // Position 3
args.start_next_batch();
assert!(!args.is_exhausted());
// Third batch - same positional pattern (last batch)
assert_eq!(b'h', args.next_char(&ArgumentLocation::Position(2))); // Position 2
assert_eq!(b'g', args.next_char(&ArgumentLocation::Position(1))); // Position 1
assert_eq!(b'i', args.next_char(&ArgumentLocation::Position(3))); // Position 3
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_mixed_access_pattern() {
// Test with mixed sequential and positional access
let mut args = FormatArguments::new(&[
FormatArgument::Char('a'),
FormatArgument::Char('b'),
FormatArgument::Char('c'),
FormatArgument::Char('d'),
FormatArgument::Char('e'),
FormatArgument::Char('f'),
FormatArgument::Char('g'),
FormatArgument::Char('h'),
]);
// First batch - mix of sequential and positional
assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument)); // Sequential
assert_eq!(b'c', args.next_char(&ArgumentLocation::Position(3))); // Positional
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same mixed pattern
assert_eq!(b'd', args.next_char(&ArgumentLocation::NextArgument)); // Sequential
assert_eq!(b'f', args.next_char(&ArgumentLocation::Position(3))); // Positional
args.start_next_batch();
assert!(!args.is_exhausted());
// Last batch - same mixed pattern
assert_eq!(b'g', args.next_char(&ArgumentLocation::NextArgument)); // Sequential
assert_eq!(b'\0', args.next_char(&ArgumentLocation::Position(3))); // Out of bounds
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_numeric_argument_types() {
// Test with numeric argument types
let args = [
FormatArgument::SignedInt(10),
FormatArgument::UnsignedInt(20),
FormatArgument::Float(ExtendedBigDecimal::zero()),
FormatArgument::SignedInt(30),
FormatArgument::UnsignedInt(40),
FormatArgument::Float(ExtendedBigDecimal::zero()),
];
let mut args = FormatArguments::new(&args);
// First batch - i64, u64, decimal
assert_eq!(10, args.next_i64(&ArgumentLocation::NextArgument));
assert_eq!(20, args.next_u64(&ArgumentLocation::NextArgument));
let result = args.next_extended_big_decimal(&ArgumentLocation::NextArgument);
assert_eq!(ExtendedBigDecimal::zero(), result);
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same pattern
assert_eq!(30, args.next_i64(&ArgumentLocation::NextArgument));
assert_eq!(40, args.next_u64(&ArgumentLocation::NextArgument));
let result = args.next_extended_big_decimal(&ArgumentLocation::NextArgument);
assert_eq!(ExtendedBigDecimal::zero(), result);
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_unparsed_arguments() {
// Test with unparsed arguments that get coerced
let args = [
FormatArgument::Unparsed("hello".to_string()),
FormatArgument::Unparsed("123".to_string()),
FormatArgument::Unparsed("hello".to_string()),
FormatArgument::Unparsed("456".to_string()),
];
let mut args = FormatArguments::new(&args);
// First batch - string, number
assert_eq!("hello", args.next_string(&ArgumentLocation::NextArgument));
assert_eq!(123, args.next_i64(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same pattern
assert_eq!("hello", args.next_string(&ArgumentLocation::NextArgument));
assert_eq!(456, args.next_i64(&ArgumentLocation::NextArgument));
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_mixed_types_with_positions() {
// Test with mixed types and positional access
let args = [
FormatArgument::Char('a'),
FormatArgument::String("test".to_string()),
FormatArgument::UnsignedInt(42),
FormatArgument::Char('b'),
FormatArgument::String("more".to_string()),
FormatArgument::UnsignedInt(99),
];
let mut args = FormatArguments::new(&args);
// First batch - positional access of different types
assert_eq!(b'a', args.next_char(&ArgumentLocation::Position(1)));
assert_eq!("test", args.next_string(&ArgumentLocation::Position(2)));
assert_eq!(42, args.next_u64(&ArgumentLocation::Position(3)));
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch - same pattern
assert_eq!(b'b', args.next_char(&ArgumentLocation::Position(1)));
assert_eq!("more", args.next_string(&ArgumentLocation::Position(2)));
assert_eq!(99, args.next_u64(&ArgumentLocation::Position(3)));
args.start_next_batch();
assert!(args.is_exhausted());
}
#[test]
fn test_partial_last_batch() {
// Test with a partial last batch (fewer elements than batch size)
let mut args = FormatArguments::new(&[
FormatArgument::Char('a'),
FormatArgument::Char('b'),
FormatArgument::Char('c'),
FormatArgument::Char('d'),
FormatArgument::Char('e'), // Last batch has fewer elements
]);
// First batch
assert_eq!(b'a', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!(b'c', args.next_char(&ArgumentLocation::Position(3)));
args.start_next_batch();
assert!(!args.is_exhausted());
// Second batch (partial)
assert_eq!(b'd', args.next_char(&ArgumentLocation::NextArgument));
assert_eq!(b'\0', args.next_char(&ArgumentLocation::Position(3))); // Out of bounds
args.start_next_batch();
assert!(args.is_exhausted());
}
}

View file

@ -161,10 +161,10 @@ impl FormatChar for EscapedChar {
}
impl<C: FormatChar> FormatItem<C> {
pub fn write<'a>(
pub fn write(
&self,
writer: impl Write,
args: &mut impl Iterator<Item = &'a FormatArgument>,
args: &mut FormatArguments,
) -> Result<ControlFlow<()>, FormatError> {
match self {
Self::Spec(spec) => spec.write(writer, args)?,
@ -280,7 +280,8 @@ fn printf_writer<'a>(
format_string: impl AsRef<[u8]>,
args: impl IntoIterator<Item = &'a FormatArgument>,
) -> Result<(), FormatError> {
let mut args = args.into_iter();
let args = args.into_iter().cloned().collect::<Vec<_>>();
let mut args = FormatArguments::new(&args);
for item in parse_spec_only(format_string.as_ref()) {
item?.write(&mut writer, &mut args)?;
}

View file

@ -99,6 +99,7 @@ impl Formatter<i64> for SignedInt {
precision,
positive_sign,
alignment,
position: _position,
} = s
else {
return Err(FormatError::WrongSpecType);
@ -107,13 +108,13 @@ impl Formatter<i64> for SignedInt {
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType),
};
Ok(Self {
@ -146,7 +147,7 @@ impl Formatter<u64> for UnsignedInt {
};
// Zeroes do not get a prefix. An octal value does also not get a
// prefix if the padded value will not start with a zero.
// prefix if the padded value does not start with a zero.
let prefix = match (x, self.variant) {
(1.., UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes)) => "0x",
(1.., UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes)) => "0X",
@ -170,6 +171,7 @@ impl Formatter<u64> for UnsignedInt {
precision,
positive_sign: PositiveSign::None,
alignment,
position,
} = s
{
Spec::UnsignedInt {
@ -177,6 +179,7 @@ impl Formatter<u64> for UnsignedInt {
width,
precision,
alignment,
position,
}
} else {
s
@ -187,6 +190,7 @@ impl Formatter<u64> for UnsignedInt {
width,
precision,
alignment,
position: _position,
} = s
else {
return Err(FormatError::WrongSpecType);
@ -195,13 +199,13 @@ impl Formatter<u64> for UnsignedInt {
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType),
};
Ok(Self {
@ -300,6 +304,7 @@ impl Formatter<&ExtendedBigDecimal> for Float {
positive_sign,
alignment,
precision,
position: _position,
} = s
else {
return Err(FormatError::WrongSpecType);
@ -308,13 +313,13 @@ impl Formatter<&ExtendedBigDecimal> for Float {
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => Some(x),
None => None,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType),
};
Ok(Self {

View file

@ -8,13 +8,14 @@
use crate::quoting_style::{QuotingStyle, escape_name};
use super::{
ArgumentIter, ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
num_format::{
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
UnsignedIntVariant,
},
parse_escape_only,
};
use crate::format::FormatArguments;
use std::{io::Write, ops::ControlFlow};
/// A parsed specification for formatting a value
@ -24,29 +25,38 @@ use std::{io::Write, ops::ControlFlow};
#[derive(Debug)]
pub enum Spec {
Char {
position: ArgumentLocation,
width: Option<CanAsterisk<usize>>,
align_left: bool,
},
String {
position: ArgumentLocation,
precision: Option<CanAsterisk<usize>>,
width: Option<CanAsterisk<usize>>,
align_left: bool,
},
EscapedString,
QuotedString,
EscapedString {
position: ArgumentLocation,
},
QuotedString {
position: ArgumentLocation,
},
SignedInt {
position: ArgumentLocation,
width: Option<CanAsterisk<usize>>,
precision: Option<CanAsterisk<usize>>,
positive_sign: PositiveSign,
alignment: NumberAlignment,
},
UnsignedInt {
position: ArgumentLocation,
variant: UnsignedIntVariant,
width: Option<CanAsterisk<usize>>,
precision: Option<CanAsterisk<usize>>,
alignment: NumberAlignment,
},
Float {
position: ArgumentLocation,
variant: FloatVariant,
case: Case,
force_decimal: ForceDecimal,
@ -57,12 +67,18 @@ pub enum Spec {
},
}
#[derive(Clone, Copy, Debug)]
pub enum ArgumentLocation {
NextArgument,
Position(usize),
}
/// Precision and width specified might use an asterisk to indicate that they are
/// determined by an argument.
#[derive(Clone, Copy, Debug)]
pub enum CanAsterisk<T> {
Fixed(T),
Asterisk,
Asterisk(ArgumentLocation),
}
/// Size of the expected type (ignored)
@ -130,14 +146,18 @@ impl Flags {
impl Spec {
pub fn parse<'a>(rest: &mut &'a [u8]) -> Result<Self, &'a [u8]> {
// Based on the C++ reference, the spec format looks like:
// Based on the C++ reference and the Single UNIX Specification,
// the spec format looks like:
//
// %[flags][width][.precision][length]specifier
// %[argumentNum$][flags][width][.precision][length]specifier
//
// However, we have already parsed the '%'.
let mut index = 0;
let start = *rest;
// Check for a positional specifier (%m$)
let position = eat_argument_position(rest, &mut index);
let flags = Flags::parse(rest, &mut index);
let positive_sign = match flags {
@ -182,6 +202,7 @@ impl Spec {
return Err(&start[..index]);
}
Self::Char {
position,
width,
align_left: flags.minus,
}
@ -191,6 +212,7 @@ impl Spec {
return Err(&start[..index]);
}
Self::String {
position,
precision,
width,
align_left: flags.minus,
@ -200,19 +222,20 @@ impl Spec {
if flags.any() || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::EscapedString
Self::EscapedString { position }
}
b'q' => {
if flags.any() || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::QuotedString
Self::QuotedString { position }
}
b'd' | b'i' => {
if flags.hash {
return Err(&start[..index]);
}
Self::SignedInt {
position,
width,
precision,
alignment,
@ -233,6 +256,7 @@ impl Spec {
_ => unreachable!(),
};
Self::UnsignedInt {
position,
variant,
precision,
width,
@ -240,6 +264,7 @@ impl Spec {
}
}
c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float {
position,
width,
precision,
variant: match c {
@ -314,24 +339,32 @@ impl Spec {
length
}
pub fn write<'a>(
pub fn write(
&self,
mut writer: impl Write,
mut args: impl ArgumentIter<'a>,
args: &mut FormatArguments,
) -> Result<(), FormatError> {
match self {
Self::Char { width, align_left } => {
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or_default();
write_padded(writer, &[args.get_char()], width, *align_left || neg_width)
Self::Char {
width,
align_left,
position,
} => {
let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or_default();
write_padded(
writer,
&[args.next_char(position)],
width,
*align_left || neg_width,
)
}
Self::String {
width,
align_left,
precision,
position,
} => {
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or_default();
let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or_default();
// GNU does do this truncation on a byte level, see for instance:
// printf "%.1s" 🙃
@ -339,8 +372,8 @@ impl Spec {
// For now, we let printf panic when we truncate within a code point.
// TODO: We need to not use Rust's formatting for aligning the output,
// so that we can just write bytes to stdout without panicking.
let precision = resolve_asterisk_precision(*precision, &mut args);
let s = args.get_str();
let precision = resolve_asterisk_precision(*precision, args);
let s = args.next_string(position);
let truncated = match precision {
Some(p) if p < s.len() => &s[..p],
_ => s,
@ -352,8 +385,8 @@ impl Spec {
*align_left || neg_width,
)
}
Self::EscapedString => {
let s = args.get_str();
Self::EscapedString { position } => {
let s = args.next_string(position);
let mut parsed = Vec::new();
for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) {
match c.write(&mut parsed)? {
@ -366,9 +399,9 @@ impl Spec {
}
writer.write_all(&parsed).map_err(FormatError::IoError)
}
Self::QuotedString => {
Self::QuotedString { position } => {
let s = escape_name(
args.get_str().as_ref(),
args.next_string(position).as_ref(),
&QuotingStyle::Shell {
escape: true,
always_quote: false,
@ -387,12 +420,11 @@ impl Spec {
precision,
positive_sign,
alignment,
position,
} => {
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
let precision =
resolve_asterisk_precision(*precision, &mut args).unwrap_or_default();
let i = args.get_i64();
let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false));
let precision = resolve_asterisk_precision(*precision, args).unwrap_or_default();
let i = args.next_i64(position);
if precision as u64 > i32::MAX as u64 {
return Err(FormatError::InvalidPrecision(precision.to_string()));
@ -416,12 +448,11 @@ impl Spec {
width,
precision,
alignment,
position,
} => {
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
let precision =
resolve_asterisk_precision(*precision, &mut args).unwrap_or_default();
let i = args.get_u64();
let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false));
let precision = resolve_asterisk_precision(*precision, args).unwrap_or_default();
let i = args.next_u64(position);
if precision as u64 > i32::MAX as u64 {
return Err(FormatError::InvalidPrecision(precision.to_string()));
@ -448,11 +479,11 @@ impl Spec {
positive_sign,
alignment,
precision,
position,
} => {
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
let precision = resolve_asterisk_precision(*precision, &mut args);
let f: ExtendedBigDecimal = args.get_extended_big_decimal();
let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false));
let precision = resolve_asterisk_precision(*precision, args);
let f: ExtendedBigDecimal = args.next_extended_big_decimal(position);
if precision.is_some_and(|p| p as u64 > i32::MAX as u64) {
return Err(FormatError::InvalidPrecision(
@ -482,14 +513,14 @@ impl Spec {
/// Determine the width, potentially getting a value from args
/// Returns the non-negative width and whether the value should be left-aligned.
fn resolve_asterisk_width<'a>(
fn resolve_asterisk_width(
option: Option<CanAsterisk<usize>>,
mut args: impl ArgumentIter<'a>,
args: &mut FormatArguments,
) -> Option<(usize, bool)> {
match option {
None => None,
Some(CanAsterisk::Asterisk) => {
let nb = args.get_i64();
Some(CanAsterisk::Asterisk(loc)) => {
let nb = args.next_i64(&loc);
if nb < 0 {
Some((usize::try_from(-(nb as isize)).ok().unwrap_or(0), true))
} else {
@ -502,13 +533,13 @@ fn resolve_asterisk_width<'a>(
/// Determines the precision, which should (if defined)
/// be a non-negative number.
fn resolve_asterisk_precision<'a>(
fn resolve_asterisk_precision(
option: Option<CanAsterisk<usize>>,
mut args: impl ArgumentIter<'a>,
args: &mut FormatArguments,
) -> Option<usize> {
match option {
None => None,
Some(CanAsterisk::Asterisk) => match args.get_i64() {
Some(CanAsterisk::Asterisk(loc)) => match args.next_i64(&loc) {
v if v >= 0 => usize::try_from(v).ok(),
v if v < 0 => Some(0usize),
_ => None,
@ -534,10 +565,28 @@ fn write_padded(
.map_err(FormatError::IoError)
}
// Check for a number ending with a '$'
fn eat_argument_position(rest: &mut &[u8], index: &mut usize) -> ArgumentLocation {
let original_index = *index;
if let Some(pos) = eat_number(rest, index) {
if let Some(&b'$') = rest.get(*index) {
*index += 1;
ArgumentLocation::Position(pos)
} else {
*index = original_index;
ArgumentLocation::NextArgument
}
} else {
*index = original_index;
ArgumentLocation::NextArgument
}
}
fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option<CanAsterisk<usize>> {
if let Some(b'*') = rest.get(*index) {
*index += 1;
Some(CanAsterisk::Asterisk)
// Check for a positional specifier (*m$)
Some(CanAsterisk::Asterisk(eat_argument_position(rest, index)))
} else {
eat_number(rest, index).map(CanAsterisk::Fixed)
}
@ -569,14 +618,20 @@ mod tests {
#[test]
fn no_width() {
assert_eq!(None, resolve_asterisk_width(None, Vec::new().iter()));
assert_eq!(
None,
resolve_asterisk_width(None, &mut FormatArguments::new(&[]))
);
}
#[test]
fn fixed_width() {
assert_eq!(
Some((42, false)),
resolve_asterisk_width(Some(CanAsterisk::Fixed(42)), Vec::new().iter())
resolve_asterisk_width(
Some(CanAsterisk::Fixed(42)),
&mut FormatArguments::new(&[])
)
);
}
@ -585,30 +640,42 @@ mod tests {
assert_eq!(
Some((42, false)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
[FormatArgument::SignedInt(42)].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::SignedInt(42)]),
)
);
assert_eq!(
Some((42, false)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
[FormatArgument::Unparsed("42".to_string())].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
)
);
assert_eq!(
Some((42, true)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
[FormatArgument::SignedInt(-42)].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::SignedInt(-42)]),
)
);
assert_eq!(
Some((42, true)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
[FormatArgument::Unparsed("-42".to_string())].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
)
);
assert_eq!(
Some((2, false)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk(ArgumentLocation::Position(2))),
&mut FormatArguments::new(&[
FormatArgument::Unparsed("1".to_string()),
FormatArgument::Unparsed("2".to_string()),
FormatArgument::Unparsed("3".to_string())
]),
)
);
}
@ -620,14 +687,20 @@ mod tests {
#[test]
fn no_width() {
assert_eq!(None, resolve_asterisk_precision(None, Vec::new().iter()));
assert_eq!(
None,
resolve_asterisk_precision(None, &mut FormatArguments::new(&[]))
);
}
#[test]
fn fixed_width() {
assert_eq!(
Some(42),
resolve_asterisk_precision(Some(CanAsterisk::Fixed(42)), Vec::new().iter())
resolve_asterisk_precision(
Some(CanAsterisk::Fixed(42)),
&mut FormatArguments::new(&[])
)
);
}
@ -636,30 +709,41 @@ mod tests {
assert_eq!(
Some(42),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
[FormatArgument::SignedInt(42)].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::SignedInt(42)]),
)
);
assert_eq!(
Some(42),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
[FormatArgument::Unparsed("42".to_string())].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
)
);
assert_eq!(
Some(0),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
[FormatArgument::SignedInt(-42)].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::SignedInt(-42)]),
)
);
assert_eq!(
Some(0),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
[FormatArgument::Unparsed("-42".to_string())].iter()
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
)
);
assert_eq!(
Some(2),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk(ArgumentLocation::Position(2))),
&mut FormatArguments::new(&[
FormatArgument::Unparsed("1".to_string()),
FormatArgument::Unparsed("2".to_string()),
FormatArgument::Unparsed("3".to_string())
]),
)
);
}

View file

@ -1281,7 +1281,7 @@ fn float_arg_with_whitespace() {
.fails()
.stderr_contains("expected a numeric value");
// A input string with a whitespace special character that has
// An input string with a whitespace special character that has
// not already been expanded should fail.
new_ucmd!()
.args(&["%f", "\\t0.1"])
@ -1313,3 +1313,47 @@ fn mb_input() {
.stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
}
}
#[test]
fn positional_format_specifiers() {
new_ucmd!()
.args(&["%1$d%d-", "5", "10", "6", "20"])
.succeeds()
.stdout_only("55-1010-66-2020-");
new_ucmd!()
.args(&["%2$d%d-", "5", "10", "6", "20"])
.succeeds()
.stdout_only("105-206-");
new_ucmd!()
.args(&["%3$d%d-", "5", "10", "6", "20"])
.succeeds()
.stdout_only("65-020-");
new_ucmd!()
.args(&["%4$d%d-", "5", "10", "6", "20"])
.succeeds()
.stdout_only("205-");
new_ucmd!()
.args(&["%5$d%d-", "5", "10", "6", "20"])
.succeeds()
.stdout_only("05-");
new_ucmd!()
.args(&[
"Octal: %6$o, Int: %1$d, Float: %4$f, String: %2$s, Hex: %7$x, Scientific: %5$e, Char: %9$c, Unsigned: %3$u, Integer: %8$i",
"42", // 1$d - Int
"hello", // 2$s - String
"100", // 3$u - Unsigned
"3.14159", // 4$f - Float
"0.00001", // 5$e - Scientific
"77", // 6$o - Octal
"255", // 7$x - Hex
"123", // 8$i - Integer
"A", // 9$c - Char
])
.succeeds()
.stdout_only("Octal: 115, Int: 42, Float: 3.141590, String: hello, Hex: ff, Scientific: 1.000000e-05, Char: A, Unsigned: 100, Integer: 123");
}