1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-02 05:57:46 +00:00

printf: parse arguments and handle escape codes

This commit is contained in:
Terts Diepraam 2023-11-16 17:00:41 +01:00
parent e7d58f673f
commit eaf5006379
6 changed files with 320 additions and 94 deletions

View file

@ -6,9 +6,12 @@
// spell-checker:ignore (change!) each's // spell-checker:ignore (change!) each's
// spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr // spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr
use std::io::stdout;
use std::ops::ControlFlow;
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, Arg, ArgAction, Command};
use uucore::error::{UResult, UUsageError}; use uucore::error::{UResult, UUsageError};
use uucore::format::{printf, FormatArgument}; use uucore::format::{parse_spec_and_escape, FormatArgument};
use uucore::{format_usage, help_about, help_section, help_usage}; use uucore::{format_usage, help_about, help_section, help_usage};
const VERSION: &str = "version"; const VERSION: &str = "version";
@ -30,12 +33,28 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format_string = matches let format_string = matches
.get_one::<String>(options::FORMATSTRING) .get_one::<String>(options::FORMATSTRING)
.ok_or_else(|| UUsageError::new(1, "missing operand"))?; .ok_or_else(|| UUsageError::new(1, "missing operand"))?;
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) { let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
None => vec![], None => vec![],
}; };
printf(format_string, &values)?; let mut args = values.iter().peekable();
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => break,
};
}
while args.peek().is_some() {
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => break,
};
}
}
Ok(()) Ok(())
} }

View file

@ -0,0 +1,60 @@
/// A single argument handed to a format specifier.
///
/// Arguments are either already typed, or [`FormatArgument::Unparsed`], in
/// which case the `get_*` accessors below try to coerce the raw string into
/// the type the specifier asks for.
#[derive(Clone, Debug)]
pub enum FormatArgument {
    /// An already-typed character.
    Char(char),
    /// An already-typed string.
    String(String),
    /// An already-typed unsigned integer.
    UnsignedInt(u64),
    /// An already-typed signed integer.
    SignedInt(i64),
    /// An already-typed floating point number.
    Float(f64),
    /// Special argument that gets coerced into the other variants
    Unparsed(String),
}

impl FormatArgument {
    /// Return the argument as a character.
    ///
    /// An `Unparsed` argument yields its first character, so that a `%c`
    /// specifier given `hello` prints `h` instead of rejecting the argument.
    /// An empty `Unparsed` string and every non-character typed variant
    /// yield `None`.
    pub fn get_char(&self) -> Option<char> {
        match self {
            Self::Char(c) => Some(*c),
            // Only the first character is relevant; anything after it is
            // ignored rather than treated as an error.
            Self::Unparsed(s) => s.chars().next(),
            _ => None,
        }
    }

    /// Return the argument as a `u64`.
    ///
    /// An `Unparsed` argument is converted with `str::parse::<u64>`; `None`
    /// is returned if that fails or if the variant holds another type.
    pub fn get_u64(&self) -> Option<u64> {
        match self {
            Self::UnsignedInt(n) => Some(*n),
            Self::Unparsed(s) => s.parse().ok(),
            _ => None,
        }
    }

    /// Return the argument as an `i64`.
    ///
    /// An `Unparsed` argument is converted with `str::parse::<i64>`; `None`
    /// is returned if that fails or if the variant holds another type.
    pub fn get_i64(&self) -> Option<i64> {
        match self {
            Self::SignedInt(n) => Some(*n),
            Self::Unparsed(s) => s.parse().ok(),
            _ => None,
        }
    }

    /// Return the argument as an `f64`.
    ///
    /// An `Unparsed` argument is converted with `str::parse::<f64>`; `None`
    /// is returned if that fails or if the variant holds another type.
    pub fn get_f64(&self) -> Option<f64> {
        match self {
            Self::Float(n) => Some(*n),
            Self::Unparsed(s) => s.parse().ok(),
            _ => None,
        }
    }

    /// Return the argument as a string slice.
    ///
    /// Both `String` and `Unparsed` are returned unchanged; every other
    /// variant yields `None`.
    pub fn get_str(&self) -> Option<&str> {
        match self {
            Self::Unparsed(s) | Self::String(s) => Some(s),
            _ => None,
        }
    }
}

View file

@ -0,0 +1,100 @@
/// Outcome of decoding the bytes that follow a backslash, as produced by
/// `parse_escape_code`.
#[derive(Debug)]
pub enum EscapedChar {
/// A single byte to emit as-is (the decoded value of a recognised escape,
/// or a plain byte of the input).
Char(u8),
/// The escape was not recognised: holds the byte that followed the
/// backslash so that the backslash and that byte can both be emitted.
Backslash(u8),
/// The `\c` escape, which signals that output should stop.
End,
}
/// Numeric base of an escape sequence.
///
/// The discriminant of each variant is the radix itself, so `base as u8`
/// can be used directly in arithmetic.
#[repr(u8)]
#[derive(Clone, Copy)]
enum Base {
    Oct = 8,
    Hex = 16,
}

impl Base {
    /// Largest number of digits an escape sequence in this base may contain.
    fn max_digits(&self) -> u8 {
        match *self {
            Base::Oct => 3,
            Base::Hex => 2,
        }
    }

    /// Value of the ASCII digit `c` in this base, or `None` if `c` is not a
    /// valid digit. Hexadecimal digits are accepted in either case.
    fn to_digit(&self, c: u8) -> Option<u8> {
        let radix = u32::from(*self as u8);
        // A digit value is always below the radix (at most 15), so the
        // narrowing cast cannot lose information.
        char::from(c).to_digit(radix).map(|value| value as u8)
    }
}
/// Parse the numeric part of the `\xHH`, `\NNN` and `\0NNN` escape sequences.
///
/// Consumes between one and `base.max_digits()` digits from the front of
/// `input` and returns their value. If the first byte is not a digit in
/// `base` (or `input` is empty), `input` is left untouched and `None` is
/// returned.
fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
    // The arithmetic on `value` has to wrap: three octal digits are 9 bits,
    // which is more than fits in a `u8`, and GNU just seems to wrap these
    // values. Widening to `u32` and converting with `char::from_u32` would
    // give incorrect results, because values above `u8::MAX` would then be
    // interpreted as Unicode code points.
    let bytes: &[u8] = *input;
    let (&first, mut tail) = bytes.split_first()?;
    let mut value = base.to_digit(first)?;

    let radix = base as u8;
    let mut digits_left = base.max_digits() - 1;
    while digits_left > 0 {
        let Some((&byte, next)) = tail.split_first() else { break };
        let Some(digit) = base.to_digit(byte) else { break };
        value = value.wrapping_mul(radix).wrapping_add(digit);
        tail = next;
        digits_left -= 1;
    }

    *input = tail;
    Some(value)
}
/// Decode the escape sequence that follows a backslash.
///
/// `rest` must start just after the backslash. On return it has been advanced
/// past every byte that belongs to the escape sequence. An empty `rest`
/// (a trailing backslash) decodes to a literal backslash.
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
    let bytes: &[u8] = *rest;
    let Some((&code, tail)) = bytes.split_first() else {
        return EscapedChar::Char(b'\\');
    };

    // The `\NNN` octal syntax. A leading '0' is deliberately excluded here
    // because that is the `\0NNN` syntax handled below. `parse_code` starts
    // from `rest` itself, so it consumes the leading digit too.
    if matches!(code, b'1'..=b'7') {
        if let Some(value) = parse_code(rest, Base::Oct) {
            return EscapedChar::Char(value);
        }
    }

    // Step over the escape letter before looking at any digits after it.
    *rest = tail;

    let byte = match code {
        b'\\' => b'\\',
        b'a' => b'\x07',
        b'b' => b'\x08',
        b'c' => return EscapedChar::End,
        b'e' => b'\x1b',
        b'f' => b'\x0c',
        b'n' => b'\n',
        b'r' => b'\r',
        b't' => b'\t',
        b'v' => b'\x0b',
        b'x' => match parse_code(rest, Base::Hex) {
            Some(value) => value,
            // `\x` without any hex digit is kept verbatim.
            None => return EscapedChar::Backslash(b'x'),
        },
        b'0' => parse_code(rest, Base::Oct).unwrap_or(b'\0'),
        other => return EscapedChar::Backslash(other),
    };
    EscapedChar::Char(byte)
}

View file

@ -8,8 +8,19 @@
//! [`Format`] struct, which represents a parsed format string. This reduces //! [`Format`] struct, which represents a parsed format string. This reduces
//! the need for parsing a format string multiple times and assures that no //! the need for parsing a format string multiple times and assures that no
//! parsing errors occur during writing. //! parsing errors occur during writing.
//!
//! There are three kinds of parsing that we might want to do:
//!
//! 1. Only `printf` specifiers (for e.g. `seq`, `dd`)
//! 2. Only escape sequences (for e.g. `echo`)
//! 3. Both `printf` specifiers and escape sequences (for e.g. `printf`)
//!
//! This module aims to combine all three use cases.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
mod escape;
mod argument;
pub mod num_format; pub mod num_format;
mod spec; mod spec;
@ -18,11 +29,16 @@ use std::{
error::Error, error::Error,
fmt::Display, fmt::Display,
io::{stdout, Write}, io::{stdout, Write},
ops::ControlFlow,
}; };
pub use argument::*;
use crate::error::UError; use crate::error::UError;
use self::num_format::Formatter; use self::{
escape::{parse_escape_code, EscapedChar},
num_format::Formatter,
};
#[derive(Debug)] #[derive(Debug)]
pub enum FormatError { pub enum FormatError {
@ -54,80 +70,116 @@ impl Display for FormatError {
} }
/// A single item to format /// A single item to format
enum FormatItem { pub enum FormatItem<C: FormatChar> {
/// A format specifier /// A format specifier
Spec(Spec), Spec(Spec),
/// Some plain text
Text(Vec<u8>),
/// A single character /// A single character
/// Char(C),
/// Added in addition to `Text` as an optimization.
Char(u8),
} }
#[derive(Clone, Debug)] pub trait FormatChar {
pub enum FormatArgument { fn write(&self, writer: impl Write) -> std::io::Result<ControlFlow<()>>;
Char(char),
String(String),
UnsignedInt(u64),
SignedInt(i64),
Float(f64),
// Special argument that gets coerced into the other variants
Unparsed(String),
} }
impl FormatItem { impl FormatChar for u8 {
fn write<'a>( fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
&self, writer.write(&[*self])?;
mut writer: impl Write, Ok(ControlFlow::Continue(()))
args: &mut impl Iterator<Item = &'a FormatArgument>, }
) -> Result<(), FormatError> { }
impl FormatChar for EscapedChar {
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
match self { match self {
FormatItem::Spec(spec) => spec.write(writer, args), EscapedChar::Char(c) => {
FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), writer.write(&[*c])?;
FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError),
} }
EscapedChar::Backslash(c) => {
writer.write(&[b'\\', *c])?;
}
EscapedChar::End => return Ok(ControlFlow::Break(())),
}
Ok(ControlFlow::Continue(()))
} }
} }
fn parse_iter(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem, FormatError>> + '_ { impl<C: FormatChar> FormatItem<C> {
let mut rest = fmt; pub fn write<'a>(
std::iter::from_fn(move || { &self,
if rest.is_empty() { writer: impl Write,
return None; args: &mut impl Iterator<Item = &'a FormatArgument>,
) -> Result<ControlFlow<()>, FormatError> {
match self {
FormatItem::Spec(spec) => spec.write(writer, args)?,
FormatItem::Char(c) => return c.write(writer).map_err(FormatError::IoError),
};
Ok(ControlFlow::Continue(()))
}
} }
match rest.iter().position(|c| *c == b'%') { pub fn parse_spec_and_escape(
None => { fmt: &[u8],
let final_text = rest; ) -> impl Iterator<Item = Result<FormatItem<EscapedChar>, FormatError>> + '_ {
rest = &[]; let mut current = fmt;
Some(Ok(FormatItem::Text(final_text.into()))) std::iter::from_fn(move || match current {
[] => return None,
[b'%', b'%', rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(EscapedChar::Char(b'%'))))
} }
Some(0) => { [b'%', rest @ ..] => {
// Handle the spec current = rest;
rest = &rest[1..]; let spec = match Spec::parse(&mut current) {
match rest.get(0) {
None => Some(Ok(FormatItem::Char(b'%'))),
Some(b'%') => {
rest = &rest[1..];
Some(Ok(FormatItem::Char(b'%')))
}
Some(_) => {
let spec = match Spec::parse(&mut rest) {
Some(spec) => spec, Some(spec) => spec,
None => return Some(Err(dbg!(FormatError::SpecError))), None => return Some(Err(FormatError::SpecError)),
}; };
Some(Ok(FormatItem::Spec(spec))) Some(Ok(FormatItem::Spec(spec)))
} }
[b'\\', rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(parse_escape_code(&mut current))))
} }
[c, rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(EscapedChar::Char(*c))))
} }
Some(i) => { })
// The `after` slice includes the % so it will be handled correctly
// in the next iteration.
let (before, after) = rest.split_at(i);
rest = after;
return Some(Ok(FormatItem::Text(before.into())));
} }
/// Lazily split `fmt` into `printf` specifiers and plain bytes, without any
/// escape-sequence handling.
///
/// `%%` yields a literal `%` byte, any other `%` starts a specifier that is
/// handed to `Spec::parse` (yielding `FormatError::SpecError` if that
/// fails), and every other byte is passed through unchanged.
fn parse_spec_only(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem<u8>, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&first, tail) = remaining.split_first()?;

        // Ordinary byte: emit it as-is.
        if first != b'%' {
            remaining = tail;
            return Some(Ok(FormatItem::Char(first)));
        }

        // `%%` collapses to a single literal percent sign.
        if tail.first() == Some(&b'%') {
            remaining = &tail[1..];
            return Some(Ok(FormatItem::Char(b'%')));
        }

        // A lone `%` introduces a specifier; the parser advances `remaining`.
        remaining = tail;
        let item = match Spec::parse(&mut remaining) {
            Some(spec) => Ok(FormatItem::Spec(spec)),
            None => Err(FormatError::SpecError),
        };
        Some(item)
    })
}
fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = Result<EscapedChar, FormatError>> + '_ {
let mut current = fmt;
std::iter::from_fn(move || match current {
[] => return None,
[b'\\', rest @ ..] => {
current = rest;
Some(Ok(parse_escape_code(&mut current)))
}
[c, rest @ ..] => {
current = rest;
Some(Ok(EscapedChar::Char(*c)))
} }
}) })
} }
@ -144,7 +196,7 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem, FormatError
/// ```rust /// ```rust
/// use uucore::format::printf; /// use uucore::format::printf;
/// ///
/// printf("hello %s", &["world".to_string()]).unwrap(); /// printf("hello %s", &[FormatArgument::String("world")]).unwrap();
/// // prints "hello world" /// // prints "hello world"
/// ``` /// ```
pub fn printf<'a>( pub fn printf<'a>(
@ -160,7 +212,7 @@ fn printf_writer<'a>(
args: impl IntoIterator<Item = &'a FormatArgument>, args: impl IntoIterator<Item = &'a FormatArgument>,
) -> Result<(), FormatError> { ) -> Result<(), FormatError> {
let mut args = args.into_iter(); let mut args = args.into_iter();
for item in parse_iter(format_string.as_ref()) { for item in parse_spec_only(format_string.as_ref()) {
item?.write(&mut writer, &mut args)?; item?.write(&mut writer, &mut args)?;
} }
Ok(()) Ok(())
@ -205,7 +257,7 @@ pub struct Format<F: Formatter> {
impl<F: Formatter> Format<F> { impl<F: Formatter> Format<F> {
pub fn parse(format_string: impl AsRef<[u8]>) -> Result<Self, FormatError> { pub fn parse(format_string: impl AsRef<[u8]>) -> Result<Self, FormatError> {
let mut iter = parse_iter(format_string.as_ref()); let mut iter = parse_spec_only(format_string.as_ref());
let mut prefix = Vec::new(); let mut prefix = Vec::new();
let mut spec = None; let mut spec = None;
@ -215,7 +267,6 @@ impl<F: Formatter> Format<F> {
spec = Some(s); spec = Some(s);
break; break;
} }
FormatItem::Text(t) => prefix.extend_from_slice(&t),
FormatItem::Char(c) => prefix.push(c), FormatItem::Char(c) => prefix.push(c),
} }
} }
@ -230,9 +281,8 @@ impl<F: Formatter> Format<F> {
for item in &mut iter { for item in &mut iter {
match item? { match item? {
FormatItem::Spec(_) => { FormatItem::Spec(_) => {
return Err(dbg!(FormatError::SpecError)); return Err(FormatError::SpecError);
} }
FormatItem::Text(t) => suffix.extend_from_slice(&t),
FormatItem::Char(c) => suffix.push(c), FormatItem::Char(c) => suffix.push(c),
} }
} }

View file

@ -93,7 +93,7 @@ impl Formatter for SignedInt {
alignment, alignment,
} = s } = s
else { else {
return Err(dbg!(FormatError::SpecError)); return Err(FormatError::SpecError);
}; };
let width = match width { let width = match width {
@ -152,7 +152,7 @@ impl Formatter for UnsignedInt {
alignment, alignment,
} = s } = s
else { else {
return Err(dbg!(FormatError::SpecError)); return Err(FormatError::SpecError);
}; };
let width = match width { let width = match width {
@ -241,19 +241,19 @@ impl Formatter for Float {
precision, precision,
} = s } = s
else { else {
return Err(dbg!(FormatError::SpecError)); return Err(FormatError::SpecError);
}; };
let width = match width { let width = match width {
Some(CanAsterisk::Fixed(x)) => x, Some(CanAsterisk::Fixed(x)) => x,
None => 0, None => 0,
Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
}; };
let precision = match precision { let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x, Some(CanAsterisk::Fixed(x)) => x,
None => 0, None => 0,
Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
}; };
Ok(Self { Ok(Self {

View file

@ -212,10 +212,7 @@ impl Spec {
(false, false) => PositiveSign::None, (false, false) => PositiveSign::None,
}, },
}, },
x => { _ => return None,
dbg!("{:b}", x);
return dbg!(None)
},
}) })
} }
@ -228,16 +225,16 @@ impl Spec {
&Spec::Char { width, align_left } => { &Spec::Char { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?; let arg = next_arg(&mut args)?;
match arg { match arg.get_char() {
FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), Some(c) => write_padded(writer, c, width, false, align_left),
_ => Err(FormatError::InvalidArgument(arg.clone())), _ => Err(FormatError::InvalidArgument(arg.clone())),
} }
} }
&Spec::String { width, align_left } => { &Spec::String { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?; let arg = next_arg(&mut args)?;
match arg { match arg.get_str() {
FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), Some(s) => write_padded(writer, s, width, false, align_left),
_ => Err(FormatError::InvalidArgument(arg.clone())), _ => Err(FormatError::InvalidArgument(arg.clone())),
} }
} }
@ -249,7 +246,7 @@ impl Spec {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?; let arg = next_arg(&mut args)?;
let FormatArgument::SignedInt(i) = arg else { let Some(i) = arg.get_i64() else {
return Err(FormatError::InvalidArgument(arg.clone())); return Err(FormatError::InvalidArgument(arg.clone()));
}; };
@ -258,7 +255,7 @@ impl Spec {
positive_sign, positive_sign,
alignment, alignment,
} }
.fmt(writer, *i) .fmt(writer, i)
.map_err(FormatError::IoError) .map_err(FormatError::IoError)
} }
&Spec::UnsignedInt { &Spec::UnsignedInt {
@ -269,7 +266,7 @@ impl Spec {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(args)?; let arg = next_arg(args)?;
let FormatArgument::UnsignedInt(i) = arg else { let Some(i) = arg.get_u64() else {
return Err(FormatError::InvalidArgument(arg.clone())); return Err(FormatError::InvalidArgument(arg.clone()));
}; };
@ -278,7 +275,7 @@ impl Spec {
width, width,
alignment, alignment,
} }
.fmt(writer, *i) .fmt(writer, i)
.map_err(FormatError::IoError) .map_err(FormatError::IoError)
} }
&Spec::Float { &Spec::Float {
@ -294,7 +291,7 @@ impl Spec {
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6);
let arg = next_arg(args)?; let arg = next_arg(args)?;
let FormatArgument::Float(f) = arg else { let Some(f) = arg.get_f64() else {
return Err(FormatError::InvalidArgument(arg.clone())); return Err(FormatError::InvalidArgument(arg.clone()));
}; };
@ -307,7 +304,7 @@ impl Spec {
alignment, alignment,
precision, precision,
} }
.fmt(writer, *f) .fmt(writer, f)
.map_err(FormatError::IoError) .map_err(FormatError::IoError)
} }
} }
@ -322,8 +319,8 @@ fn resolve_asterisk<'a>(
None => None, None => None,
Some(CanAsterisk::Asterisk) => { Some(CanAsterisk::Asterisk) => {
let arg = next_arg(args)?; let arg = next_arg(args)?;
match arg { match arg.get_u64() {
FormatArgument::UnsignedInt(u) => match usize::try_from(*u) { Some(u) => match usize::try_from(u) {
Ok(u) => Some(u), Ok(u) => Some(u),
Err(_) => return Err(FormatError::InvalidArgument(arg.clone())), Err(_) => return Err(FormatError::InvalidArgument(arg.clone())),
}, },