1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-01 05:27:45 +00:00

printf: parse arguments and handle escape codes

This commit is contained in:
Terts Diepraam 2023-11-16 17:00:41 +01:00
parent e7d58f673f
commit eaf5006379
6 changed files with 320 additions and 94 deletions

View file

@ -6,9 +6,12 @@
// spell-checker:ignore (change!) each's
// spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr
use std::io::stdout;
use std::ops::ControlFlow;
use clap::{crate_version, Arg, ArgAction, Command};
use uucore::error::{UResult, UUsageError};
use uucore::format::{printf, FormatArgument};
use uucore::format::{parse_spec_and_escape, FormatArgument};
use uucore::{format_usage, help_about, help_section, help_usage};
const VERSION: &str = "version";
@ -30,12 +33,28 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format_string = matches
.get_one::<String>(options::FORMATSTRING)
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
None => vec![],
};
printf(format_string, &values)?;
let mut args = values.iter().peekable();
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => break,
};
}
while args.peek().is_some() {
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => break,
};
}
}
Ok(())
}

View file

@ -0,0 +1,60 @@
/// An argument that can be substituted into a format specifier.
///
/// The typed variants hold a value that the matching accessor returns as-is.
/// [`FormatArgument::Unparsed`] holds raw text and is converted on demand by
/// whichever accessor is called on it.
#[derive(Clone, Debug)]
pub enum FormatArgument {
    /// A single character.
    Char(char),
    /// A string.
    String(String),
    /// An unsigned integer.
    UnsignedInt(u64),
    /// A signed integer.
    SignedInt(i64),
    /// A floating point number.
    Float(f64),
    /// Special argument that gets coerced into the other variants
    Unparsed(String),
}

impl FormatArgument {
    /// Returns the argument as a character.
    ///
    /// `Char` yields its value. `Unparsed` yields the first character of its
    /// text and ignores the rest, so that `printf '%c' hello` prints `h`
    /// instead of failing. Empty `Unparsed` text and every other variant
    /// yield `None`.
    pub fn get_char(&self) -> Option<char> {
        match self {
            Self::Char(c) => Some(*c),
            Self::Unparsed(s) => s.chars().next(),
            _ => None,
        }
    }

    /// Returns the argument as a `u64`.
    ///
    /// `UnsignedInt` yields its value and `Unparsed` text is parsed with
    /// [`str::parse`]. Text that does not parse, and every other variant,
    /// yield `None`.
    pub fn get_u64(&self) -> Option<u64> {
        match self {
            Self::UnsignedInt(n) => Some(*n),
            Self::Unparsed(s) => s.parse().ok(),
            _ => None,
        }
    }

    /// Returns the argument as an `i64`.
    ///
    /// `SignedInt` yields its value and `Unparsed` text is parsed with
    /// [`str::parse`]. Text that does not parse, and every other variant,
    /// yield `None`.
    pub fn get_i64(&self) -> Option<i64> {
        match self {
            Self::SignedInt(n) => Some(*n),
            Self::Unparsed(s) => s.parse().ok(),
            _ => None,
        }
    }

    /// Returns the argument as an `f64`.
    ///
    /// `Float` yields its value and `Unparsed` text is parsed with
    /// [`str::parse`]. Text that does not parse, and every other variant,
    /// yield `None`.
    pub fn get_f64(&self) -> Option<f64> {
        match self {
            Self::Float(n) => Some(*n),
            Self::Unparsed(s) => s.parse().ok(),
            _ => None,
        }
    }

    /// Returns the argument as a string slice.
    ///
    /// Both `String` and `Unparsed` yield their text unchanged; every other
    /// variant yields `None`.
    pub fn get_str(&self) -> Option<&str> {
        match self {
            Self::Unparsed(s) | Self::String(s) => Some(s),
            _ => None,
        }
    }
}

View file

@ -0,0 +1,100 @@
/// The outcome of parsing a single backslash escape sequence.
#[derive(Debug)]
pub enum EscapedChar {
    /// A literal byte produced by the escape sequence.
    Char(u8),
    /// The byte that followed the backslash in a sequence that is not a
    /// recognized escape (this includes `\x` with no hex digit after it).
    Backslash(u8),
    /// The `\c` escape.
    End,
}

/// The radix of a numeric escape sequence.
///
/// The discriminant is the radix itself, so `base as u8` is the multiplier
/// used while accumulating digits.
#[repr(u8)]
#[derive(Clone, Copy)]
enum Base {
    Oct = 8,
    Hex = 16,
}

impl Base {
    /// The largest number of digits a numeric escape in this base may have.
    fn max_digits(&self) -> u8 {
        match *self {
            Base::Hex => 2,
            Base::Oct => 3,
        }
    }

    /// Converts the ASCII byte `c` into its digit value, or returns `None`
    /// when `c` is not a digit of this base.
    fn to_digit(&self, c: u8) -> Option<u8> {
        let radix = *self as u8;
        let value = match c {
            b'0'..=b'9' => c - b'0',
            b'a'..=b'f' => c - b'a' + 10,
            b'A'..=b'F' => c - b'A' + 10,
            _ => return None,
        };
        // A digit is valid exactly when its value is below the radix.
        (value < radix).then_some(value)
    }
}

/// Parse the numeric part of the `\xHH` and `\0NNN` escape sequences
///
/// Consumes between one and `base.max_digits()` leading digits from `input`
/// and returns their value. When `input` is empty or does not start with a
/// digit of `base`, returns `None` and leaves `input` untouched.
fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
    // The accumulation must wrap: three octal digits carry 9 bits, which is
    // more than a `u8` holds, and GNU just seems to wrap these values.
    // Widening to `u32` and going through `char::from_u32` would be wrong,
    // because values above `u8::MAX` would then be interpreted as unicode.
    let radix = base as u8;
    let bytes: &[u8] = *input;

    let (&lead, tail) = bytes.split_first()?;
    let mut value = base.to_digit(lead)?;
    let mut consumed = 1;

    let extra_digits = tail
        .iter()
        .take(usize::from(base.max_digits()) - 1)
        .map_while(|&byte| base.to_digit(byte));
    for digit in extra_digits {
        value = value.wrapping_mul(radix).wrapping_add(digit);
        consumed += 1;
    }

    *input = &bytes[consumed..];
    Some(value)
}

/// Parses the escape sequence that follows a backslash.
///
/// `rest` must point just past the backslash. On return it has been advanced
/// past every byte that belongs to the escape sequence. An empty `rest` (a
/// trailing backslash) yields a literal backslash.
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
    let bytes: &[u8] = *rest;
    let Some((&code, tail)) = bytes.split_first() else {
        return EscapedChar::Char(b'\\');
    };

    // The `\NNN` octal syntax. A leading '0' is deliberately excluded here
    // because that is the `\0NNN` syntax handled further down.
    if matches!(code, b'1'..=b'7') {
        if let Some(value) = parse_code(rest, Base::Oct) {
            return EscapedChar::Char(value);
        }
    }

    *rest = tail;
    let byte = match code {
        b'\\' => b'\\',
        b'a' => b'\x07',
        b'b' => b'\x08',
        b'c' => return EscapedChar::End,
        b'e' => b'\x1b',
        b'f' => b'\x0c',
        b'n' => b'\n',
        b'r' => b'\r',
        b't' => b'\t',
        b'v' => b'\x0b',
        b'x' => match parse_code(rest, Base::Hex) {
            Some(value) => value,
            None => return EscapedChar::Backslash(b'x'),
        },
        b'0' => parse_code(rest, Base::Oct).unwrap_or(b'\0'),
        other => return EscapedChar::Backslash(other),
    };
    EscapedChar::Char(byte)
}

View file

@ -8,8 +8,19 @@
//! [`Format`] struct, which represents a parsed format string. This reduces
//! the need for parsing a format string multiple times and assures that no
//! parsing errors occur during writing.
//!
//! There are three kinds of parsing that we might want to do:
//!
//! 1. Only `printf` specifiers (for e.g. `seq`, `dd`)
//! 2. Only escape sequences (for e.g. `echo`)
//! 3. Both `printf` specifiers and escape sequences (for e.g. `printf`)
//!
//! This module aims to combine all three use cases.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
mod escape;
mod argument;
pub mod num_format;
mod spec;
@ -18,11 +29,16 @@ use std::{
error::Error,
fmt::Display,
io::{stdout, Write},
ops::ControlFlow,
};
pub use argument::*;
use crate::error::UError;
use self::num_format::Formatter;
use self::{
escape::{parse_escape_code, EscapedChar},
num_format::Formatter,
};
#[derive(Debug)]
pub enum FormatError {
@ -54,80 +70,116 @@ impl Display for FormatError {
}
/// A single item to format
enum FormatItem {
pub enum FormatItem<C: FormatChar> {
/// A format specifier
Spec(Spec),
/// Some plain text
Text(Vec<u8>),
/// A single character
///
/// Added in addition to `Text` as an optimization.
Char(u8),
Char(C),
}
#[derive(Clone, Debug)]
pub enum FormatArgument {
Char(char),
String(String),
UnsignedInt(u64),
SignedInt(i64),
Float(f64),
// Special argument that gets coerced into the other variants
Unparsed(String),
/// A character-like value that can appear as a `Char` item of a format string.
pub trait FormatChar {
    /// Writes this value to `writer`.
    ///
    /// Returns `ControlFlow::Continue(())` when output should carry on and
    /// `ControlFlow::Break(())` when the caller should stop writing the
    /// remaining items. I/O failures are returned as `Err`.
    fn write(&self, writer: impl Write) -> std::io::Result<ControlFlow<()>>;
}
impl FormatItem {
fn write<'a>(
&self,
mut writer: impl Write,
args: &mut impl Iterator<Item = &'a FormatArgument>,
) -> Result<(), FormatError> {
match self {
FormatItem::Spec(spec) => spec.write(writer, args),
FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError),
FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError),
}
impl FormatChar for u8 {
    /// Writes the byte to `writer` and always asks the caller to continue.
    fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
        // `write_all` instead of `write`: a bare `write` may report that zero
        // bytes were written, or fail with `Interrupted`, and the byte would
        // be lost or the whole operation aborted for a transient condition.
        writer.write_all(&[*self])?;
        Ok(ControlFlow::Continue(()))
    }
}
fn parse_iter(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem, FormatError>> + '_ {
let mut rest = fmt;
std::iter::from_fn(move || {
if rest.is_empty() {
return None;
impl FormatChar for EscapedChar {
    /// Writes the bytes this escape stands for.
    ///
    /// `Char` writes its byte and `Backslash` writes a backslash followed by
    /// its byte; both return `ControlFlow::Continue(())`. `End` writes
    /// nothing and returns `ControlFlow::Break(())`.
    fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
        // `write_all` instead of `write`: a bare `write` is allowed to accept
        // only part of the buffer, which would silently truncate the two-byte
        // `Backslash` output after the backslash.
        match self {
            EscapedChar::Char(c) => writer.write_all(&[*c])?,
            EscapedChar::Backslash(c) => writer.write_all(&[b'\\', *c])?,
            EscapedChar::End => return Ok(ControlFlow::Break(())),
        }
        Ok(ControlFlow::Continue(()))
    }
}
match rest.iter().position(|c| *c == b'%') {
None => {
let final_text = rest;
rest = &[];
Some(Ok(FormatItem::Text(final_text.into())))
}
Some(0) => {
// Handle the spec
rest = &rest[1..];
match rest.get(0) {
None => Some(Ok(FormatItem::Char(b'%'))),
Some(b'%') => {
rest = &rest[1..];
Some(Ok(FormatItem::Char(b'%')))
}
Some(_) => {
let spec = match Spec::parse(&mut rest) {
Some(spec) => spec,
None => return Some(Err(dbg!(FormatError::SpecError))),
};
Some(Ok(FormatItem::Spec(spec)))
}
}
}
Some(i) => {
// The `after` slice includes the % so it will be handled correctly
// in the next iteration.
let (before, after) = rest.split_at(i);
rest = after;
return Some(Ok(FormatItem::Text(before.into())));
}
impl<C: FormatChar> FormatItem<C> {
    /// Writes this item to `writer`.
    ///
    /// A `Spec` item is written through `Spec::write`, which receives `args`,
    /// and then reports `ControlFlow::Continue(())`. A `Char` item is written
    /// through [`FormatChar::write`] and reports whatever that call returns,
    /// with I/O failures wrapped in `FormatError::IoError`.
    pub fn write<'a>(
        &self,
        writer: impl Write,
        args: &mut impl Iterator<Item = &'a FormatArgument>,
    ) -> Result<ControlFlow<()>, FormatError> {
        match self {
            FormatItem::Char(c) => c.write(writer).map_err(FormatError::IoError),
            FormatItem::Spec(spec) => {
                spec.write(writer, args)?;
                Ok(ControlFlow::Continue(()))
            }
        }
    }
}
/// Lazily splits `fmt` into [`FormatItem`]s, interpreting both `%` specifiers
/// and backslash escape sequences.
///
/// `%%` yields a literal `%`. Any other `%` hands the text after it to
/// `Spec::parse`, and the iterator yields `Err(FormatError::SpecError)` when
/// that fails. A backslash hands the text after it to `parse_escape_code`.
/// Every other byte is yielded unchanged.
pub fn parse_spec_and_escape(
    fmt: &[u8],
) -> impl Iterator<Item = Result<FormatItem<EscapedChar>, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&head, tail) = remaining.split_first()?;
        remaining = tail;
        let item = match (head, remaining) {
            (b'%', [b'%', after @ ..]) => {
                remaining = after;
                FormatItem::Char(EscapedChar::Char(b'%'))
            }
            (b'%', _) => match Spec::parse(&mut remaining) {
                Some(spec) => FormatItem::Spec(spec),
                None => return Some(Err(FormatError::SpecError)),
            },
            (b'\\', _) => FormatItem::Char(parse_escape_code(&mut remaining)),
            (byte, _) => FormatItem::Char(EscapedChar::Char(byte)),
        };
        Some(Ok(item))
    })
}
/// Lazily splits `fmt` into [`FormatItem`]s, interpreting only `%` specifiers.
///
/// `%%` yields a literal `%`. Any other `%` hands the text after it to
/// `Spec::parse`, and the iterator yields `Err(FormatError::SpecError)` when
/// that fails. Every other byte, backslashes included, is yielded unchanged.
fn parse_spec_only(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem<u8>, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&head, tail) = remaining.split_first()?;
        remaining = tail;
        let item = match (head, remaining) {
            (b'%', [b'%', after @ ..]) => {
                remaining = after;
                FormatItem::Char(b'%')
            }
            (b'%', _) => match Spec::parse(&mut remaining) {
                Some(spec) => FormatItem::Spec(spec),
                None => return Some(Err(FormatError::SpecError)),
            },
            (byte, _) => FormatItem::Char(byte),
        };
        Some(Ok(item))
    })
}
/// Lazily splits `fmt` into [`EscapedChar`]s, interpreting only backslash
/// escape sequences.
///
/// A backslash hands the text after it to `parse_escape_code`; every other
/// byte, `%` included, is yielded unchanged as `EscapedChar::Char`. Every
/// item this iterator yields is `Ok`.
fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = Result<EscapedChar, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&head, tail) = remaining.split_first()?;
        remaining = tail;
        let parsed = if head == b'\\' {
            parse_escape_code(&mut remaining)
        } else {
            EscapedChar::Char(head)
        };
        Some(Ok(parsed))
    })
}
@ -144,7 +196,7 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem, FormatError
/// ```rust
/// use uucore::format::{printf, FormatArgument};
///
/// printf("hello %s", &["world".to_string()]).unwrap();
/// printf("hello %s", &[FormatArgument::String("world".to_string())]).unwrap();
/// // prints "hello world"
/// ```
pub fn printf<'a>(
@ -160,7 +212,7 @@ fn printf_writer<'a>(
args: impl IntoIterator<Item = &'a FormatArgument>,
) -> Result<(), FormatError> {
let mut args = args.into_iter();
for item in parse_iter(format_string.as_ref()) {
for item in parse_spec_only(format_string.as_ref()) {
item?.write(&mut writer, &mut args)?;
}
Ok(())
@ -191,10 +243,10 @@ pub fn sprintf<'a>(
}
/// A parsed format for a single float value
///
///
/// This is used by `seq`. It can be constructed with [`FloatFormat::parse`]
/// and can write a value with [`FloatFormat::fmt`].
///
///
/// It can only accept a single specification without any asterisk parameters.
/// If it does get more specifications, it will return an error.
pub struct Format<F: Formatter> {
@ -205,7 +257,7 @@ pub struct Format<F: Formatter> {
impl<F: Formatter> Format<F> {
pub fn parse(format_string: impl AsRef<[u8]>) -> Result<Self, FormatError> {
let mut iter = parse_iter(format_string.as_ref());
let mut iter = parse_spec_only(format_string.as_ref());
let mut prefix = Vec::new();
let mut spec = None;
@ -215,7 +267,6 @@ impl<F: Formatter> Format<F> {
spec = Some(s);
break;
}
FormatItem::Text(t) => prefix.extend_from_slice(&t),
FormatItem::Char(c) => prefix.push(c),
}
}
@ -230,9 +281,8 @@ impl<F: Formatter> Format<F> {
for item in &mut iter {
match item? {
FormatItem::Spec(_) => {
return Err(dbg!(FormatError::SpecError));
return Err(FormatError::SpecError);
}
FormatItem::Text(t) => suffix.extend_from_slice(&t),
FormatItem::Char(c) => suffix.push(c),
}
}

View file

@ -93,7 +93,7 @@ impl Formatter for SignedInt {
alignment,
} = s
else {
return Err(dbg!(FormatError::SpecError));
return Err(FormatError::SpecError);
};
let width = match width {
@ -152,7 +152,7 @@ impl Formatter for UnsignedInt {
alignment,
} = s
else {
return Err(dbg!(FormatError::SpecError));
return Err(FormatError::SpecError);
};
let width = match width {
@ -241,19 +241,19 @@ impl Formatter for Float {
precision,
} = s
else {
return Err(dbg!(FormatError::SpecError));
return Err(FormatError::SpecError);
};
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)),
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)),
Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError),
};
Ok(Self {

View file

@ -212,10 +212,7 @@ impl Spec {
(false, false) => PositiveSign::None,
},
},
x => {
dbg!("{:b}", x);
return dbg!(None)
},
_ => return None,
})
}
@ -228,16 +225,16 @@ impl Spec {
&Spec::Char { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?;
match arg {
FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left),
match arg.get_char() {
Some(c) => write_padded(writer, c, width, false, align_left),
_ => Err(FormatError::InvalidArgument(arg.clone())),
}
}
&Spec::String { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?;
match arg {
FormatArgument::String(s) => write_padded(writer, s, width, false, align_left),
match arg.get_str() {
Some(s) => write_padded(writer, s, width, false, align_left),
_ => Err(FormatError::InvalidArgument(arg.clone())),
}
}
@ -249,7 +246,7 @@ impl Spec {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?;
let FormatArgument::SignedInt(i) = arg else {
let Some(i) = arg.get_i64() else {
return Err(FormatError::InvalidArgument(arg.clone()));
};
@ -258,7 +255,7 @@ impl Spec {
positive_sign,
alignment,
}
.fmt(writer, *i)
.fmt(writer, i)
.map_err(FormatError::IoError)
}
&Spec::UnsignedInt {
@ -269,7 +266,7 @@ impl Spec {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(args)?;
let FormatArgument::UnsignedInt(i) = arg else {
let Some(i) = arg.get_u64() else {
return Err(FormatError::InvalidArgument(arg.clone()));
};
@ -278,7 +275,7 @@ impl Spec {
width,
alignment,
}
.fmt(writer, *i)
.fmt(writer, i)
.map_err(FormatError::IoError)
}
&Spec::Float {
@ -294,7 +291,7 @@ impl Spec {
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6);
let arg = next_arg(args)?;
let FormatArgument::Float(f) = arg else {
let Some(f) = arg.get_f64() else {
return Err(FormatError::InvalidArgument(arg.clone()));
};
@ -307,7 +304,7 @@ impl Spec {
alignment,
precision,
}
.fmt(writer, *f)
.fmt(writer, f)
.map_err(FormatError::IoError)
}
}
@ -322,8 +319,8 @@ fn resolve_asterisk<'a>(
None => None,
Some(CanAsterisk::Asterisk) => {
let arg = next_arg(args)?;
match arg {
FormatArgument::UnsignedInt(u) => match usize::try_from(*u) {
match arg.get_u64() {
Some(u) => match usize::try_from(u) {
Ok(u) => Some(u),
Err(_) => return Err(FormatError::InvalidArgument(arg.clone())),
},