From eaf500637900a47c4e00497aaccccf3d6d7dd5c8 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 16 Nov 2023 17:00:41 +0100 Subject: [PATCH] printf: parse arguments and handle escape codes --- src/uu/printf/src/printf.rs | 23 ++- .../src/lib/features/format/argument.rs | 60 ++++++ src/uucore/src/lib/features/format/escape.rs | 100 +++++++++ src/uucore/src/lib/features/format/mod.rs | 192 +++++++++++------- .../src/lib/features/format/num_format.rs | 10 +- src/uucore/src/lib/features/format/spec.rs | 29 ++- 6 files changed, 320 insertions(+), 94 deletions(-) create mode 100644 src/uucore/src/lib/features/format/argument.rs create mode 100644 src/uucore/src/lib/features/format/escape.rs diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 6e270ec26..00d03816e 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -6,9 +6,12 @@ // spell-checker:ignore (change!) each's // spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr +use std::io::stdout; +use std::ops::ControlFlow; + use clap::{crate_version, Arg, ArgAction, Command}; use uucore::error::{UResult, UUsageError}; -use uucore::format::{printf, FormatArgument}; +use uucore::format::{parse_spec_and_escape, FormatArgument}; use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; @@ -30,12 +33,28 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format_string = matches .get_one::(options::FORMATSTRING) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; + let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), None => vec![], }; - printf(format_string, &values)?; + let mut args = values.iter().peekable(); + for item in parse_spec_and_escape(format_string.as_ref()) { + match item?.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => break, + }; + } + + while args.peek().is_some() { + for item in parse_spec_and_escape(format_string.as_ref()) { + match item?.write(stdout(), &mut args)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => break, + }; + } + } Ok(()) } diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs new file mode 100644 index 000000000..007f519c2 --- /dev/null +++ b/src/uucore/src/lib/features/format/argument.rs @@ -0,0 +1,60 @@ +#[derive(Clone, Debug)] +pub enum FormatArgument { + Char(char), + String(String), + UnsignedInt(u64), + SignedInt(i64), + Float(f64), + /// Special argument that gets coerced into the other variants + Unparsed(String), +} + +impl FormatArgument { + pub fn get_char(&self) -> Option { + match self { + Self::Char(c) => Some(*c), + Self::Unparsed(s) => { + let mut chars = s.chars(); + let Some(c) = chars.next() else { + return None; + }; + let None = chars.next() else { + return None; + }; + Some(c) + } + _ => None, + } + } + + pub fn get_u64(&self) -> Option { + match self { + Self::UnsignedInt(n) => Some(*n), + Self::Unparsed(s) => s.parse().ok(), + _ => None, + } + } + + pub fn get_i64(&self) -> Option { + match self { + Self::SignedInt(n) => Some(*n), + Self::Unparsed(s) => s.parse().ok(), + _ => None, + } + } + + pub fn get_f64(&self) -> Option { + match self { + Self::Float(n) => Some(*n), + Self::Unparsed(s) => s.parse().ok(), + _ => None, + } + } + + pub fn get_str(&self) -> Option<&str> { + match self { + Self::Unparsed(s) | Self::String(s) => Some(s), + _ => None, + } + } +} \ No newline at end of file diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs new file mode 100644 index 000000000..b8c21741c --- /dev/null +++ b/src/uucore/src/lib/features/format/escape.rs @@ -0,0 +1,100 @@ +#[derive(Debug)] +pub enum EscapedChar { + Char(u8), + Backslash(u8), + End, +} + +#[repr(u8)] +#[derive(Clone, Copy)] +enum Base { + Oct = 8, + Hex = 16, +} + +impl Base { + fn max_digits(&self) -> u8 { + match self { + Self::Oct => 3, + Self::Hex => 2, + } + } + + fn to_digit(&self, c: u8) -> Option { + match self { + Base::Oct => { + if matches!(c, b'0'..=b'7') { + Some(c - b'0') + } else { + None + } + } + Base::Hex => match c { + b'0'..=b'9' => Some(c - b'0'), + b'A'..=b'F' => Some(c - b'A' + 10), + b'a'..=b'f' => Some(c - b'a' + 10), + _ => None, + }, + } + } +} + +/// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences +fn parse_code(input: &mut &[u8], base: Base) -> Option { + // All arithmetic on `ret` needs to be wrapping, because octal input can + // take 3 digits, which is 9 bits, and therefore more than what fits in a + // `u8`. GNU just seems to wrap these values. + // Note that if we instead make `ret` a `u32` and use `char::from_u32` will + // yield incorrect results because it will interpret values larger than + // `u8::MAX` as unicode. + let [c, rest @ ..] = input else { return None }; + let mut ret = base.to_digit(*c)?; + *input = &rest[..]; + + for _ in 1..base.max_digits() { + let [c, rest @ ..] = input else { break }; + let Some(n) = base.to_digit(*c) else { break }; + ret = ret.wrapping_mul(base as u8).wrapping_add(n); + *input = &rest[..]; + } + + Some(ret) +} + +pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar { + if let [c, new_rest @ ..] = rest { + // This is for the \NNN syntax for octal sequences. + // Note that '0' is intentionally omitted because that + // would be the \0NNN syntax. + if let b'1'..=b'7' = c { + if let Some(parsed) = parse_code(rest, Base::Oct) { + return EscapedChar::Char(parsed); + } + } + + *rest = &new_rest[..]; + match c { + b'\\' => EscapedChar::Char(b'\\'), + b'a' => EscapedChar::Char(b'\x07'), + b'b' => EscapedChar::Char(b'\x08'), + b'c' => return EscapedChar::End, + b'e' => EscapedChar::Char(b'\x1b'), + b'f' => EscapedChar::Char(b'\x0c'), + b'n' => EscapedChar::Char(b'\n'), + b'r' => EscapedChar::Char(b'\r'), + b't' => EscapedChar::Char(b'\t'), + b'v' => EscapedChar::Char(b'\x0b'), + b'x' => { + if let Some(c) = parse_code(rest, Base::Hex) { + EscapedChar::Char(c) + } else { + EscapedChar::Backslash(b'x') + } + } + b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')), + c => EscapedChar::Backslash(*c), + } + } else { + EscapedChar::Char(b'\\') + } +} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index d6500b20c..8fa8d0717 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -8,8 +8,19 @@ //! [`Format`] struct, which represents a parsed format string. This reduces //! the need for parsing a format string multiple times and assures that no //! parsing errors occur during writing. +//! +//! There are three kinds of parsing that we might want to do: +//! +//! 1. Only `printf` specifiers (for e.g. `seq`, `dd`) +//! 2. Only escape sequences (for e.g. `echo`) +//! 3. Both `printf` specifiers and escape sequences (for e.g. `printf`) +//! +//! This module aims to combine all three use cases. + // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety +mod escape; +mod argument; pub mod num_format; mod spec; @@ -18,11 +29,16 @@ use std::{ error::Error, fmt::Display, io::{stdout, Write}, + ops::ControlFlow, }; +pub use argument::*; use crate::error::UError; -use self::num_format::Formatter; +use self::{ + escape::{parse_escape_code, EscapedChar}, + num_format::Formatter, +}; #[derive(Debug)] pub enum FormatError { @@ -54,80 +70,116 @@ impl Display for FormatError { } /// A single item to format -enum FormatItem { +pub enum FormatItem { /// A format specifier Spec(Spec), - /// Some plain text - Text(Vec), /// A single character - /// - /// Added in addition to `Text` as an optimization. - Char(u8), + Char(C), } -#[derive(Clone, Debug)] -pub enum FormatArgument { - Char(char), - String(String), - UnsignedInt(u64), - SignedInt(i64), - Float(f64), - // Special argument that gets coerced into the other variants - Unparsed(String), +pub trait FormatChar { + fn write(&self, writer: impl Write) -> std::io::Result>; } -impl FormatItem { - fn write<'a>( - &self, - mut writer: impl Write, - args: &mut impl Iterator, - ) -> Result<(), FormatError> { - match self { - FormatItem::Spec(spec) => spec.write(writer, args), - FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), - FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError), - } +impl FormatChar for u8 { + fn write(&self, mut writer: impl Write) -> std::io::Result> { + writer.write(&[*self])?; + Ok(ControlFlow::Continue(())) } } -fn parse_iter(fmt: &[u8]) -> impl Iterator> + '_ { - let mut rest = fmt; - std::iter::from_fn(move || { - if rest.is_empty() { - return None; +impl FormatChar for EscapedChar { + fn write(&self, mut writer: impl Write) -> std::io::Result> { + match self { + EscapedChar::Char(c) => { + writer.write(&[*c])?; + } + EscapedChar::Backslash(c) => { + writer.write(&[b'\\', *c])?; + } + EscapedChar::End => return Ok(ControlFlow::Break(())), } + Ok(ControlFlow::Continue(())) + } +} - match rest.iter().position(|c| *c == b'%') { - None => { - let final_text = rest; - rest = &[]; - Some(Ok(FormatItem::Text(final_text.into()))) - } - Some(0) => { - // Handle the spec - rest = &rest[1..]; - match rest.get(0) { - None => Some(Ok(FormatItem::Char(b'%'))), - Some(b'%') => { - rest = &rest[1..]; - Some(Ok(FormatItem::Char(b'%'))) - } - Some(_) => { - let spec = match Spec::parse(&mut rest) { - Some(spec) => spec, - None => return Some(Err(dbg!(FormatError::SpecError))), - }; - Some(Ok(FormatItem::Spec(spec))) - } - } - } - Some(i) => { - // The `after` slice includes the % so it will be handled correctly - // in the next iteration. - let (before, after) = rest.split_at(i); - rest = after; - return Some(Ok(FormatItem::Text(before.into()))); - } +impl FormatItem { + pub fn write<'a>( + &self, + writer: impl Write, + args: &mut impl Iterator, + ) -> Result, FormatError> { + match self { + FormatItem::Spec(spec) => spec.write(writer, args)?, + FormatItem::Char(c) => return c.write(writer).map_err(FormatError::IoError), + }; + Ok(ControlFlow::Continue(())) + } +} + +pub fn parse_spec_and_escape( + fmt: &[u8], +) -> impl Iterator, FormatError>> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => return None, + [b'%', b'%', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(EscapedChar::Char(b'%')))) + } + [b'%', rest @ ..] => { + current = rest; + let spec = match Spec::parse(&mut current) { + Some(spec) => spec, + None => return Some(Err(FormatError::SpecError)), + }; + Some(Ok(FormatItem::Spec(spec))) + } + [b'\\', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(parse_escape_code(&mut current)))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(EscapedChar::Char(*c)))) + } + }) +} + +fn parse_spec_only(fmt: &[u8]) -> impl Iterator, FormatError>> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => return None, + [b'%', b'%', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(b'%'))) + } + [b'%', rest @ ..] => { + current = rest; + let spec = match Spec::parse(&mut current) { + Some(spec) => spec, + None => return Some(Err(FormatError::SpecError)), + }; + Some(Ok(FormatItem::Spec(spec))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(*c))) + } + }) +} + +fn parse_escape_only(fmt: &[u8]) -> impl Iterator> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => return None, + [b'\\', rest @ ..] => { + current = rest; + Some(Ok(parse_escape_code(&mut current))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(EscapedChar::Char(*c))) } }) } @@ -144,7 +196,7 @@ fn parse_iter(fmt: &[u8]) -> impl Iterator( @@ -160,7 +212,7 @@ fn printf_writer<'a>( args: impl IntoIterator, ) -> Result<(), FormatError> { let mut args = args.into_iter(); - for item in parse_iter(format_string.as_ref()) { + for item in parse_spec_only(format_string.as_ref()) { item?.write(&mut writer, &mut args)?; } Ok(()) @@ -191,10 +243,10 @@ pub fn sprintf<'a>( } /// A parsed format for a single float value -/// +/// /// This is used by `seq`. It can be constructed with [`FloatFormat::parse`] /// and can write a value with [`FloatFormat::fmt`]. -/// +/// /// It can only accept a single specification without any asterisk parameters. /// If it does get more specifications, it will return an error. pub struct Format { @@ -205,7 +257,7 @@ pub struct Format { impl Format { pub fn parse(format_string: impl AsRef<[u8]>) -> Result { - let mut iter = parse_iter(format_string.as_ref()); + let mut iter = parse_spec_only(format_string.as_ref()); let mut prefix = Vec::new(); let mut spec = None; @@ -215,7 +267,6 @@ impl Format { spec = Some(s); break; } - FormatItem::Text(t) => prefix.extend_from_slice(&t), FormatItem::Char(c) => prefix.push(c), } } @@ -230,9 +281,8 @@ impl Format { for item in &mut iter { match item? { FormatItem::Spec(_) => { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); } - FormatItem::Text(t) => suffix.extend_from_slice(&t), FormatItem::Char(c) => suffix.push(c), } } diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 046249a13..339b52209 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -93,7 +93,7 @@ impl Formatter for SignedInt { alignment, } = s else { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); }; let width = match width { @@ -152,7 +152,7 @@ impl Formatter for UnsignedInt { alignment, } = s else { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); }; let width = match width { @@ -241,19 +241,19 @@ impl Formatter for Float { precision, } = s else { - return Err(dbg!(FormatError::SpecError)); + return Err(FormatError::SpecError); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, - Some(CanAsterisk::Asterisk) => return Err(dbg!(FormatError::SpecError)), + Some(CanAsterisk::Asterisk) => return Err(FormatError::SpecError), }; Ok(Self { diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index abc9b7a87..258005bb5 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -212,10 +212,7 @@ impl Spec { (false, false) => PositiveSign::None, }, }, - x => { - dbg!("{:b}", x); - return dbg!(None) - }, + _ => return None, }) } @@ -228,16 +225,16 @@ impl Spec { &Spec::Char { width, align_left } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - match arg { - FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), + match arg.get_char() { + Some(c) => write_padded(writer, c, width, false, align_left), _ => Err(FormatError::InvalidArgument(arg.clone())), } } &Spec::String { width, align_left } => { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - match arg { - FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), + match arg.get_str() { + Some(s) => write_padded(writer, s, width, false, align_left), _ => Err(FormatError::InvalidArgument(arg.clone())), } } @@ -249,7 +246,7 @@ impl Spec { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(&mut args)?; - let FormatArgument::SignedInt(i) = arg else { + let Some(i) = arg.get_i64() else { return Err(FormatError::InvalidArgument(arg.clone())); }; @@ -258,7 +255,7 @@ impl Spec { positive_sign, alignment, } - .fmt(writer, *i) + .fmt(writer, i) .map_err(FormatError::IoError) } &Spec::UnsignedInt { @@ -269,7 +266,7 @@ impl Spec { let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); let arg = next_arg(args)?; - let FormatArgument::UnsignedInt(i) = arg else { + let Some(i) = arg.get_u64() else { return Err(FormatError::InvalidArgument(arg.clone())); }; @@ -278,7 +275,7 @@ impl Spec { width, alignment, } - .fmt(writer, *i) + .fmt(writer, i) .map_err(FormatError::IoError) } &Spec::Float { @@ -294,7 +291,7 @@ impl Spec { let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); let arg = next_arg(args)?; - let FormatArgument::Float(f) = arg else { + let Some(f) = arg.get_f64() else { return Err(FormatError::InvalidArgument(arg.clone())); }; @@ -307,7 +304,7 @@ impl Spec { alignment, precision, } - .fmt(writer, *f) + .fmt(writer, f) .map_err(FormatError::IoError) } } @@ -322,8 +319,8 @@ fn resolve_asterisk<'a>( None => None, Some(CanAsterisk::Asterisk) => { let arg = next_arg(args)?; - match arg { - FormatArgument::UnsignedInt(u) => match usize::try_from(*u) { + match arg.get_u64() { + Some(u) => match usize::try_from(u) { Ok(u) => Some(u), Err(_) => return Err(FormatError::InvalidArgument(arg.clone())), },