1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #4185 from jfinkels/uucore-sprintf-2

uucore: add sprintf() function
This commit is contained in:
Sylvestre Ledru 2022-12-03 10:21:09 +01:00 committed by GitHub
commit 3880b463fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 202 additions and 105 deletions

View file

@ -4,7 +4,8 @@
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, Arg, ArgAction, Command};
use uucore::error::{UResult, UUsageError}; use uucore::error::{UResult, UUsageError};
use uucore::{format_usage, memo}; use uucore::format_usage;
use uucore::memo::printf;
const VERSION: &str = "version"; const VERSION: &str = "version";
const HELP: &str = "help"; const HELP: &str = "help";
@ -281,7 +282,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
None => vec![], None => vec![],
}; };
memo::Memo::run_all(format_string, &values[..])?; printf(format_string, &values[..])?;
Ok(()) Ok(())
} }

View file

@ -13,7 +13,7 @@ use num_traits::Zero;
use uucore::error::FromIo; use uucore::error::FromIo;
use uucore::error::UResult; use uucore::error::UResult;
use uucore::format_usage; use uucore::format_usage;
use uucore::memo::Memo; use uucore::memo::printf;
use uucore::show; use uucore::show;
mod error; mod error;
@ -275,13 +275,10 @@ fn print_seq(
// If there was an argument `-f FORMAT`, then use that format // If there was an argument `-f FORMAT`, then use that format
// template instead of the default formatting strategy. // template instead of the default formatting strategy.
// //
// The `Memo::run_all()` function takes in the template and // TODO The `printf()` method takes a string as its second
// the current value and writes the result to `stdout`.
//
// TODO The `run_all()` method takes a string as its second
// parameter but we have an `ExtendedBigDecimal`. In order to // parameter but we have an `ExtendedBigDecimal`. In order to
// satisfy the signature of the function, we convert the // satisfy the signature of the function, we convert the
// `ExtendedBigDecimal` into a string. The `Memo::run_all()` // `ExtendedBigDecimal` into a string. The `printf()`
// logic will subsequently parse that string into something // logic will subsequently parse that string into something
// similar to an `ExtendedBigDecimal` again before rendering // similar to an `ExtendedBigDecimal` again before rendering
// it as a string and ultimately writing to `stdout`. We // it as a string and ultimately writing to `stdout`. We
@ -290,7 +287,7 @@ fn print_seq(
match format { match format {
Some(f) => { Some(f) => {
let s = format!("{}", value); let s = format!("{}", value);
if let Err(x) = Memo::run_all(f, &[s]) { if let Err(x) = printf(f, &[s]) {
show!(x); show!(x);
exit(1); exit(1);
} }
@ -348,14 +345,14 @@ fn print_seq_integers(
// If there was an argument `-f FORMAT`, then use that format // If there was an argument `-f FORMAT`, then use that format
// template instead of the default formatting strategy. // template instead of the default formatting strategy.
// //
// The `Memo::run_all()` function takes in the template and // The `printf()` function takes in the template and
// the current value and writes the result to `stdout`. // the current value and writes the result to `stdout`.
// //
// TODO See similar comment about formatting in `print_seq()`. // TODO See similar comment about formatting in `print_seq()`.
match format { match format {
Some(f) => { Some(f) => {
let s = format!("{}", value); let s = format!("{}", value);
if let Err(x) = Memo::run_all(f, &[s]) { if let Err(x) = printf(f, &[s]) {
show!(x); show!(x);
exit(1); exit(1);
} }

View file

@ -1,22 +1,27 @@
//! Memo runner of printf //! Main entry point for our implementation of printf.
//! Takes a format string and arguments //!
//! 1. tokenize format string into tokens, consuming //! The [`printf`] and [`sprintf`] closely match the behavior of the
//! any subst. arguments along the way. //! corresponding C functions: the former renders a formatted string
//! 2. feeds remaining arguments into function //! to stdout, the latter renders to a new [`String`] object.
//! that prints tokens.
use crate::display::Quotable; use crate::display::Quotable;
use crate::error::UResult; use crate::error::{UResult, USimpleError};
use crate::features::tokenize::sub::Sub; use crate::features::tokenize::sub::SubParser;
use crate::features::tokenize::token::{Token, Tokenizer}; use crate::features::tokenize::token::Token;
use crate::features::tokenize::unescaped_text::UnescapedText; use crate::features::tokenize::unescaped_text::UnescapedText;
use crate::show_warning; use crate::show_warning;
use itertools::put_back_n; use itertools::put_back_n;
use std::io::{stdout, Cursor, Write};
use std::iter::Peekable; use std::iter::Peekable;
use std::slice::Iter; use std::slice::Iter;
pub struct Memo { /// Memo runner of printf
tokens: Vec<Box<dyn Token>>, /// Takes a format string and arguments
/// 1. tokenize format string into tokens, consuming
/// any subst. arguments along the way.
/// 2. feeds remaining arguments into function
/// that prints tokens.
struct Memo {
tokens: Vec<Token>,
} }
fn warn_excess_args(first_arg: &str) { fn warn_excess_args(first_arg: &str) {
@ -27,18 +32,22 @@ fn warn_excess_args(first_arg: &str) {
} }
impl Memo { impl Memo {
pub fn new(pf_string: &str, pf_args_it: &mut Peekable<Iter<String>>) -> UResult<Self> { fn new<W>(
writer: &mut W,
pf_string: &str,
pf_args_it: &mut Peekable<Iter<String>>,
) -> UResult<Self>
where
W: Write,
{
let mut pm = Self { tokens: Vec::new() }; let mut pm = Self { tokens: Vec::new() };
let mut tmp_token: Option<Box<dyn Token>>;
let mut it = put_back_n(pf_string.chars()); let mut it = put_back_n(pf_string.chars());
let mut has_sub = false; let mut has_sub = false;
loop { loop {
tmp_token = UnescapedText::from_it(&mut it, pf_args_it)?; if let Some(x) = UnescapedText::from_it_core(writer, &mut it, false) {
if let Some(x) = tmp_token {
pm.tokens.push(x); pm.tokens.push(x);
} }
tmp_token = Sub::from_it(&mut it, pf_args_it)?; if let Some(x) = SubParser::from_it(writer, &mut it, pf_args_it)? {
if let Some(x) = tmp_token {
if !has_sub { if !has_sub {
has_sub = true; has_sub = true;
} }
@ -67,19 +76,100 @@ impl Memo {
} }
Ok(pm) Ok(pm)
} }
pub fn apply(&self, pf_args_it: &mut Peekable<Iter<String>>) { fn apply<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
where
W: Write,
{
for tkn in &self.tokens { for tkn in &self.tokens {
tkn.print(pf_args_it); tkn.write(writer, pf_args_it);
} }
} }
pub fn run_all(pf_string: &str, pf_args: &[String]) -> UResult<()> { fn run_all<W>(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()>
where
W: Write,
{
let mut arg_it = pf_args.iter().peekable(); let mut arg_it = pf_args.iter().peekable();
let pm = Self::new(pf_string, &mut arg_it)?; let pm = Self::new(writer, pf_string, &mut arg_it)?;
loop { loop {
if arg_it.peek().is_none() { if arg_it.peek().is_none() {
return Ok(()); return Ok(());
} }
pm.apply(&mut arg_it); pm.apply(writer, &mut arg_it);
} }
} }
} }
/// Render a format template and send the result to standard output.
///
/// `format_string` is the template; `args` supplies the values that
/// are substituted into it. Both are handed unchanged to the internal
/// `Memo` runner, and whatever result it produces is returned as-is.
///
/// Use [`sprintf`] instead when the rendered text is needed as a
/// [`String`] rather than on stdout.
///
/// # Examples
///
/// ```rust
/// use uucore::memo::printf;
///
/// printf("hello %s", &["world".to_string()]).unwrap();
/// // prints "hello world"
/// ```
pub fn printf(format_string: &str, args: &[String]) -> UResult<()> {
    Memo::run_all(&mut stdout(), format_string, args)
}
/// Render a format template into a freshly allocated [`String`].
///
/// `format_string` is the template; `args` supplies the values that
/// are substituted into it. The output is collected in an in-memory
/// buffer instead of being written to stdout (see [`printf`] for
/// that).
///
/// # Errors
///
/// Propagates any error reported while running the template. If the
/// collected bytes are not valid UTF-8, a `USimpleError` with code 1
/// is returned.
///
/// # Examples
///
/// ```rust
/// use uucore::memo::sprintf;
///
/// let s = sprintf("hello %s", &["world".to_string()]).unwrap();
/// assert_eq!(s, "hello world".to_string());
/// ```
pub fn sprintf(format_string: &str, args: &[String]) -> UResult<String> {
    // Collect the rendered bytes in memory rather than on stdout.
    let mut buffer = Cursor::new(Vec::<u8>::new());
    Memo::run_all(&mut buffer, format_string, args)?;

    // The template machinery emits raw bytes, so the conversion to
    // `String` can fail and has to be reported to the caller.
    String::from_utf8(buffer.into_inner()).map_err(|e| {
        USimpleError::new(
            1,
            format!("failed to parse formatted string as UTF-8: {}", e),
        )
    })
}
#[cfg(test)]
mod tests {
    use crate::memo::sprintf;

    /// An empty template with no arguments renders to an empty string.
    #[test]
    fn test_sprintf_smoke() {
        let rendered = sprintf("", &[]).unwrap();
        assert_eq!(rendered, "");
    }

    /// A template without substitutions is reproduced verbatim.
    #[test]
    fn test_sprintf_no_args() {
        let rendered = sprintf("hello world", &[]).unwrap();
        assert_eq!(rendered, "hello world");
    }

    /// A single `%s` substitution is filled from the first argument.
    #[test]
    fn test_sprintf_string() {
        let args = ["world".to_string()];
        let rendered = sprintf("hello %s", &args).unwrap();
        assert_eq!(rendered, "hello world");
    }
}

View file

@ -9,6 +9,7 @@ use crate::error::{UError, UResult};
use itertools::{put_back_n, PutBackN}; use itertools::{put_back_n, PutBackN};
use std::error::Error; use std::error::Error;
use std::fmt::Display; use std::fmt::Display;
use std::io::Write;
use std::iter::Peekable; use std::iter::Peekable;
use std::process::exit; use std::process::exit;
use std::slice::Iter; use std::slice::Iter;
@ -112,7 +113,7 @@ impl Sub {
} }
#[derive(Default)] #[derive(Default)]
struct SubParser { pub(crate) struct SubParser {
min_width_tmp: Option<String>, min_width_tmp: Option<String>,
min_width_is_asterisk: bool, min_width_is_asterisk: bool,
past_decimal: bool, past_decimal: bool,
@ -127,20 +128,24 @@ impl SubParser {
fn new() -> Self { fn new() -> Self {
Self::default() Self::default()
} }
fn from_it( pub(crate) fn from_it<W>(
writer: &mut W,
it: &mut PutBackN<Chars>, it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>, args: &mut Peekable<Iter<String>>,
) -> UResult<Option<Box<dyn token::Token>>> { ) -> UResult<Option<token::Token>>
where
W: Write,
{
let mut parser = Self::new(); let mut parser = Self::new();
if parser.sub_vals_retrieved(it)? { if parser.sub_vals_retrieved(it)? {
let t: Box<dyn token::Token> = Self::build_token(parser); let t = Self::build_token(parser);
t.print(args); t.write(writer, args);
Ok(Some(t)) Ok(Some(t))
} else { } else {
Ok(None) Ok(None)
} }
} }
fn build_token(parser: Self) -> Box<dyn token::Token> { fn build_token(parser: Self) -> token::Token {
// not a self method so as to allow move of sub-parser vals. // not a self method so as to allow move of sub-parser vals.
// return new Sub struct as token // return new Sub struct as token
let prefix_char = match &parser.min_width_tmp { let prefix_char = match &parser.min_width_tmp {
@ -148,7 +153,7 @@ impl SubParser {
_ => ' ', _ => ' ',
}; };
let t: Box<dyn token::Token> = Box::new(Sub::new( token::Token::Sub(Sub::new(
if parser.min_width_is_asterisk { if parser.min_width_is_asterisk {
CanAsterisk::Asterisk CanAsterisk::Asterisk
} else { } else {
@ -166,8 +171,7 @@ impl SubParser {
parser.field_char.unwrap(), parser.field_char.unwrap(),
parser.text_so_far, parser.text_so_far,
prefix_char, prefix_char,
)); ))
t
} }
fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> { fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> {
if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? { if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? {
@ -337,16 +341,11 @@ impl SubParser {
} }
} }
impl token::Tokenizer for Sub { impl Sub {
fn from_it( pub(crate) fn write<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
it: &mut PutBackN<Chars>, where
args: &mut Peekable<Iter<String>>, W: Write,
) -> UResult<Option<Box<dyn token::Token>>> { {
SubParser::from_it(it, args)
}
}
impl token::Token for Sub {
fn print(&self, pf_args_it: &mut Peekable<Iter<String>>) {
let field = FormatField { let field = FormatField {
min_width: match self.min_width { min_width: match self.min_width {
CanAsterisk::Fixed(x) => x, CanAsterisk::Fixed(x) => x,
@ -397,7 +396,7 @@ impl token::Token for Sub {
}), }),
'b' => { 'b' => {
let mut a_it = put_back_n(arg_string.chars()); let mut a_it = put_back_n(arg_string.chars());
UnescapedText::from_it_core(&mut a_it, true); UnescapedText::from_it_core(writer, &mut a_it, true);
None None
} }
// for 'c': get iter of string vals, // for 'c': get iter of string vals,
@ -417,7 +416,8 @@ impl token::Token for Sub {
}; };
if let Some(pre_min_width) = pre_min_width_opt { if let Some(pre_min_width) = pre_min_width_opt {
// if have a string, print it, ensuring minimum width is met. // if have a string, print it, ensuring minimum width is met.
print!( write!(
writer,
"{}", "{}",
match field.min_width { match field.min_width {
Some(min_width) => { Some(min_width) => {
@ -443,7 +443,8 @@ impl token::Token for Sub {
} }
None => pre_min_width, None => pre_min_width,
} }
); )
.ok();
} }
} }
} }

View file

@ -1,16 +1,29 @@
//! Traits and enums dealing with Tokenization of printf Format String //! Traits and enums dealing with Tokenization of printf Format String
use itertools::PutBackN; use std::io::Write;
use std::iter::Peekable; use std::iter::Peekable;
use std::slice::Iter; use std::slice::Iter;
use std::str::Chars;
use crate::error::UResult; use crate::features::tokenize::sub::Sub;
use crate::features::tokenize::unescaped_text::UnescapedText;
// A token object is an object that can print the expected output // A token object is an object that can print the expected output
// of a contiguous segment of the format string, and // of a contiguous segment of the format string, and
// requires at most 1 argument // requires at most 1 argument
pub trait Token { pub enum Token {
fn print(&self, args: &mut Peekable<Iter<String>>); Sub(Sub),
UnescapedText(UnescapedText),
}
impl Token {
/// Write this token's output to `writer`.
///
/// Dispatches to the `write` method of the wrapped value. Only the
/// `Sub` variant is handed the argument iterator `args`; the
/// `UnescapedText` variant is written without it.
pub(crate) fn write<W: Write>(&self, writer: &mut W, args: &mut Peekable<Iter<String>>) {
    match self {
        Self::UnescapedText(text) => text.write(writer),
        Self::Sub(substitution) => substitution.write(writer, args),
    }
}
} }
// A tokenizer object is an object that takes an iterator // A tokenizer object is an object that takes an iterator
@ -24,10 +37,3 @@ pub trait Token {
// printing of that token's value. Essentially tokenizing // printing of that token's value. Essentially tokenizing
// a whole format string will print the format string and consume // a whole format string will print the format string and consume
// a number of arguments equal to the number of argument-using tokens // a number of arguments equal to the number of argument-using tokens
pub trait Tokenizer {
fn from_it(
it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>,
) -> UResult<Option<Box<dyn Token>>>;
}

View file

@ -7,14 +7,10 @@
use itertools::PutBackN; use itertools::PutBackN;
use std::char::from_u32; use std::char::from_u32;
use std::io::{stdout, Write}; use std::io::Write;
use std::iter::Peekable;
use std::process::exit; use std::process::exit;
use std::slice::Iter;
use std::str::Chars; use std::str::Chars;
use crate::error::UResult;
use super::token; use super::token;
const EXIT_OK: i32 = 0; const EXIT_OK: i32 = 0;
@ -22,19 +18,19 @@ const EXIT_ERR: i32 = 1;
// by default stdout only flushes // by default stdout only flushes
// to console when a newline is passed. // to console when a newline is passed.
fn flush_char(c: char) { macro_rules! write_and_flush {
print!("{}", c); ($writer:expr, $($args:tt)+) => ({
let _ = stdout().flush(); write!($writer, "{}", $($args)+).ok();
$writer.flush().ok();
})
} }
fn flush_str(s: &str) { fn flush_bytes<W>(writer: &mut W, bslice: &[u8])
print!("{}", s); where
let _ = stdout().flush(); W: Write,
} {
writer.write_all(bslice).ok();
fn flush_bytes(bslice: &[u8]) { writer.flush().ok();
let _ = stdout().write(bslice);
let _ = stdout().flush();
} }
#[derive(Default)] #[derive(Default)]
@ -106,7 +102,14 @@ impl UnescapedText {
// adding the unescaped bytes // adding the unescaped bytes
// to the passed byte_vec // to the passed byte_vec
// in subs_mode change octal behavior // in subs_mode change octal behavior
fn handle_escaped(byte_vec: &mut Vec<u8>, it: &mut PutBackN<Chars>, subs_mode: bool) { fn handle_escaped<W>(
writer: &mut W,
byte_vec: &mut Vec<u8>,
it: &mut PutBackN<Chars>,
subs_mode: bool,
) where
W: Write,
{
let ch = it.next().unwrap_or('\\'); let ch = it.next().unwrap_or('\\');
match ch { match ch {
'0'..='9' | 'x' => { '0'..='9' | 'x' => {
@ -139,7 +142,7 @@ impl UnescapedText {
let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8; let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8;
byte_vec.push(val); byte_vec.push(val);
let bvec = [val]; let bvec = [val];
flush_bytes(&bvec); flush_bytes(writer, &bvec);
} else { } else {
byte_vec.push(ch as u8); byte_vec.push(ch as u8);
} }
@ -187,7 +190,7 @@ impl UnescapedText {
} }
}; };
s.push(ch); s.push(ch);
flush_str(&s); write_and_flush!(writer, &s);
byte_vec.extend(s.bytes()); byte_vec.extend(s.bytes());
} }
}; };
@ -197,10 +200,14 @@ impl UnescapedText {
// and return a wrapper around a Vec<u8> of unescaped bytes // and return a wrapper around a Vec<u8> of unescaped bytes
// break on encounter of sub symbol ('%[^%]') unless called // break on encounter of sub symbol ('%[^%]') unless called
// through %b subst. // through %b subst.
pub fn from_it_core( pub fn from_it_core<W>(
writer: &mut W,
it: &mut PutBackN<Chars>, it: &mut PutBackN<Chars>,
subs_mode: bool, subs_mode: bool,
) -> Option<Box<dyn token::Token>> { ) -> Option<token::Token>
where
W: Write,
{
let mut addchar = false; let mut addchar = false;
let mut new_text = Self::new(); let mut new_text = Self::new();
let mut tmp_str = String::new(); let mut tmp_str = String::new();
@ -215,7 +222,7 @@ impl UnescapedText {
// lazy branch eval // lazy branch eval
// remember this fn could be called // remember this fn could be called
// many times in a single exec through %b // many times in a single exec through %b
flush_char(ch); write_and_flush!(writer, ch);
tmp_str.push(ch); tmp_str.push(ch);
} }
'\\' => { '\\' => {
@ -230,12 +237,12 @@ impl UnescapedText {
new_vec.extend(tmp_str.bytes()); new_vec.extend(tmp_str.bytes());
tmp_str = String::new(); tmp_str = String::new();
} }
Self::handle_escaped(new_vec, it, subs_mode); Self::handle_escaped(writer, new_vec, it, subs_mode);
} }
x if x == '%' && !subs_mode => { x if x == '%' && !subs_mode => {
if let Some(follow) = it.next() { if let Some(follow) = it.next() {
if follow == '%' { if follow == '%' {
flush_char(ch); write_and_flush!(writer, ch);
tmp_str.push(ch); tmp_str.push(ch);
} else { } else {
it.put_back(follow); it.put_back(follow);
@ -248,7 +255,7 @@ impl UnescapedText {
} }
} }
_ => { _ => {
flush_char(ch); write_and_flush!(writer, ch);
tmp_str.push(ch); tmp_str.push(ch);
} }
} }
@ -258,22 +265,17 @@ impl UnescapedText {
} }
} }
if addchar { if addchar {
Some(Box::new(new_text)) Some(token::Token::UnescapedText(new_text))
} else { } else {
None None
} }
} }
} }
impl token::Tokenizer for UnescapedText { impl UnescapedText {
fn from_it( pub(crate) fn write<W>(&self, writer: &mut W)
it: &mut PutBackN<Chars>, where
_: &mut Peekable<Iter<String>>, W: Write,
) -> UResult<Option<Box<dyn token::Token>>> { {
Ok(Self::from_it_core(it, false)) flush_bytes(writer, &self.0[..]);
}
}
impl token::Token for UnescapedText {
fn print(&self, _: &mut Peekable<Iter<String>>) {
flush_bytes(&self.0[..]);
} }
} }