1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #4185 from jfinkels/uucore-sprintf-2

uucore: add sprintf() function
This commit is contained in:
Sylvestre Ledru 2022-12-03 10:21:09 +01:00 committed by GitHub
commit 3880b463fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 202 additions and 105 deletions

View file

@ -4,7 +4,8 @@
use clap::{crate_version, Arg, ArgAction, Command};
use uucore::error::{UResult, UUsageError};
use uucore::{format_usage, memo};
use uucore::format_usage;
use uucore::memo::printf;
const VERSION: &str = "version";
const HELP: &str = "help";
@ -281,7 +282,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
None => vec![],
};
memo::Memo::run_all(format_string, &values[..])?;
printf(format_string, &values[..])?;
Ok(())
}

View file

@ -13,7 +13,7 @@ use num_traits::Zero;
use uucore::error::FromIo;
use uucore::error::UResult;
use uucore::format_usage;
use uucore::memo::Memo;
use uucore::memo::printf;
use uucore::show;
mod error;
@ -275,13 +275,10 @@ fn print_seq(
// If there was an argument `-f FORMAT`, then use that format
// template instead of the default formatting strategy.
//
// The `Memo::run_all()` function takes in the template and
// the current value and writes the result to `stdout`.
//
// TODO The `run_all()` method takes a string as its second
// TODO The `printf()` function takes a string as its second
// parameter but we have an `ExtendedBigDecimal`. In order to
// satisfy the signature of the function, we convert the
// `ExtendedBigDecimal` into a string. The `Memo::run_all()`
// `ExtendedBigDecimal` into a string. The `printf()`
// logic will subsequently parse that string into something
// similar to an `ExtendedBigDecimal` again before rendering
// it as a string and ultimately writing to `stdout`. We
@ -290,7 +287,7 @@ fn print_seq(
match format {
Some(f) => {
let s = format!("{}", value);
if let Err(x) = Memo::run_all(f, &[s]) {
if let Err(x) = printf(f, &[s]) {
show!(x);
exit(1);
}
@ -348,14 +345,14 @@ fn print_seq_integers(
// If there was an argument `-f FORMAT`, then use that format
// template instead of the default formatting strategy.
//
// The `Memo::run_all()` function takes in the template and
// The `printf()` function takes in the template and
// the current value and writes the result to `stdout`.
//
// TODO See similar comment about formatting in `print_seq()`.
match format {
Some(f) => {
let s = format!("{}", value);
if let Err(x) = Memo::run_all(f, &[s]) {
if let Err(x) = printf(f, &[s]) {
show!(x);
exit(1);
}

View file

@ -1,22 +1,27 @@
//! Memo runner of printf
//! Takes a format string and arguments
//! 1. tokenize format string into tokens, consuming
//! any subst. arguments along the way.
//! 2. feeds remaining arguments into function
//! that prints tokens.
//! Main entry point for our implementation of printf.
//!
//! The [`printf`] and [`sprintf`] closely match the behavior of the
//! corresponding C functions: the former renders a formatted string
//! to stdout, the latter renders to a new [`String`] object.
use crate::display::Quotable;
use crate::error::UResult;
use crate::features::tokenize::sub::Sub;
use crate::features::tokenize::token::{Token, Tokenizer};
use crate::error::{UResult, USimpleError};
use crate::features::tokenize::sub::SubParser;
use crate::features::tokenize::token::Token;
use crate::features::tokenize::unescaped_text::UnescapedText;
use crate::show_warning;
use itertools::put_back_n;
use std::io::{stdout, Cursor, Write};
use std::iter::Peekable;
use std::slice::Iter;
pub struct Memo {
tokens: Vec<Box<dyn Token>>,
/// Holds the tokens parsed from a printf format string.
///
/// Usage happens in two steps:
///
/// 1. Construction tokenizes the format string. Output is written to
///    the supplied writer while parsing, and substitution arguments
///    are consumed along the way.
/// 2. Any arguments still remaining are fed through the stored tokens
///    again, repeatedly, until no arguments are left.
struct Memo {
// Tokens collected while parsing the format string.
tokens: Vec<Token>,
}
fn warn_excess_args(first_arg: &str) {
@ -27,18 +32,22 @@ fn warn_excess_args(first_arg: &str) {
}
impl Memo {
pub fn new(pf_string: &str, pf_args_it: &mut Peekable<Iter<String>>) -> UResult<Self> {
fn new<W>(
writer: &mut W,
pf_string: &str,
pf_args_it: &mut Peekable<Iter<String>>,
) -> UResult<Self>
where
W: Write,
{
let mut pm = Self { tokens: Vec::new() };
let mut tmp_token: Option<Box<dyn Token>>;
let mut it = put_back_n(pf_string.chars());
let mut has_sub = false;
loop {
tmp_token = UnescapedText::from_it(&mut it, pf_args_it)?;
if let Some(x) = tmp_token {
if let Some(x) = UnescapedText::from_it_core(writer, &mut it, false) {
pm.tokens.push(x);
}
tmp_token = Sub::from_it(&mut it, pf_args_it)?;
if let Some(x) = tmp_token {
if let Some(x) = SubParser::from_it(writer, &mut it, pf_args_it)? {
if !has_sub {
has_sub = true;
}
@ -67,19 +76,100 @@ impl Memo {
}
Ok(pm)
}
pub fn apply(&self, pf_args_it: &mut Peekable<Iter<String>>) {
fn apply<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
where
W: Write,
{
for tkn in &self.tokens {
tkn.print(pf_args_it);
tkn.write(writer, pf_args_it);
}
}
pub fn run_all(pf_string: &str, pf_args: &[String]) -> UResult<()> {
fn run_all<W>(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()>
where
W: Write,
{
let mut arg_it = pf_args.iter().peekable();
let pm = Self::new(pf_string, &mut arg_it)?;
let pm = Self::new(writer, pf_string, &mut arg_it)?;
loop {
if arg_it.peek().is_none() {
return Ok(());
}
pm.apply(&mut arg_it);
pm.apply(writer, &mut arg_it);
}
}
}
/// Render a printf-style template and print the result to stdout.
///
/// `format_string` is the template; `args` supplies the values that
/// are substituted into it.
///
/// Use [`sprintf`] instead to obtain the result as a new [`String`].
///
/// # Errors
///
/// Propagates any error returned while processing the format string.
///
/// # Examples
///
/// ```rust
/// use uucore::memo::printf;
///
/// printf("hello %s", &["world".to_string()]).unwrap();
/// // prints "hello world"
/// ```
pub fn printf(format_string: &str, args: &[String]) -> UResult<()> {
    // Hand the stdout handle straight to the shared runner.
    Memo::run_all(&mut stdout(), format_string, args)
}
/// Render a printf-style template into a newly allocated [`String`].
///
/// `format_string` is the template; `args` supplies the values that
/// are substituted into it.
///
/// Use [`printf`] instead to print the result directly to stdout.
///
/// # Errors
///
/// Propagates any error returned while processing the format string,
/// and fails with exit code 1 if the rendered bytes are not valid
/// UTF-8.
///
/// # Examples
///
/// ```rust
/// use uucore::memo::sprintf;
///
/// let s = sprintf("hello %s", &["world".to_string()]).unwrap();
/// assert_eq!(s, "hello world".to_string());
/// ```
pub fn sprintf(format_string: &str, args: &[String]) -> UResult<String> {
    // Collect the rendered bytes in memory instead of printing them.
    let mut buffer = Cursor::new(Vec::new());
    Memo::run_all(&mut buffer, format_string, args)?;

    // The runner emits raw bytes, so the result must be validated.
    String::from_utf8(buffer.into_inner()).map_err(|e| {
        USimpleError::new(
            1,
            format!("failed to parse formatted string as UTF-8: {}", e),
        )
    })
}
#[cfg(test)]
mod tests {
    use crate::memo::sprintf;

    /// An empty template with no arguments renders as the empty string.
    #[test]
    fn test_sprintf_smoke() {
        let rendered = sprintf("", &[]).unwrap();
        assert_eq!(rendered, "".to_string());
    }

    /// A template without substitutions is reproduced verbatim.
    #[test]
    fn test_sprintf_no_args() {
        let rendered = sprintf("hello world", &[]).unwrap();
        assert_eq!(rendered, "hello world".to_string());
    }

    /// A `%s` substitution is replaced by the matching argument.
    #[test]
    fn test_sprintf_string() {
        let args = ["world".to_string()];
        let rendered = sprintf("hello %s", &args).unwrap();
        assert_eq!(rendered, "hello world".to_string());
    }
}

View file

@ -9,6 +9,7 @@ use crate::error::{UError, UResult};
use itertools::{put_back_n, PutBackN};
use std::error::Error;
use std::fmt::Display;
use std::io::Write;
use std::iter::Peekable;
use std::process::exit;
use std::slice::Iter;
@ -112,7 +113,7 @@ impl Sub {
}
#[derive(Default)]
struct SubParser {
pub(crate) struct SubParser {
min_width_tmp: Option<String>,
min_width_is_asterisk: bool,
past_decimal: bool,
@ -127,20 +128,24 @@ impl SubParser {
fn new() -> Self {
Self::default()
}
fn from_it(
pub(crate) fn from_it<W>(
writer: &mut W,
it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>,
) -> UResult<Option<Box<dyn token::Token>>> {
) -> UResult<Option<token::Token>>
where
W: Write,
{
let mut parser = Self::new();
if parser.sub_vals_retrieved(it)? {
let t: Box<dyn token::Token> = Self::build_token(parser);
t.print(args);
let t = Self::build_token(parser);
t.write(writer, args);
Ok(Some(t))
} else {
Ok(None)
}
}
fn build_token(parser: Self) -> Box<dyn token::Token> {
fn build_token(parser: Self) -> token::Token {
// not a self method so as to allow move of sub-parser vals.
// return new Sub struct as token
let prefix_char = match &parser.min_width_tmp {
@ -148,7 +153,7 @@ impl SubParser {
_ => ' ',
};
let t: Box<dyn token::Token> = Box::new(Sub::new(
token::Token::Sub(Sub::new(
if parser.min_width_is_asterisk {
CanAsterisk::Asterisk
} else {
@ -166,8 +171,7 @@ impl SubParser {
parser.field_char.unwrap(),
parser.text_so_far,
prefix_char,
));
t
))
}
fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> {
if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? {
@ -337,16 +341,11 @@ impl SubParser {
}
}
impl token::Tokenizer for Sub {
fn from_it(
it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>,
) -> UResult<Option<Box<dyn token::Token>>> {
SubParser::from_it(it, args)
}
}
impl token::Token for Sub {
fn print(&self, pf_args_it: &mut Peekable<Iter<String>>) {
impl Sub {
pub(crate) fn write<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
where
W: Write,
{
let field = FormatField {
min_width: match self.min_width {
CanAsterisk::Fixed(x) => x,
@ -397,7 +396,7 @@ impl token::Token for Sub {
}),
'b' => {
let mut a_it = put_back_n(arg_string.chars());
UnescapedText::from_it_core(&mut a_it, true);
UnescapedText::from_it_core(writer, &mut a_it, true);
None
}
// for 'c': get iter of string vals,
@ -417,7 +416,8 @@ impl token::Token for Sub {
};
if let Some(pre_min_width) = pre_min_width_opt {
// if have a string, print it, ensuring minimum width is met.
print!(
write!(
writer,
"{}",
match field.min_width {
Some(min_width) => {
@ -443,7 +443,8 @@ impl token::Token for Sub {
}
None => pre_min_width,
}
);
)
.ok();
}
}
}

View file

@ -1,16 +1,29 @@
//! Traits and enums dealing with Tokenization of printf Format String
use itertools::PutBackN;
use std::io::Write;
use std::iter::Peekable;
use std::slice::Iter;
use std::str::Chars;
use crate::error::UResult;
use crate::features::tokenize::sub::Sub;
use crate::features::tokenize::unescaped_text::UnescapedText;
// A token object is an object that can print the expected output
// of a contiguous segment of the format string, and
// requires at most 1 argument
pub trait Token {
fn print(&self, args: &mut Peekable<Iter<String>>);
/// One parsed segment of a printf format string.
pub enum Token {
    /// A substitution; writing it may consume arguments.
    Sub(Sub),
    /// Text whose escape sequences have already been resolved to bytes.
    UnescapedText(UnescapedText),
}

impl Token {
    /// Write this token's output to `writer`.
    ///
    /// `args` is forwarded only to substitution tokens; unescaped text
    /// is written without looking at the arguments.
    pub(crate) fn write<W: Write>(&self, writer: &mut W, args: &mut Peekable<Iter<String>>) {
        match self {
            Self::UnescapedText(text) => text.write(writer),
            Self::Sub(substitution) => substitution.write(writer, args),
        }
    }
}
// A tokenizer object is an object that takes an iterator
@ -24,10 +37,3 @@ pub trait Token {
// printing of that token's value. Essentially tokenizing
// a whole format string will print the format string and consume
// a number of arguments equal to the number of argument-using tokens
pub trait Tokenizer {
fn from_it(
it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>,
) -> UResult<Option<Box<dyn Token>>>;
}

View file

@ -7,14 +7,10 @@
use itertools::PutBackN;
use std::char::from_u32;
use std::io::{stdout, Write};
use std::iter::Peekable;
use std::io::Write;
use std::process::exit;
use std::slice::Iter;
use std::str::Chars;
use crate::error::UResult;
use super::token;
const EXIT_OK: i32 = 0;
@ -22,19 +18,19 @@ const EXIT_ERR: i32 = 1;
// by default stdout only flushes
// to console when a newline is passed.
fn flush_char(c: char) {
print!("{}", c);
let _ = stdout().flush();
/// Write the given value to `$writer` using `{}` formatting, then
/// flush the writer.
///
/// Errors from both the write and the flush are discarded. The
/// `$writer` expression is expanded twice, once for each call.
macro_rules! write_and_flush {
    ($writer:expr, $($args:tt)+) => {{
        let _ = write!($writer, "{}", $($args)+);
        let _ = $writer.flush();
    }};
}
fn flush_str(s: &str) {
print!("{}", s);
let _ = stdout().flush();
}
fn flush_bytes(bslice: &[u8]) {
let _ = stdout().write(bslice);
let _ = stdout().flush();
/// Write every byte of `bslice` to `writer`, then flush the writer.
///
/// This is best-effort: errors from the write and from the flush are
/// both discarded, and the flush is attempted even if the write failed.
fn flush_bytes<W: Write>(writer: &mut W, bslice: &[u8]) {
    let _ = writer.write_all(bslice);
    let _ = writer.flush();
}
#[derive(Default)]
@ -106,7 +102,14 @@ impl UnescapedText {
// adding the unescaped bytes
// to the passed byte_vec
// in subs_mode change octal behavior
fn handle_escaped(byte_vec: &mut Vec<u8>, it: &mut PutBackN<Chars>, subs_mode: bool) {
fn handle_escaped<W>(
writer: &mut W,
byte_vec: &mut Vec<u8>,
it: &mut PutBackN<Chars>,
subs_mode: bool,
) where
W: Write,
{
let ch = it.next().unwrap_or('\\');
match ch {
'0'..='9' | 'x' => {
@ -139,7 +142,7 @@ impl UnescapedText {
let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8;
byte_vec.push(val);
let bvec = [val];
flush_bytes(&bvec);
flush_bytes(writer, &bvec);
} else {
byte_vec.push(ch as u8);
}
@ -187,7 +190,7 @@ impl UnescapedText {
}
};
s.push(ch);
flush_str(&s);
write_and_flush!(writer, &s);
byte_vec.extend(s.bytes());
}
};
@ -197,10 +200,14 @@ impl UnescapedText {
// and return a wrapper around a Vec<u8> of unescaped bytes
// break on encounter of sub symbol ('%[^%]') unless called
// through %b subst.
pub fn from_it_core(
pub fn from_it_core<W>(
writer: &mut W,
it: &mut PutBackN<Chars>,
subs_mode: bool,
) -> Option<Box<dyn token::Token>> {
) -> Option<token::Token>
where
W: Write,
{
let mut addchar = false;
let mut new_text = Self::new();
let mut tmp_str = String::new();
@ -215,7 +222,7 @@ impl UnescapedText {
// lazy branch eval
// remember this fn could be called
// many times in a single exec through %b
flush_char(ch);
write_and_flush!(writer, ch);
tmp_str.push(ch);
}
'\\' => {
@ -230,12 +237,12 @@ impl UnescapedText {
new_vec.extend(tmp_str.bytes());
tmp_str = String::new();
}
Self::handle_escaped(new_vec, it, subs_mode);
Self::handle_escaped(writer, new_vec, it, subs_mode);
}
x if x == '%' && !subs_mode => {
if let Some(follow) = it.next() {
if follow == '%' {
flush_char(ch);
write_and_flush!(writer, ch);
tmp_str.push(ch);
} else {
it.put_back(follow);
@ -248,7 +255,7 @@ impl UnescapedText {
}
}
_ => {
flush_char(ch);
write_and_flush!(writer, ch);
tmp_str.push(ch);
}
}
@ -258,22 +265,17 @@ impl UnescapedText {
}
}
if addchar {
Some(Box::new(new_text))
Some(token::Token::UnescapedText(new_text))
} else {
None
}
}
}
impl token::Tokenizer for UnescapedText {
fn from_it(
it: &mut PutBackN<Chars>,
_: &mut Peekable<Iter<String>>,
) -> UResult<Option<Box<dyn token::Token>>> {
Ok(Self::from_it_core(it, false))
}
}
impl token::Token for UnescapedText {
fn print(&self, _: &mut Peekable<Iter<String>>) {
flush_bytes(&self.0[..]);
impl UnescapedText {
    /// Write the stored unescaped bytes to `writer` and flush it.
    pub(crate) fn write<W: Write>(&self, writer: &mut W) {
        let bytes = &self.0[..];
        flush_bytes(writer, bytes);
    }
}