1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

Merge pull request #7208 from jtracey/printf-go

printf: improve support of printing multi-byte values of characters
This commit is contained in:
Sylvestre Ledru 2025-04-24 22:02:13 +02:00 committed by GitHub
commit 18db15e4e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 85 additions and 127 deletions

View file

@ -7,7 +7,7 @@ use std::io::stdout;
use std::ops::ControlFlow; use std::ops::ControlFlow;
use uucore::error::{UResult, UUsageError}; use uucore::error::{UResult, UUsageError};
use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape}; use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape};
use uucore::{format_usage, help_about, help_section, help_usage, show_warning}; use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show_warning};
const VERSION: &str = "version"; const VERSION: &str = "version";
const HELP: &str = "help"; const HELP: &str = "help";
@ -19,23 +19,30 @@ mod options {
pub const FORMAT: &str = "FORMAT"; pub const FORMAT: &str = "FORMAT";
pub const ARGUMENT: &str = "ARGUMENT"; pub const ARGUMENT: &str = "ARGUMENT";
} }
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().get_matches_from(args); let matches = uu_app().get_matches_from(args);
let format = matches let format = matches
.get_one::<String>(options::FORMAT) .get_one::<std::ffi::OsString>(options::FORMAT)
.ok_or_else(|| UUsageError::new(1, "missing operand"))?; .ok_or_else(|| UUsageError::new(1, "missing operand"))?;
let format = os_str_as_bytes(format)?;
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) { let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), // FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
Some(s) => s
.map(|os_string| {
FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
})
.collect(),
None => vec![], None => vec![],
}; };
let mut format_seen = false; let mut format_seen = false;
let mut args = values.iter().peekable(); let mut args = values.iter().peekable();
for item in parse_spec_and_escape(format.as_ref()) {
// Parse and process the format string
for item in parse_spec_and_escape(format) {
if let Ok(FormatItem::Spec(_)) = item { if let Ok(FormatItem::Spec(_)) = item {
format_seen = true; format_seen = true;
} }
@ -58,7 +65,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} }
while args.peek().is_some() { while args.peek().is_some() {
for item in parse_spec_and_escape(format.as_ref()) { for item in parse_spec_and_escape(format) {
match item?.write(stdout(), &mut args)? { match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {} ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => return Ok(()), ControlFlow::Break(()) => return Ok(()),
@ -90,6 +97,10 @@ pub fn uu_app() -> Command {
.help("Print version information") .help("Print version information")
.action(ArgAction::Version), .action(ArgAction::Version),
) )
.arg(Arg::new(options::FORMAT)) .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
.arg(Arg::new(options::ARGUMENT).action(ArgAction::Append)) .arg(
Arg::new(options::ARGUMENT)
.action(ArgAction::Append)
.value_parser(clap::value_parser!(std::ffi::OsString)),
)
} }

View file

@ -58,7 +58,26 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
}; };
match next { match next {
FormatArgument::UnsignedInt(n) => *n, FormatArgument::UnsignedInt(n) => *n,
FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s), FormatArgument::Unparsed(s) => {
// Check if the string is a character literal enclosed in quotes
if s.starts_with(['"', '\'']) {
// Extract the content between the quotes safely using chars
let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars();
if let Some(first_char) = chars.next() {
if chars.clone().count() > 0 {
// Emit a warning if there are additional characters
let remaining: String = chars.collect();
show_warning!(
"{}: character(s) following character constant have been ignored",
remaining
);
}
return first_char as u64; // Use only the first character
}
return 0; // Empty quotes
}
extract_value(u64::extended_parse(s), s)
}
_ => 0, _ => 0,
} }
} }

View file

@ -16,40 +16,6 @@ fn basic_literal() {
.stdout_only("hello world"); .stdout_only("hello world");
} }
#[test]
fn escaped_tab() {
    // The format string carries a backslash followed by `t` (two characters);
    // the expected output has a real tab character in that position.
    let format = "hello\\t world";
    let expected = "hello\t world";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test]
fn escaped_newline() {
    // The format string carries a backslash followed by `n` (two characters);
    // the expected output has a real line feed in that position.
    let format = "hello\\n world";
    let expected = "hello\n world";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test]
fn escaped_slash() {
    // Two consecutive backslashes in the format string are expected to
    // come out as a single backslash.
    let format = "hello\\\\ world";
    let expected = "hello\\ world";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test]
fn unescaped_double_quote() {
    // The format string is a backslash followed by a double quote; the
    // expected output is the bare double quote without the backslash.
    let format = "\\\"";
    let expected = "\"";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test]
fn escaped_hex() {
    // `\x41` in the format string is expected to print `A`.
    let format = "\\x41";
    let expected = "A";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test] #[test]
fn test_missing_escaped_hex_value() { fn test_missing_escaped_hex_value() {
new_ucmd!() new_ucmd!()
@ -58,17 +24,12 @@ fn test_missing_escaped_hex_value() {
.stderr_only("printf: missing hexadecimal number in escape\n"); .stderr_only("printf: missing hexadecimal number in escape\n");
} }
#[test]
fn escaped_octal() {
    // `\101` in the format string is expected to print `A`.
    let format = "\\101";
    let expected = "A";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test] #[test]
fn escaped_octal_and_newline() { fn escaped_octal_and_newline() {
new_ucmd!() new_ucmd!()
.args(&["\\0377\\n"]) .args(&["\\101\\0377\\n"])
.succeeds() .succeeds()
.stdout_only("\x1F7\n"); .stdout_only("A\x1F7\n");
} }
#[test] #[test]
@ -145,38 +106,6 @@ fn escaped_unrecognized() {
new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d"); new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d");
} }
#[test]
fn sub_string() {
    // One `%s` directive, one argument: the argument is expected to be
    // substituted into the format string.
    let format = "hello %s";
    let argument = "world";
    new_ucmd!()
        .args(&[format, argument])
        .succeeds()
        .stdout_only("hello world");
}
#[test]
fn sub_multi_field() {
    // Two `%s` directives, two arguments: each directive is expected to
    // take the next argument in order.
    let format = "%s %s";
    let arguments = ["hello", "world"];
    new_ucmd!()
        .args(&[format, arguments[0], arguments[1]])
        .succeeds()
        .stdout_only("hello world");
}
#[test]
fn sub_repeat_format_str() {
    // One `%s` directive but two arguments: the expected output shows the
    // format string applied once per argument.
    let format = "%s.";
    let arguments = ["hello", "world"];
    new_ucmd!()
        .args(&[format, arguments[0], arguments[1]])
        .succeeds()
        .stdout_only("hello.world.");
}
#[test]
fn sub_string_ignore_escapes() {
    // The argument contains a backslash followed by `t`; with `%s` it is
    // expected to be printed unchanged rather than turned into a tab.
    let format = "hello %s";
    let argument = "\\tworld";
    new_ucmd!()
        .args(&[format, argument])
        .succeeds()
        .stdout_only("hello \\tworld");
}
#[test] #[test]
fn sub_b_string_handle_escapes() { fn sub_b_string_handle_escapes() {
new_ucmd!() new_ucmd!()
@ -705,27 +634,11 @@ fn sub_any_asterisk_second_param_with_integer() {
} }
#[test] #[test]
fn sub_any_specifiers_no_params() { fn sub_any_specifiers() {
new_ucmd!() // spell-checker:disable-next-line
.args(&["%ztlhLji", "3"]) //spell-checker:disable-line for format in ["%ztlhLji", "%0ztlhLji", "%0.ztlhLji"] {
.succeeds() new_ucmd!().args(&[format, "3"]).succeeds().stdout_only("3");
.stdout_only("3"); }
}
#[test]
fn sub_any_specifiers_after_first_param() {
new_ucmd!()
.args(&["%0ztlhLji", "3"]) //spell-checker:disable-line
.succeeds()
.stdout_only("3");
}
#[test]
fn sub_any_specifiers_after_period() {
new_ucmd!()
.args(&["%0.ztlhLji", "3"]) //spell-checker:disable-line
.succeeds()
.stdout_only("3");
} }
#[test] #[test]
@ -1027,33 +940,23 @@ fn pad_string() {
} }
#[test] #[test]
fn format_spec_zero_char_fails() { fn format_spec_zero_fails() {
// It is invalid to have the format spec '%0c' // It is invalid to have the format spec
new_ucmd!().args(&["%0c", "3"]).fails_with_code(1); for format in ["%0c", "%0s"] {
new_ucmd!().args(&[format, "3"]).fails_with_code(1);
}
} }
#[test] #[test]
fn format_spec_zero_string_fails() { fn invalid_precision_tests() {
// It is invalid to have the format spec '%0s'
new_ucmd!().args(&["%0s", "3"]).fails_with_code(1);
}
#[test]
fn invalid_precision_fails() {
// It is invalid to have length of output string greater than i32::MAX // It is invalid to have length of output string greater than i32::MAX
new_ucmd!() for format in ["%.*d", "%.*f"] {
.args(&["%.*d", "2147483648", "0"]) let expected_error = "printf: invalid precision: '2147483648'\n";
.fails() new_ucmd!()
.stderr_is("printf: invalid precision: '2147483648'\n"); .args(&[format, "2147483648", "0"])
} .fails()
.stderr_is(expected_error);
#[test] }
fn float_invalid_precision_fails() {
// It is invalid to have length of output string greater than i32::MAX
new_ucmd!()
.args(&["%.*f", "2147483648", "0"])
.fails()
.stderr_is("printf: invalid precision: '2147483648'\n");
} }
// The following padding-tests test for the cases in which flags in ['0', ' '] are given. // The following padding-tests test for the cases in which flags in ['0', ' '] are given.
@ -1385,3 +1288,28 @@ fn float_arg_with_whitespace() {
.fails() .fails()
.stderr_contains("expected a numeric value"); .stderr_contains("expected a numeric value");
} }
#[test]
fn mb_input() {
    // A leading single or double quote followed by one multi-byte character:
    // formatted with `%04x`, each argument is expected to print `00e1`.
    let quoted_chars = ["\"á", "\'á", "'\u{e1}"];
    for argument in quoted_chars {
        new_ucmd!()
            .args(&["%04x\n", argument])
            .succeeds()
            .stdout_only("00e1\n");
    }

    // Same inputs with extra characters after the first one. The value is
    // still expected to print as `00e1`, and stderr is expected to carry a
    // warning that names the trailing characters.
    let with_trailing = [
        ("\"á=", "="),
        ("\'á-", "-"),
        ("\'á=-==", "=-=="),
        ("'\u{e1}++", "++"),
    ];
    for (argument, expected) in with_trailing {
        let warning = format!("printf: warning: {expected}: character(s) following character constant have been ignored\n");
        new_ucmd!()
            .args(&["%04x\n", argument])
            .succeeds()
            .stdout_is("00e1\n")
            .stderr_is(warning);
    }
}