diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs
index 9a81529e5..b8b0b6f4a 100644
--- a/src/uu/printf/src/printf.rs
+++ b/src/uu/printf/src/printf.rs
@@ -7,7 +7,7 @@ use std::io::stdout;
 use std::ops::ControlFlow;
 use uucore::error::{UResult, UUsageError};
 use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape};
-use uucore::{format_usage, help_about, help_section, help_usage, show_warning};
+use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show_warning};
 
 const VERSION: &str = "version";
 const HELP: &str = "help";
@@ -19,23 +19,30 @@ mod options {
     pub const FORMAT: &str = "FORMAT";
     pub const ARGUMENT: &str = "ARGUMENT";
 }
-
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     let matches = uu_app().get_matches_from(args);
 
     let format = matches
-        .get_one::<String>(options::FORMAT)
+        .get_one::<std::ffi::OsString>(options::FORMAT)
         .ok_or_else(|| UUsageError::new(1, "missing operand"))?;
+    let format = os_str_as_bytes(format)?;
 
-    let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
-        Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
+    let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
+        // FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
+        Some(s) => s
+            .map(|os_string| {
+                FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
+            })
+            .collect(),
         None => vec![],
     };
 
     let mut format_seen = false;
     let mut args = values.iter().peekable();
-    for item in parse_spec_and_escape(format.as_ref()) {
+
+    // Parse and process the format string
+    for item in parse_spec_and_escape(format) {
         if let Ok(FormatItem::Spec(_)) = item {
             format_seen = true;
         }
@@ -58,7 +65,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     }
 
     while args.peek().is_some() {
-        for item in parse_spec_and_escape(format.as_ref()) {
+        for item in parse_spec_and_escape(format) {
             match item?.write(stdout(), &mut args)? {
                 ControlFlow::Continue(()) => {}
                 ControlFlow::Break(()) => return Ok(()),
@@ -90,6 +97,10 @@ pub fn uu_app() -> Command {
                 .help("Print version information")
                 .action(ArgAction::Version),
         )
-        .arg(Arg::new(options::FORMAT))
-        .arg(Arg::new(options::ARGUMENT).action(ArgAction::Append))
+        .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
+        .arg(
+            Arg::new(options::ARGUMENT)
+                .action(ArgAction::Append)
+                .value_parser(clap::value_parser!(std::ffi::OsString)),
+        )
 }
diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs
index 72f17758a..48d7f8f93 100644
--- a/src/uucore/src/lib/features/format/argument.rs
+++ b/src/uucore/src/lib/features/format/argument.rs
@@ -58,7 +58,28 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
         };
         match next {
             FormatArgument::UnsignedInt(n) => *n,
-            FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s),
+            FormatArgument::Unparsed(s) => {
+                // A leading quote followed by at least one character is a
+                // character constant whose value is that character's code
+                // point. Only the opening quote is stripped, so a closing
+                // quote counts as a trailing character and is warned about.
+                if let Some(rest) = s.strip_prefix(['"', '\'']) {
+                    let mut chars = rest.chars();
+                    if let Some(first_char) = chars.next() {
+                        let remaining = chars.as_str();
+                        if !remaining.is_empty() {
+                            show_warning!(
+                                "{}: character(s) following character constant have been ignored",
+                                remaining
+                            );
+                        }
+                        return u64::from(first_char);
+                    }
+                    // A lone quote is not a character constant: fall through
+                    // so the regular numeric parser handles (and reports) it.
+                }
+                extract_value(u64::extended_parse(s), s)
+            }
             _ => 0,
         }
     }
diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs
index 2d059c0fa..2b53c10a2 100644
--- a/tests/by-util/test_printf.rs
+++ b/tests/by-util/test_printf.rs
@@ -16,40 +16,6 @@ fn basic_literal() {
         .stdout_only("hello world");
 }
 
-#[test]
-fn escaped_tab() {
-    new_ucmd!()
-        .args(&["hello\\t world"])
-        .succeeds()
-        .stdout_only("hello\t world");
-}
-
-#[test]
-fn escaped_newline() {
-    new_ucmd!()
-        .args(&["hello\\n world"])
-        .succeeds()
-        .stdout_only("hello\n world");
-}
-
-#[test]
-fn escaped_slash() {
-    new_ucmd!()
-        .args(&["hello\\\\ world"])
-        .succeeds()
-        .stdout_only("hello\\ world");
-}
-
-#[test]
-fn unescaped_double_quote() {
-    new_ucmd!().args(&["\\\""]).succeeds().stdout_only("\"");
-}
-
-#[test]
-fn escaped_hex() {
-    new_ucmd!().args(&["\\x41"]).succeeds().stdout_only("A");
-}
-
 #[test]
 fn test_missing_escaped_hex_value() {
     new_ucmd!()
@@ -58,17 +24,12 @@ fn test_missing_escaped_hex_value() {
         .stderr_only("printf: missing hexadecimal number in escape\n");
 }
 
-#[test]
-fn escaped_octal() {
-    new_ucmd!().args(&["\\101"]).succeeds().stdout_only("A");
-}
-
 #[test]
 fn escaped_octal_and_newline() {
     new_ucmd!()
-        .args(&["\\0377\\n"])
+        .args(&["\\101\\0377\\n"])
         .succeeds()
-        .stdout_only("\x1F7\n");
+        .stdout_only("A\x1F7\n");
 }
 
 #[test]
@@ -145,38 +106,6 @@ fn escaped_unrecognized() {
     new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d");
 }
 
-#[test]
-fn sub_string() {
-    new_ucmd!()
-        .args(&["hello %s", "world"])
-        .succeeds()
-        .stdout_only("hello world");
-}
-
-#[test]
-fn sub_multi_field() {
-    new_ucmd!()
-        .args(&["%s %s", "hello", "world"])
-        .succeeds()
-        .stdout_only("hello world");
-}
-
-#[test]
-fn sub_repeat_format_str() {
-    new_ucmd!()
-        .args(&["%s.", "hello", "world"])
-        .succeeds()
-        .stdout_only("hello.world.");
-}
-
-#[test]
-fn sub_string_ignore_escapes() {
-    new_ucmd!()
-        .args(&["hello %s", "\\tworld"])
-        .succeeds()
-        .stdout_only("hello \\tworld");
-}
-
 #[test]
 fn sub_b_string_handle_escapes() {
     new_ucmd!()
@@ -705,27 +634,11 @@ fn sub_any_asterisk_second_param_with_integer() {
 }
 
 #[test]
-fn sub_any_specifiers_no_params() {
-    new_ucmd!()
-        .args(&["%ztlhLji", "3"]) //spell-checker:disable-line
-        .succeeds()
-        .stdout_only("3");
-}
-
-#[test]
-fn sub_any_specifiers_after_first_param() {
-    new_ucmd!()
-        .args(&["%0ztlhLji", "3"]) //spell-checker:disable-line
-        .succeeds()
-        .stdout_only("3");
-}
-
-#[test]
-fn sub_any_specifiers_after_period() {
-    new_ucmd!()
-        .args(&["%0.ztlhLji", "3"]) //spell-checker:disable-line
-        .succeeds()
-        .stdout_only("3");
+fn sub_any_specifiers() {
+    // spell-checker:disable-next-line
+    for format in ["%ztlhLji", "%0ztlhLji", "%0.ztlhLji"] {
+        new_ucmd!().args(&[format, "3"]).succeeds().stdout_only("3");
+    }
 }
 
 #[test]
@@ -1027,33 +940,23 @@ fn pad_string() {
 }
 
 #[test]
-fn format_spec_zero_char_fails() {
-    // It is invalid to have the format spec '%0c'
-    new_ucmd!().args(&["%0c", "3"]).fails_with_code(1);
+fn format_spec_zero_fails() {
+    // It is invalid to have the format spec '%0c' or '%0s'
+    for format in ["%0c", "%0s"] {
+        new_ucmd!().args(&[format, "3"]).fails_with_code(1);
+    }
 }
 
 #[test]
-fn format_spec_zero_string_fails() {
-    // It is invalid to have the format spec '%0s'
-    new_ucmd!().args(&["%0s", "3"]).fails_with_code(1);
-}
-
-#[test]
-fn invalid_precision_fails() {
+fn invalid_precision_tests() {
     // It is invalid to have length of output string greater than i32::MAX
-    new_ucmd!()
-        .args(&["%.*d", "2147483648", "0"])
-        .fails()
-        .stderr_is("printf: invalid precision: '2147483648'\n");
-}
-
-#[test]
-fn float_invalid_precision_fails() {
-    // It is invalid to have length of output string greater than i32::MAX
-    new_ucmd!()
-        .args(&["%.*f", "2147483648", "0"])
-        .fails()
-        .stderr_is("printf: invalid precision: '2147483648'\n");
+    for format in ["%.*d", "%.*f"] {
+        let expected_error = "printf: invalid precision: '2147483648'\n";
+        new_ucmd!()
+            .args(&[format, "2147483648", "0"])
+            .fails()
+            .stderr_is(expected_error);
+    }
 }
 
 // The following padding-tests test for the cases in which flags in ['0', ' '] are given.
@@ -1385,3 +1288,29 @@ fn float_arg_with_whitespace() {
         .fails()
         .stderr_contains("expected a numeric value");
 }
+
+#[test]
+fn mb_input() {
+    for format in ["\"á", "\'á", "'\u{e1}"] {
+        new_ucmd!()
+            .args(&["%04x\n", format])
+            .succeeds()
+            .stdout_only("00e1\n");
+    }
+
+    let cases = [
+        ("\"á=", "="),
+        ("\'á-", "-"),
+        ("\'á=-==", "=-=="),
+        ("'\u{e1}++", "++"),
+        ("'á'", "'"), // a closing quote is a trailing character too
+    ];
+
+    for (format, expected) in cases {
+        new_ucmd!()
+            .args(&["%04x\n", format])
+            .succeeds()
+            .stdout_is("00e1\n")
+            .stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
+    }
+}