mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 11:07:44 +00:00
Merge pull request #7208 from jtracey/printf-go
printf: improve support of printing multi-byte values of characters
This commit is contained in:
commit
18db15e4e6
3 changed files with 85 additions and 127 deletions
|
@ -7,7 +7,7 @@ use std::io::stdout;
|
|||
use std::ops::ControlFlow;
|
||||
use uucore::error::{UResult, UUsageError};
|
||||
use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape};
|
||||
use uucore::{format_usage, help_about, help_section, help_usage, show_warning};
|
||||
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show_warning};
|
||||
|
||||
const VERSION: &str = "version";
|
||||
const HELP: &str = "help";
|
||||
|
@ -19,23 +19,30 @@ mod options {
|
|||
pub const FORMAT: &str = "FORMAT";
|
||||
pub const ARGUMENT: &str = "ARGUMENT";
|
||||
}
|
||||
|
||||
#[uucore::main]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let matches = uu_app().get_matches_from(args);
|
||||
|
||||
let format = matches
|
||||
.get_one::<String>(options::FORMAT)
|
||||
.get_one::<std::ffi::OsString>(options::FORMAT)
|
||||
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;
|
||||
let format = os_str_as_bytes(format)?;
|
||||
|
||||
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
|
||||
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
|
||||
let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
|
||||
// FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
|
||||
Some(s) => s
|
||||
.map(|os_string| {
|
||||
FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
|
||||
})
|
||||
.collect(),
|
||||
None => vec![],
|
||||
};
|
||||
|
||||
let mut format_seen = false;
|
||||
let mut args = values.iter().peekable();
|
||||
for item in parse_spec_and_escape(format.as_ref()) {
|
||||
|
||||
// Parse and process the format string
|
||||
for item in parse_spec_and_escape(format) {
|
||||
if let Ok(FormatItem::Spec(_)) = item {
|
||||
format_seen = true;
|
||||
}
|
||||
|
@ -58,7 +65,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
}
|
||||
|
||||
while args.peek().is_some() {
|
||||
for item in parse_spec_and_escape(format.as_ref()) {
|
||||
for item in parse_spec_and_escape(format) {
|
||||
match item?.write(stdout(), &mut args)? {
|
||||
ControlFlow::Continue(()) => {}
|
||||
ControlFlow::Break(()) => return Ok(()),
|
||||
|
@ -90,6 +97,10 @@ pub fn uu_app() -> Command {
|
|||
.help("Print version information")
|
||||
.action(ArgAction::Version),
|
||||
)
|
||||
.arg(Arg::new(options::FORMAT))
|
||||
.arg(Arg::new(options::ARGUMENT).action(ArgAction::Append))
|
||||
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
|
||||
.arg(
|
||||
Arg::new(options::ARGUMENT)
|
||||
.action(ArgAction::Append)
|
||||
.value_parser(clap::value_parser!(std::ffi::OsString)),
|
||||
)
|
||||
}
|
||||
|
|
|
@ -58,7 +58,26 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
|
|||
};
|
||||
match next {
|
||||
FormatArgument::UnsignedInt(n) => *n,
|
||||
FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s),
|
||||
// Unparsed argument: when it starts with a single or double quote, the
// result is the code point of the first character after the quote(s);
// otherwise the string is handed to the numeric parser.
FormatArgument::Unparsed(s) => {
|
||||
// Check if the string is a character literal, i.e. it starts with a quote
|
||||
if s.starts_with(['"', '\'']) {
|
||||
// Extract the content between the quotes safely using chars
// NOTE(review): `trim_matches` strips *every* quote character from both
// ends, not only the opening one. As written, a trailing quote (e.g. `'a'`)
// is dropped without triggering the warning below, and an argument made
// only of quotes (e.g. `''`) falls through to `return 0` instead of
// yielding the code point of the second quote. Confirm whether only the
// leading quote should be removed here.
|
||||
let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars();
|
||||
if let Some(first_char) = chars.next() {
|
||||
// `chars` has already advanced past `first_char`, so anything left is
// trailing input that will be ignored.
if chars.clone().count() > 0 {
|
||||
// Emit a warning if there are additional characters
|
||||
let remaining: String = chars.collect();
|
||||
show_warning!(
|
||||
"{}: character(s) following character constant have been ignored",
|
||||
remaining
|
||||
);
|
||||
}
|
||||
return first_char as u64; // Use only the first character
|
||||
}
|
||||
return 0; // Empty quotes
|
||||
}
|
||||
// No leading quote: parse the argument as an unsigned integer.
extract_value(u64::extended_parse(s), s)
|
||||
}
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,40 +16,6 @@ fn basic_literal() {
|
|||
.stdout_only("hello world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_tab() {
|
||||
new_ucmd!()
|
||||
.args(&["hello\\t world"])
|
||||
.succeeds()
|
||||
.stdout_only("hello\t world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["hello\\n world"])
|
||||
.succeeds()
|
||||
.stdout_only("hello\n world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_slash() {
|
||||
new_ucmd!()
|
||||
.args(&["hello\\\\ world"])
|
||||
.succeeds()
|
||||
.stdout_only("hello\\ world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescaped_double_quote() {
|
||||
new_ucmd!().args(&["\\\""]).succeeds().stdout_only("\"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_hex() {
|
||||
new_ucmd!().args(&["\\x41"]).succeeds().stdout_only("A");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_escaped_hex_value() {
|
||||
new_ucmd!()
|
||||
|
@ -58,17 +24,12 @@ fn test_missing_escaped_hex_value() {
|
|||
.stderr_only("printf: missing hexadecimal number in escape\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_octal() {
|
||||
new_ucmd!().args(&["\\101"]).succeeds().stdout_only("A");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_octal_and_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["\\0377\\n"])
|
||||
.args(&["\\101\\0377\\n"])
|
||||
.succeeds()
|
||||
.stdout_only("\x1F7\n");
|
||||
.stdout_only("A\x1F7\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -145,38 +106,6 @@ fn escaped_unrecognized() {
|
|||
new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_string() {
|
||||
new_ucmd!()
|
||||
.args(&["hello %s", "world"])
|
||||
.succeeds()
|
||||
.stdout_only("hello world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_multi_field() {
|
||||
new_ucmd!()
|
||||
.args(&["%s %s", "hello", "world"])
|
||||
.succeeds()
|
||||
.stdout_only("hello world");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_repeat_format_str() {
|
||||
new_ucmd!()
|
||||
.args(&["%s.", "hello", "world"])
|
||||
.succeeds()
|
||||
.stdout_only("hello.world.");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_string_ignore_escapes() {
|
||||
new_ucmd!()
|
||||
.args(&["hello %s", "\\tworld"])
|
||||
.succeeds()
|
||||
.stdout_only("hello \\tworld");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_b_string_handle_escapes() {
|
||||
new_ucmd!()
|
||||
|
@ -705,27 +634,11 @@ fn sub_any_asterisk_second_param_with_integer() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn sub_any_specifiers_no_params() {
|
||||
new_ucmd!()
|
||||
.args(&["%ztlhLji", "3"]) //spell-checker:disable-line
|
||||
.succeeds()
|
||||
.stdout_only("3");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_any_specifiers_after_first_param() {
|
||||
new_ucmd!()
|
||||
.args(&["%0ztlhLji", "3"]) //spell-checker:disable-line
|
||||
.succeeds()
|
||||
.stdout_only("3");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sub_any_specifiers_after_period() {
|
||||
new_ucmd!()
|
||||
.args(&["%0.ztlhLji", "3"]) //spell-checker:disable-line
|
||||
.succeeds()
|
||||
.stdout_only("3");
|
||||
/// Runs the command once per format string, each time with the single
/// argument "3", and requires success with `3` on stdout.
///
/// The three formats put the same run of letters before the final `i`
/// conversion: directly after `%`, after a `0` flag, and after `0.`.
fn sub_any_specifiers() {
|
||||
// spell-checker:disable-next-line
|
||||
for format in ["%ztlhLji", "%0ztlhLji", "%0.ztlhLji"] {
|
||||
new_ucmd!().args(&[format, "3"]).succeeds().stdout_only("3");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1027,33 +940,23 @@ fn pad_string() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn format_spec_zero_char_fails() {
|
||||
// It is invalid to have the format spec '%0c'
|
||||
new_ucmd!().args(&["%0c", "3"]).fails_with_code(1);
|
||||
/// Checks that the `0` flag is rejected for both the `%c` and `%s`
/// conversions: each run is expected to fail with exit code 1.
fn format_spec_zero_fails() {
|
||||
// It is invalid to have the format spec '%0c' or '%0s'
|
||||
for format in ["%0c", "%0s"] {
|
||||
new_ucmd!().args(&[format, "3"]).fails_with_code(1);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_spec_zero_string_fails() {
|
||||
// It is invalid to have the format spec '%0s'
|
||||
new_ucmd!().args(&["%0s", "3"]).fails_with_code(1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_precision_fails() {
|
||||
fn invalid_precision_tests() {
|
||||
// It is invalid to have length of output string greater than i32::MAX
|
||||
new_ucmd!()
|
||||
.args(&["%.*d", "2147483648", "0"])
|
||||
.fails()
|
||||
.stderr_is("printf: invalid precision: '2147483648'\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float_invalid_precision_fails() {
|
||||
// It is invalid to have length of output string greater than i32::MAX
|
||||
new_ucmd!()
|
||||
.args(&["%.*f", "2147483648", "0"])
|
||||
.fails()
|
||||
.stderr_is("printf: invalid precision: '2147483648'\n");
|
||||
for format in ["%.*d", "%.*f"] {
|
||||
let expected_error = "printf: invalid precision: '2147483648'\n";
|
||||
new_ucmd!()
|
||||
.args(&[format, "2147483648", "0"])
|
||||
.fails()
|
||||
.stderr_is(expected_error);
|
||||
}
|
||||
}
|
||||
|
||||
// The following padding-tests test for the cases in which flags in ['0', ' '] are given.
|
||||
|
@ -1385,3 +1288,28 @@ fn float_arg_with_whitespace() {
|
|||
.fails()
|
||||
.stderr_contains("expected a numeric value");
|
||||
}
|
||||
|
||||
/// Checks that an argument consisting of a quote followed by a multi-byte
/// character is converted to that character's code point.
///
/// Every input uses `á` (written literally or as `\u{e1}`) after a leading
/// single or double quote, formatted with `%04x\n`, so the expected stdout is
/// always `00e1\n`.
#[test]
|
||||
fn mb_input() {
|
||||
// Quote + character only: the code point is printed.
for format in ["\"á", "\'á", "'\u{e1}"] {
|
||||
new_ucmd!()
|
||||
.args(&["%04x\n", format])
|
||||
.succeeds()
|
||||
.stdout_only("00e1\n");
|
||||
}
|
||||
|
||||
// Quote + character + trailing text: each pair is (argument, trailing text
// expected to be named in the warning).
let cases = vec![
|
||||
("\"á=", "="),
|
||||
("\'á-", "-"),
|
||||
("\'á=-==", "=-=="),
|
||||
("'\u{e1}++", "++"),
|
||||
];
|
||||
|
||||
// The code point is still printed, and stderr must carry exactly the
// "character(s) following character constant" warning for the trailing text.
for (format, expected) in cases {
|
||||
new_ucmd!()
|
||||
.args(&["%04x\n", format])
|
||||
.succeeds()
|
||||
.stdout_is("00e1\n")
|
||||
.stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue