diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 4c0389bd2..defe575af 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -8,12 +8,13 @@ mod operation; mod unicode_table; -use clap::{crate_version, Arg, ArgAction, Command}; +use clap::{crate_version, value_parser, Arg, ArgAction, Command}; use operation::{ translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation, }; +use std::ffi::OsString; use std::io::{stdin, stdout, BufWriter}; -use uucore::{format_usage, help_about, help_section, help_usage, show}; +use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show}; use crate::operation::DeleteOperation; use uucore::display::Quotable; @@ -43,7 +44,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Ultimately this should be OsString, but we might want to wait for the // pattern API on OsStr let sets: Vec<_> = matches - .get_many::<String>(options::SETS) + .get_many::<OsString>(options::SETS) .into_iter() .flatten() .map(ToOwned::to_owned) @@ -97,7 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } if let Some(first) = sets.first() { - if first.ends_with('\\') { + if let Some(b'\\') = os_str_as_bytes(first)?.last() { show!(USimpleError::new( 0, "warning: an unescaped backslash at end of string is not portable" @@ -113,10 +114,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // According to the man page: translating only happens if deleting or if a second set is given let translating = !delete_flag && sets.len() > 1; - let mut sets_iter = sets.iter().map(|c| c.as_str()); + let mut sets_iter = sets.iter().map(|c| c.as_os_str()); let (set1, set2) = Sequence::solve_set_characters( - sets_iter.next().unwrap_or_default().as_bytes(), - sets_iter.next().unwrap_or_default().as_bytes(), + os_str_as_bytes(sets_iter.next().unwrap_or_default())?, + os_str_as_bytes(sets_iter.next().unwrap_or_default())?, complement_flag, // if we are not translating then we don't truncate set1 
truncate_set1_flag && translating, @@ -195,5 +196,9 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue) .overrides_with(options::TRUNCATE_SET1), ) - .arg(Arg::new(options::SETS).num_args(1..)) + .arg( + Arg::new(options::SETS) + .num_args(1..) + .value_parser(value_parser!(OsString)), + ) } diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 209c4f284..c0421c248 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -5,6 +5,9 @@ // spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer use crate::common::util::TestScenario; +#[cfg(unix)] +use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + #[test] fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1); @@ -1427,3 +1430,18 @@ fn check_complement_set2_too_big() { .fails() .stderr_contains("when translating with complemented character classes,\nstring2 must map all characters in the domain to one"); } + +#[test] +#[cfg(unix)] +fn test_truncate_non_utf8_set() { + let stdin = &[b'\x01', b'a', b'm', b'p', 0xfe_u8, 0xff_u8]; + let set1 = OsStr::from_bytes(&[b'a', 0xfe_u8, 0xff_u8, b'z']); + let set2 = OsStr::from_bytes(b"01234"); + + new_ucmd!() + .arg(set1) + .arg(set2) + .pipe_in(*stdin) + .succeeds() + .stdout_is_bytes(b"\x010mp12"); +}