1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

tr: accept non utf8 arguments for sets

This commit is contained in:
Dorian Péron 2024-07-12 10:55:45 +02:00 committed by Ben Wiederhake
parent e7f965d6a8
commit 9ab7fa9806
2 changed files with 31 additions and 8 deletions

View file

@ -8,12 +8,13 @@
mod operation; mod operation;
mod unicode_table; mod unicode_table;
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, value_parser, Arg, ArgAction, Command};
use operation::{ use operation::{
translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation, translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation,
}; };
use std::ffi::OsString;
use std::io::{stdin, stdout, BufWriter}; use std::io::{stdin, stdout, BufWriter};
use uucore::{format_usage, help_about, help_section, help_usage, show}; use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show};
use crate::operation::DeleteOperation; use crate::operation::DeleteOperation;
use uucore::display::Quotable; use uucore::display::Quotable;
@ -43,7 +44,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// Ultimately this should be OsString, but we might want to wait for the // Ultimately this should be OsString, but we might want to wait for the
// pattern API on OsStr // pattern API on OsStr
let sets: Vec<_> = matches let sets: Vec<_> = matches
.get_many::<String>(options::SETS) .get_many::<OsString>(options::SETS)
.into_iter() .into_iter()
.flatten() .flatten()
.map(ToOwned::to_owned) .map(ToOwned::to_owned)
@ -97,7 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} }
if let Some(first) = sets.first() { if let Some(first) = sets.first() {
if first.ends_with('\\') { if let Some(b'\\') = os_str_as_bytes(first)?.last() {
show!(USimpleError::new( show!(USimpleError::new(
0, 0,
"warning: an unescaped backslash at end of string is not portable" "warning: an unescaped backslash at end of string is not portable"
@ -113,10 +114,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// According to the man page: translating only happens if deleting or if a second set is given // According to the man page: translating only happens if deleting or if a second set is given
let translating = !delete_flag && sets.len() > 1; let translating = !delete_flag && sets.len() > 1;
let mut sets_iter = sets.iter().map(|c| c.as_str()); let mut sets_iter = sets.iter().map(|c| c.as_os_str());
let (set1, set2) = Sequence::solve_set_characters( let (set1, set2) = Sequence::solve_set_characters(
sets_iter.next().unwrap_or_default().as_bytes(), os_str_as_bytes(sets_iter.next().unwrap_or_default())?,
sets_iter.next().unwrap_or_default().as_bytes(), os_str_as_bytes(sets_iter.next().unwrap_or_default())?,
complement_flag, complement_flag,
// if we are not translating then we don't truncate set1 // if we are not translating then we don't truncate set1
truncate_set1_flag && translating, truncate_set1_flag && translating,
@ -195,5 +196,9 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue) .action(ArgAction::SetTrue)
.overrides_with(options::TRUNCATE_SET1), .overrides_with(options::TRUNCATE_SET1),
) )
.arg(Arg::new(options::SETS).num_args(1..)) .arg(
Arg::new(options::SETS)
.num_args(1..)
.value_parser(value_parser!(OsString)),
)
} }

View file

@ -5,6 +5,9 @@
// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer // spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer
use crate::common::util::TestScenario; use crate::common::util::TestScenario;
#[cfg(unix)]
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
#[test] #[test]
fn test_invalid_arg() { fn test_invalid_arg() {
new_ucmd!().arg("--definitely-invalid").fails().code_is(1); new_ucmd!().arg("--definitely-invalid").fails().code_is(1);
@ -1427,3 +1430,18 @@ fn check_complement_set2_too_big() {
.fails() .fails()
.stderr_contains("when translating with complemented character classes,\nstring2 must map all characters in the domain to one"); .stderr_contains("when translating with complemented character classes,\nstring2 must map all characters in the domain to one");
} }
#[test]
#[cfg(unix)]
fn test_truncate_non_utf8_set() {
    // SET1 contains the bytes 0xfe and 0xff, which never occur in valid UTF-8,
    // so both sets are built from raw bytes instead of from `&str`.
    let from_set = OsStr::from_bytes(b"a\xfe\xffz");
    // SET2 is one byte longer than SET1.
    let to_set = OsStr::from_bytes(b"01234");
    // Input mixes bytes that are in SET1 (`a`, 0xfe, 0xff) with bytes that are
    // not (0x01, `m`, `p`).
    let input = *b"\x01amp\xfe\xff";

    // Expected: `a` -> `0`, 0xfe -> `1`, 0xff -> `2`; everything else unchanged.
    new_ucmd!()
        .arg(from_set)
        .arg(to_set)
        .pipe_in(input)
        .succeeds()
        .stdout_is_bytes(b"\x010mp12");
}