From 368e984fac148aa10b898fea9927d7680ad63723 Mon Sep 17 00:00:00 2001 From: Christopher Regali <60792386+ChrisRega@users.noreply.github.com> Date: Sun, 25 Apr 2021 23:28:42 +0200 Subject: [PATCH] Change unchecked unwrapping to unwrap_or_default for Args-trait (#1845) (#1852) * Change unchecked unwrapping to unwrap_or_default for argument parsing (resolving #1845) * Added unit-testing for the collect_str function on invalid utf8 OsStrs * Added a warning-message for identification purpose to the collect_str method. * - Add removal of wrongly encoded empty strings to basename - Add testing of broken encoding to basename - Changed UCommand to use collect_str in args method to allow for integration testing of that method - Change UCommand to use unwrap_or_default in arg method to match the behaviour of collect_str * Trying out a new pattern for convert_str for getting a feeling of how the API feels with more control * Adding convenience API for compact calls * Add new API to everywhere, fix test for basename * Added unit-testing for the conversion options * Added unit-testing for the conversion options for windows * fixed compilation and some merge hiccups * Remove windows tests in order to make merge request build * Fix formatting to match rustfmt for the merged file * Improve documentation of the collect_str method and the unit-tests * Fix compilation problems with test Co-authored-by: Christopher Regali Co-authored-by: Sylvestre Ledru --- src/uu/arch/src/arch.rs | 6 +- src/uu/base32/src/base32.rs | 4 +- src/uu/base64/src/base64.rs | 4 +- src/uu/basename/src/basename.rs | 6 +- src/uu/cat/src/cat.rs | 5 +- src/uu/chgrp/src/chgrp.rs | 5 +- src/uu/chmod/src/chmod.rs | 5 +- src/uu/chown/src/chown.rs | 5 +- src/uu/chroot/src/chroot.rs | 6 +- src/uu/cksum/src/cksum.rs | 5 +- src/uu/comm/src/comm.rs | 4 + src/uu/csplit/src/csplit.rs | 5 +- src/uu/cut/src/cut.rs | 5 +- src/uu/dircolors/src/dircolors.rs | 6 +- src/uu/dirname/src/dirname.rs | 5 +- src/uu/du/src/du.rs | 5 +- 
src/uu/echo/src/echo.rs | 4 + src/uu/expr/src/expr.rs | 6 +- src/uu/factor/src/cli.rs | 6 +- src/uu/fold/src/fold.rs | 5 +- src/uu/hostid/src/hostid.rs | 6 +- src/uu/kill/src/kill.rs | 5 +- src/uu/link/src/link.rs | 6 +- src/uu/logname/src/logname.rs | 6 +- src/uu/ls/src/ls.rs | 5 +- src/uu/mkfifo/src/mkfifo.rs | 5 +- src/uu/mknod/src/mknod.rs | 5 +- src/uu/more/src/more.rs | 4 + src/uu/nl/src/nl.rs | 5 +- src/uu/nohup/src/nohup.rs | 4 + src/uu/od/src/od.rs | 5 +- src/uu/pathchk/src/pathchk.rs | 4 + src/uu/pinky/src/pinky.rs | 3 +- src/uu/printf/src/printf.rs | 6 +- src/uu/ptx/src/ptx.rs | 5 +- src/uu/relpath/src/relpath.rs | 5 +- src/uu/shred/src/shred.rs | 5 +- src/uu/shuf/src/shuf.rs | 17 +-- src/uu/sort/src/sort.rs | 5 +- src/uu/stdbuf/src/stdbuf.rs | 4 + src/uu/sum/src/sum.rs | 5 +- src/uu/tac/src/tac.rs | 5 +- src/uu/timeout/src/timeout.rs | 6 +- src/uu/tr/src/tr.rs | 4 + src/uu/tsort/src/tsort.rs | 5 +- src/uu/tty/src/tty.rs | 5 +- src/uu/unexpand/src/unexpand.rs | 5 +- src/uu/unlink/src/unlink.rs | 5 +- src/uu/who/src/who.rs | 5 +- src/uucore/src/lib/lib.rs | 170 +++++++++++++++++++++++++++++- tests/by-util/test_basename.rs | 16 +++ tests/common/util.rs | 14 ++- 52 files changed, 402 insertions(+), 55 deletions(-) diff --git a/src/uu/arch/src/arch.rs b/src/uu/arch/src/arch.rs index 20392b11f..a4c57e282 100644 --- a/src/uu/arch/src/arch.rs +++ b/src/uu/arch/src/arch.rs @@ -10,13 +10,17 @@ extern crate uucore; use platform_info::*; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "Display machine architecture"; static SUMMARY: &str = "Determine architecture name for current machine."; static LONG_HELP: &str = ""; pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse(args.collect_str()); + app!(SYNTAX, SUMMARY, LONG_HELP).parse( + args.collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(), + ); let uts = return_if_err!(1, PlatformInfo::new()); println!("{}", uts.machine().trim()); 0 diff --git 
a/src/uu/base32/src/base32.rs b/src/uu/base32/src/base32.rs index b47f4d4cc..b5f346f48 100644 --- a/src/uu/base32/src/base32.rs +++ b/src/uu/base32/src/base32.rs @@ -8,6 +8,7 @@ #[macro_use] extern crate uucore; use uucore::encoding::Format; +use uucore::InvalidEncodingHandling; mod base_common; @@ -25,7 +26,8 @@ static LONG_HELP: &str = " pub fn uumain(args: impl uucore::Args) -> i32 { base_common::execute( - args.collect_str(), + args.collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(), SYNTAX, SUMMARY, LONG_HELP, diff --git a/src/uu/base64/src/base64.rs b/src/uu/base64/src/base64.rs index ee60f3de5..61a8dc5cb 100644 --- a/src/uu/base64/src/base64.rs +++ b/src/uu/base64/src/base64.rs @@ -9,6 +9,7 @@ #[macro_use] extern crate uucore; use uucore::encoding::Format; +use uucore::InvalidEncodingHandling; mod base_common; @@ -26,7 +27,8 @@ static LONG_HELP: &str = " pub fn uumain(args: impl uucore::Args) -> i32 { base_common::execute( - args.collect_str(), + args.collect_str(InvalidEncodingHandling::Ignore) + .accept_any(), SYNTAX, SUMMARY, LONG_HELP, diff --git a/src/uu/basename/src/basename.rs b/src/uu/basename/src/basename.rs index 7b02a7a83..68b705d53 100644 --- a/src/uu/basename/src/basename.rs +++ b/src/uu/basename/src/basename.rs @@ -11,6 +11,7 @@ extern crate uucore; use std::path::{is_separator, PathBuf}; +use uucore::InvalidEncodingHandling; static NAME: &str = "basename"; static SYNTAX: &str = "NAME [SUFFIX]"; @@ -19,8 +20,9 @@ static SUMMARY: &str = "Print NAME with any leading directory components removed static LONG_HELP: &str = ""; pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); - + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); // // Argument parsing // diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index e507c5acd..8dea096be 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -35,6 +35,7 @@ use std::net::Shutdown; use 
std::os::unix::fs::FileTypeExt; #[cfg(unix)] use unix_socket::UnixStream; +use uucore::InvalidEncodingHandling; static NAME: &str = "cat"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -166,7 +167,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/chgrp/src/chgrp.rs b/src/uu/chgrp/src/chgrp.rs index 592a0a905..2afef7de0 100644 --- a/src/uu/chgrp/src/chgrp.rs +++ b/src/uu/chgrp/src/chgrp.rs @@ -22,6 +22,7 @@ use std::fs::Metadata; use std::os::unix::fs::MetadataExt; use std::path::Path; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "chgrp [OPTION]... GROUP FILE...\n or : chgrp [OPTION]... --reference=RFILE FILE..."; @@ -32,7 +33,9 @@ const FTS_PHYSICAL: u8 = 1 << 1; const FTS_LOGICAL: u8 = 1 << 2; pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let mut opts = app!(SYNTAX, SUMMARY, ""); opts.optflag("c", diff --git a/src/uu/chmod/src/chmod.rs b/src/uu/chmod/src/chmod.rs index dc11be7b8..d01f0316e 100644 --- a/src/uu/chmod/src/chmod.rs +++ b/src/uu/chmod/src/chmod.rs @@ -17,6 +17,7 @@ use std::path::Path; use uucore::fs::display_permissions_unix; #[cfg(not(windows))] use uucore::mode; +use uucore::InvalidEncodingHandling; use walkdir::WalkDir; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -49,7 +50,9 @@ fn get_long_usage() -> String { } pub fn uumain(args: impl uucore::Args) -> i32 { - let mut args = args.collect_str(); + let mut args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); // Before we can parse 'args' with clap (and previously getopts), // a possible MODE prefix '-' needs to be removed (e.g. "chmod -x FILE"). 
diff --git a/src/uu/chown/src/chown.rs b/src/uu/chown/src/chown.rs index 23fb030ba..ff9c42dd0 100644 --- a/src/uu/chown/src/chown.rs +++ b/src/uu/chown/src/chown.rs @@ -23,6 +23,7 @@ use std::os::unix::fs::MetadataExt; use std::convert::AsRef; use std::path::Path; +use uucore::InvalidEncodingHandling; static ABOUT: &str = "change file owner and group"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -67,7 +68,9 @@ fn get_usage() -> String { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let usage = get_usage(); diff --git a/src/uu/chroot/src/chroot.rs b/src/uu/chroot/src/chroot.rs index 7e672da1e..9480830e5 100644 --- a/src/uu/chroot/src/chroot.rs +++ b/src/uu/chroot/src/chroot.rs @@ -15,8 +15,8 @@ use std::ffi::CString; use std::io::Error; use std::path::Path; use std::process::Command; -use uucore::entries; use uucore::libc::{self, chroot, setgid, setgroups, setuid}; +use uucore::{entries, InvalidEncodingHandling}; static VERSION: &str = env!("CARGO_PKG_VERSION"); static NAME: &str = "chroot"; @@ -32,7 +32,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index b6436de87..1d45c1332 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -14,6 +14,7 @@ use clap::{App, Arg}; use std::fs::File; use std::io::{self, stdin, BufReader, Read}; use std::path::Path; +use uucore::InvalidEncodingHandling; // NOTE: CRC_TABLE_LEN *must* be <= 256 as we cast 0..CRC_TABLE_LEN to u8 const CRC_TABLE_LEN: usize = 256; @@ -180,7 +181,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + 
.collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index 34b4330c9..d1d07461c 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -14,6 +14,7 @@ use std::cmp::Ordering; use std::fs::File; use std::io::{self, stdin, BufRead, BufReader, Stdin}; use std::path::Path; +use uucore::InvalidEncodingHandling; use clap::{App, Arg, ArgMatches}; @@ -134,6 +135,9 @@ fn open_file(name: &str) -> io::Result { pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/csplit/src/csplit.rs b/src/uu/csplit/src/csplit.rs index ce11ba49a..9d2f81f43 100644 --- a/src/uu/csplit/src/csplit.rs +++ b/src/uu/csplit/src/csplit.rs @@ -17,6 +17,7 @@ mod splitname; use crate::csplit_error::CsplitError; use crate::splitname::SplitName; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static SUMMARY: &str = "split a file into sections determined by context lines"; @@ -711,7 +712,9 @@ mod tests { pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 5bf310daa..71c52601e 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -17,6 +17,7 @@ use std::path::Path; use self::searcher::Searcher; use uucore::ranges::Range; +use uucore::InvalidEncodingHandling; mod buffer; mod searcher; @@ -443,7 +444,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let 
matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 6cb5f9e1b..a2d819620 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -53,7 +53,9 @@ pub fn guess_syntax() -> OutputFmt { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let matches = app!(SYNTAX, SUMMARY, LONG_HELP) .optflag("b", "sh", "output Bourne shell code to set LS_COLORS") @@ -202,6 +204,8 @@ enum ParseState { Pass, } use std::collections::HashMap; +use uucore::InvalidEncodingHandling; + fn parse(lines: T, fmt: OutputFmt, fp: &str) -> Result where T: IntoIterator, diff --git a/src/uu/dirname/src/dirname.rs b/src/uu/dirname/src/dirname.rs index 1cf35d0c4..5937f16ca 100644 --- a/src/uu/dirname/src/dirname.rs +++ b/src/uu/dirname/src/dirname.rs @@ -10,6 +10,7 @@ extern crate uucore; use clap::{App, Arg}; use std::path::Path; +use uucore::InvalidEncodingHandling; static NAME: &str = "dirname"; static SYNTAX: &str = "[OPTION] NAME..."; @@ -27,7 +28,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index fa3b3c80a..89dd3f739 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -25,6 +25,7 @@ use std::os::windows::fs::MetadataExt; use std::os::windows::io::AsRawHandle; use std::path::PathBuf; use std::time::{Duration, UNIX_EPOCH}; +use uucore::InvalidEncodingHandling; #[cfg(windows)] use winapi::shared::minwindef::{DWORD, LPVOID}; #[cfg(windows)] @@ -362,7 +363,9 @@ fn convert_size_other(size: u64, _multiplier: u64, block_size: u64) -> String { #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> 
i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let syntax = format!( "[OPTION]... [FILE]... diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 4d38d7748..56cd967f4 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -13,6 +13,7 @@ use clap::{crate_version, App, Arg}; use std::io::{self, Write}; use std::iter::Peekable; use std::str::Chars; +use uucore::InvalidEncodingHandling; const NAME: &str = "echo"; const SUMMARY: &str = "display a line of text"; @@ -113,6 +114,9 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result { } pub fn uumain(args: impl uucore::Args) -> i32 { + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .name(NAME) // TrailingVarArg specifies the final positional argument is a VarArg diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index 4a13812d3..5d63bed80 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -8,6 +8,8 @@ #[macro_use] extern crate uucore; +use uucore::InvalidEncodingHandling; + mod syntax_tree; mod tokens; @@ -15,7 +17,9 @@ static NAME: &str = "expr"; static VERSION: &str = env!("CARGO_PKG_VERSION"); pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); // For expr utility we do not want getopts. 
// The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)` diff --git a/src/uu/factor/src/cli.rs b/src/uu/factor/src/cli.rs index 34bcf6a2d..fb7b3f192 100644 --- a/src/uu/factor/src/cli.rs +++ b/src/uu/factor/src/cli.rs @@ -14,6 +14,7 @@ use std::io::{self, stdin, stdout, BufRead, Write}; mod factor; pub(crate) use factor::*; +use uucore::InvalidEncodingHandling; mod miller_rabin; pub mod numeric; @@ -33,7 +34,10 @@ fn print_factors_str(num_str: &str, w: &mut impl io::Write) -> Result<(), Box i32 { - let matches = app!(SYNTAX, SUMMARY, LONG_HELP).parse(args.collect_str()); + let matches = app!(SYNTAX, SUMMARY, LONG_HELP).parse( + args.collect_str(InvalidEncodingHandling::Ignore) + .accept_any(), + ); let stdout = stdout(); let mut w = io::BufWriter::new(stdout.lock()); diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index fa703eade..1f52748f1 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -14,6 +14,7 @@ use clap::{App, Arg}; use std::fs::File; use std::io::{stdin, BufRead, BufReader, Read}; use std::path::Path; +use uucore::InvalidEncodingHandling; const TAB_WIDTH: usize = 8; @@ -31,7 +32,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let (args, obs_width) = handle_obsolete(&args[..]); let matches = App::new(executable!()) diff --git a/src/uu/hostid/src/hostid.rs b/src/uu/hostid/src/hostid.rs index 5a4909c62..551866521 100644 --- a/src/uu/hostid/src/hostid.rs +++ b/src/uu/hostid/src/hostid.rs @@ -11,6 +11,7 @@ extern crate uucore; use libc::c_long; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "[options]"; static SUMMARY: &str = ""; @@ -22,7 +23,10 @@ extern "C" { } pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse(args.collect_str()); + app!(SYNTAX, SUMMARY, LONG_HELP).parse( + 
args.collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(), + ); hostid(); 0 } diff --git a/src/uu/kill/src/kill.rs b/src/uu/kill/src/kill.rs index 9af9c74f7..916c13cc3 100644 --- a/src/uu/kill/src/kill.rs +++ b/src/uu/kill/src/kill.rs @@ -13,6 +13,7 @@ extern crate uucore; use libc::{c_int, pid_t}; use std::io::Error; use uucore::signals::ALL_SIGNALS; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "[options] [...]"; static SUMMARY: &str = ""; @@ -29,7 +30,9 @@ pub enum Mode { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let (args, obs_signal) = handle_obsolete(args); let matches = app!(SYNTAX, SUMMARY, LONG_HELP) diff --git a/src/uu/link/src/link.rs b/src/uu/link/src/link.rs index 51c7acb5f..b82d7cfac 100644 --- a/src/uu/link/src/link.rs +++ b/src/uu/link/src/link.rs @@ -11,6 +11,7 @@ extern crate uucore; use std::fs::hard_link; use std::io::Error; use std::path::Path; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "[OPTIONS] FILE1 FILE2"; static SUMMARY: &str = "Create a link named FILE2 to FILE1"; @@ -24,7 +25,10 @@ pub fn normalize_error_message(e: Error) -> String { } pub fn uumain(args: impl uucore::Args) -> i32 { - let matches = app!(SYNTAX, SUMMARY, LONG_HELP).parse(args.collect_str()); + let matches = app!(SYNTAX, SUMMARY, LONG_HELP).parse( + args.collect_str(InvalidEncodingHandling::Ignore) + .accept_any(), + ); if matches.free.len() != 2 { crash!(1, "{}", msg_wrong_number_of_arguments!(2)); } diff --git a/src/uu/logname/src/logname.rs b/src/uu/logname/src/logname.rs index c1f0c31aa..8c6a946f5 100644 --- a/src/uu/logname/src/logname.rs +++ b/src/uu/logname/src/logname.rs @@ -13,6 +13,7 @@ extern crate uucore; use std::ffi::CStr; +use uucore::InvalidEncodingHandling; extern "C" { // POSIX requires using getlogin (or equivalent code) @@ -35,7 +36,10 @@ static SUMMARY: &str = "Print user's login 
name"; static LONG_HELP: &str = ""; pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse(args.collect_str()); + app!(SYNTAX, SUMMARY, LONG_HELP).parse( + args.collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(), + ); match get_userlogin() { Some(userlogin) => println!("{}", userlogin), diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 6317c1975..9a3b98e66 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -503,7 +503,9 @@ impl Config { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let usage = get_usage(); @@ -1391,6 +1393,7 @@ fn get_inode(metadata: &Metadata) -> String { use std::sync::Mutex; #[cfg(unix)] use uucore::entries; +use uucore::InvalidEncodingHandling; #[cfg(unix)] fn cached_uid2usr(uid: u32) -> String { diff --git a/src/uu/mkfifo/src/mkfifo.rs b/src/uu/mkfifo/src/mkfifo.rs index 14701af4d..2fdd4abec 100644 --- a/src/uu/mkfifo/src/mkfifo.rs +++ b/src/uu/mkfifo/src/mkfifo.rs @@ -11,6 +11,7 @@ extern crate uucore; use clap::{App, Arg}; use libc::mkfifo; use std::ffi::CString; +use uucore::InvalidEncodingHandling; static NAME: &str = "mkfifo"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -25,7 +26,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/mknod/src/mknod.rs b/src/uu/mknod/src/mknod.rs index 98404f89f..fc6fb0870 100644 --- a/src/uu/mknod/src/mknod.rs +++ b/src/uu/mknod/src/mknod.rs @@ -16,6 +16,7 @@ use libc::{S_IFBLK, S_IFCHR, S_IFIFO, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOT use getopts::Options; use std::ffi::CString; +use uucore::InvalidEncodingHandling; static NAME: &str = "mknod"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -40,7 
+41,9 @@ fn _makenod(path: CString, mode: mode_t, dev: dev_t) -> i32 { #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let mut opts = Options::new(); diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 59e7d0faa..eabdbee85 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -17,6 +17,7 @@ use std::io::{stdin, stdout, BufRead, BufReader, Read, Write}; extern crate nix; #[cfg(all(unix, not(target_os = "fuchsia")))] use nix::sys::termios::{self, LocalFlags, SetArg}; +use uucore::InvalidEncodingHandling; #[cfg(target_os = "redox")] extern crate redox_termios; @@ -38,6 +39,9 @@ fn get_usage() -> String { pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/nl/src/nl.rs b/src/uu/nl/src/nl.rs index 3b5b5a2e8..7c5f01811 100644 --- a/src/uu/nl/src/nl.rs +++ b/src/uu/nl/src/nl.rs @@ -16,6 +16,7 @@ use std::fs::File; use std::io::{stdin, BufRead, BufReader, Read}; use std::iter::repeat; use std::path::Path; +use uucore::InvalidEncodingHandling; mod helper; @@ -84,7 +85,9 @@ pub mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/nohup/src/nohup.rs b/src/uu/nohup/src/nohup.rs index afbf2541b..83153ad37 100644 --- a/src/uu/nohup/src/nohup.rs +++ b/src/uu/nohup/src/nohup.rs @@ -20,6 +20,7 @@ use std::io::Error; use std::os::unix::prelude::*; use std::path::{Path, PathBuf}; use uucore::fs::{is_stderr_interactive, is_stdin_interactive, is_stdout_interactive}; +use uucore::InvalidEncodingHandling; static VERSION: 
&str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Run COMMAND ignoring hangup signals."; @@ -42,6 +43,9 @@ mod options { pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index 36eae66ab..571ca543d 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -42,6 +42,7 @@ use crate::partialreader::*; use crate::peekreader::*; use crate::prn_char::format_ascii_dump; use clap::{self, AppSettings, Arg, ArgMatches}; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes @@ -221,7 +222,9 @@ impl OdOptions { /// parses and validates command line parameters, prepares data structures, /// opens the input and calls `odfunc` to process the input. pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let clap_opts = clap::App::new(executable!()) .version(VERSION) diff --git a/src/uu/pathchk/src/pathchk.rs b/src/uu/pathchk/src/pathchk.rs index c27e52513..5606c4d6a 100644 --- a/src/uu/pathchk/src/pathchk.rs +++ b/src/uu/pathchk/src/pathchk.rs @@ -15,6 +15,7 @@ extern crate uucore; use clap::{App, Arg}; use std::fs; use std::io::{ErrorKind, Write}; +use uucore::InvalidEncodingHandling; // operating mode enum Mode { @@ -45,6 +46,9 @@ fn get_usage() -> String { pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/pinky/src/pinky.rs b/src/uu/pinky/src/pinky.rs index 851a3cd42..3392c9a79 100644 --- a/src/uu/pinky/src/pinky.rs +++ b/src/uu/pinky/src/pinky.rs @@ -20,6 +20,7 @@ 
use std::fs::File; use std::os::unix::fs::MetadataExt; use std::path::PathBuf; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "[OPTION]... [USER]..."; static SUMMARY: &str = "A lightweight 'finger' program; print user information."; @@ -27,7 +28,7 @@ static SUMMARY: &str = "A lightweight 'finger' program; print user information. const BUFSIZE: usize = 1024; pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args.collect_str(InvalidEncodingHandling::Ignore).accept_any(); let long_help = &format!( " diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index d947a7d83..88d18838d 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -2,6 +2,8 @@ // spell-checker:ignore (change!) each's // spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr +use uucore::InvalidEncodingHandling; + mod cli; mod memo; mod tokenize; @@ -271,7 +273,9 @@ COPYRIGHT : "; pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let location = &args[0]; if args.len() <= 1 { diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index 69b3f7aa1..d2aa619b4 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -17,6 +17,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::default::Default; use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +use uucore::InvalidEncodingHandling; static NAME: &str = "ptx"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -630,7 +631,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); // let mut opts = Options::new(); let matches = App::new(executable!()) diff --git a/src/uu/relpath/src/relpath.rs b/src/uu/relpath/src/relpath.rs 
index 82779107a..f1b2c9a43 100644 --- a/src/uu/relpath/src/relpath.rs +++ b/src/uu/relpath/src/relpath.rs @@ -14,6 +14,7 @@ use clap::{App, Arg}; use std::env; use std::path::{Path, PathBuf}; use uucore::fs::{canonicalize, CanonicalizeMode}; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Convert TO destination to the relative path from the FROM dir. @@ -30,7 +31,9 @@ fn get_usage() -> String { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let usage = get_usage(); let matches = App::new(executable!()) diff --git a/src/uu/shred/src/shred.rs b/src/uu/shred/src/shred.rs index b89d48a10..15a4eff26 100644 --- a/src/uu/shred/src/shred.rs +++ b/src/uu/shred/src/shred.rs @@ -17,6 +17,7 @@ use std::io; use std::io::prelude::*; use std::io::SeekFrom; use std::path::{Path, PathBuf}; +use uucore::InvalidEncodingHandling; #[macro_use] extern crate uucore; @@ -270,7 +271,9 @@ pub mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let usage = get_usage(); diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs index f7af05214..9c735673c 100644 --- a/src/uu/shuf/src/shuf.rs +++ b/src/uu/shuf/src/shuf.rs @@ -14,6 +14,13 @@ use clap::{App, Arg}; use rand::Rng; use std::fs::File; use std::io::{stdin, stdout, BufReader, BufWriter, Read, Write}; +use uucore::InvalidEncodingHandling; + +enum Mode { + Default(String), + Echo(Vec), + InputRange((usize, usize)), +} static NAME: &str = "shuf"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -34,12 +41,6 @@ struct Options { sep: u8, } -enum Mode { - Default(String), - Echo(Vec), - InputRange((usize, usize)), -} - mod options { pub static ECHO: &str = "echo"; pub static INPUT_RANGE: &str = "input-range"; @@ -52,6 +53,10 
@@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); + let matches = App::new(executable!()) .name(NAME) .version(VERSION) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 5fd7ddcce..de19d0a5c 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -37,6 +37,7 @@ use std::ops::Range; use std::path::Path; use unicode_width::UnicodeWidthStr; use uucore::fs::is_stdin_interactive; // for Iterator::dedup() +use uucore::InvalidEncodingHandling; static NAME: &str = "sort"; static ABOUT: &str = "Display sorted concatenation of all FILE(s)."; @@ -768,7 +769,9 @@ With no FILE, or when FILE is -, read standard input.", } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let usage = get_usage(); let mut settings: GlobalSettings = Default::default(); diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index ddbd76133..77f6d9dad 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -19,6 +19,7 @@ use std::path::PathBuf; use std::process::Command; use tempfile::tempdir; use tempfile::TempDir; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = @@ -186,6 +187,9 @@ fn get_preload_env(tmp_dir: &mut TempDir) -> io::Result<(String, PathBuf)> { } pub fn uumain(args: impl uucore::Args) -> i32 { + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let usage = get_usage(); let matches = App::new(executable!()) diff --git a/src/uu/sum/src/sum.rs b/src/uu/sum/src/sum.rs index d0fbc7c0d..ea833c0d2 100644 --- a/src/uu/sum/src/sum.rs +++ b/src/uu/sum/src/sum.rs @@ -14,6 +14,7 @@ use clap::{App, Arg}; use std::fs::File; use std::io::{stdin, Read, Result}; use std::path::Path; +use uucore::InvalidEncodingHandling; static 
NAME: &str = "sum"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -94,7 +95,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 1fb6489da..a638d578d 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -13,6 +13,7 @@ extern crate uucore; use clap::{App, Arg}; use std::io::{stdin, stdout, BufReader, Read, Stdout, Write}; use std::{fs::File, path::Path}; +use uucore::InvalidEncodingHandling; static NAME: &str = "tac"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -27,7 +28,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/timeout/src/timeout.rs b/src/uu/timeout/src/timeout.rs index 23e2ec842..7d557f1ce 100644 --- a/src/uu/timeout/src/timeout.rs +++ b/src/uu/timeout/src/timeout.rs @@ -18,6 +18,7 @@ use std::process::{Command, Stdio}; use std::time::Duration; use uucore::process::ChildExt; use uucore::signals::signal_by_name_or_value; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Start COMMAND, and kill it if still running after DURATION."; @@ -98,7 +99,10 @@ impl Config { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); + let usage = get_usage(); let app = App::new("timeout") diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index b94b11b9d..6c1c0746a 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -21,6 +21,7 @@ use fnv::FnvHashMap; use std::io::{stdin, stdout, BufRead, BufWriter, 
Write}; use crate::expand::ExpandSet; +use uucore::InvalidEncodingHandling; static NAME: &str = "tr"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -187,6 +188,9 @@ fn get_usage() -> String { pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/tsort/src/tsort.rs b/src/uu/tsort/src/tsort.rs index 967a9514a..c96939b20 100644 --- a/src/uu/tsort/src/tsort.rs +++ b/src/uu/tsort/src/tsort.rs @@ -14,6 +14,7 @@ use std::collections::{HashMap, HashSet}; use std::fs::File; use std::io::{stdin, BufRead, BufReader, Read}; use std::path::Path; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static SUMMARY: &str = "Topological sort the strings in FILE. @@ -26,7 +27,9 @@ mod options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/tty/src/tty.rs b/src/uu/tty/src/tty.rs index 815c6f96b..ef2d848e9 100644 --- a/src/uu/tty/src/tty.rs +++ b/src/uu/tty/src/tty.rs @@ -15,6 +15,7 @@ extern crate uucore; use clap::{App, Arg}; use std::ffi::CStr; use uucore::fs::is_stdin_interactive; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Print the file name of the terminal connected to standard input."; @@ -28,8 +29,10 @@ fn get_usage() -> String { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); let usage = get_usage(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let matches = App::new(executable!()) .version(VERSION) diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index a811d3b66..22b6b807a 100644 
--- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -16,6 +16,7 @@ use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdout, Write}; use std::str::from_utf8; use unicode_width::UnicodeWidthChar; +use uucore::InvalidEncodingHandling; static NAME: &str = "unexpand"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -90,7 +91,9 @@ impl Options { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let matches = App::new(executable!()) .name(NAME) diff --git a/src/uu/unlink/src/unlink.rs b/src/uu/unlink/src/unlink.rs index 26460e59c..9d9d6385b 100644 --- a/src/uu/unlink/src/unlink.rs +++ b/src/uu/unlink/src/unlink.rs @@ -17,6 +17,7 @@ use libc::{lstat, stat, unlink}; use libc::{S_IFLNK, S_IFMT, S_IFREG}; use std::ffi::CString; use std::io::{Error, ErrorKind}; +use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Unlink the file at [FILE]."; @@ -27,7 +28,9 @@ fn get_usage() -> String { } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(); let usage = get_usage(); diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index 9444985dc..e979d2d46 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -16,6 +16,7 @@ use std::borrow::Cow; use std::ffi::CStr; use std::os::unix::fs::MetadataExt; use std::path::PathBuf; +use uucore::InvalidEncodingHandling; static SYNTAX: &str = "[OPTION]... [ FILE | ARG1 ARG2 ]"; static SUMMARY: &str = "Print information about users who are currently logged in."; @@ -44,7 +45,9 @@ If ARG1 ARG2 given, -m presumed: 'am i' or 'mom likes' are usual. 
"; pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); let mut opts = app!(SYNTAX, SUMMARY, LONG_HELP); opts.optflag("a", "all", "same as -b -d --login -p -r -t -T -u"); diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 208e9536c..6dddf8696 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -68,10 +68,94 @@ pub use crate::features::wide; use std::ffi::OsString; +pub enum InvalidEncodingHandling { + Ignore, + ConvertLossy, + Panic, +} + +#[must_use] +pub enum ConversionResult { + Complete(Vec), + Lossy(Vec), +} + +impl ConversionResult { + pub fn accept_any(self) -> Vec { + match self { + Self::Complete(result) => result, + Self::Lossy(result) => result, + } + } + + pub fn expect_lossy(self, msg: &str) -> Vec { + match self { + Self::Lossy(result) => result, + Self::Complete(_) => { + panic!("{}", msg); + } + } + } + + pub fn expect_complete(self, msg: &str) -> Vec { + match self { + Self::Complete(result) => result, + Self::Lossy(_) => { + panic!("{}", msg); + } + } + } +} + pub trait Args: Iterator + Sized { - fn collect_str(self) -> Vec { - // FIXME: avoid unwrap() - self.map(|s| s.into_string().unwrap()).collect() + /// Converts each iterator item to a String and collects these into a vector + /// On invalid encoding, the result will depend on the argument. This method allows to either drop entries with illegal encoding + /// completely (```InvalidEncodingHandling::Ignore```), convert them using lossy-conversion (```InvalidEncodingHandling::Lossy```) which will + /// result in strange strings or can chosen to panic (```InvalidEncodingHandling::Panic```). 
+ /// # Arguments + /// * `handling` - Selects the behavior when invalid encoding is encountered + /// # Panics + /// * Panics when invalid encoding is encountered and handling is set to ```InvalidEncodingHandling::Panic``` + fn collect_str(self, handling: InvalidEncodingHandling) -> ConversionResult { + let mut full_conversion = true; + let result_vector: Vec = self + .map(|s| match s.into_string() { + Ok(string) => Ok(string), + Err(s_ret) => { + full_conversion = false; + let lossy_conversion = s_ret.to_string_lossy(); + eprintln!( + "Input with broken encoding occured! (s = '{}') ", + &lossy_conversion + ); + match handling { + InvalidEncodingHandling::Ignore => Err(String::new()), + InvalidEncodingHandling::ConvertLossy => Err(lossy_conversion.to_string()), + InvalidEncodingHandling::Panic => { + panic!("Broken encoding found but caller cannot handle it") + } + } + } + }) + .filter(|s| match handling { + InvalidEncodingHandling::Ignore => s.is_ok(), + _ => true, + }) + .map(|s| match s.is_ok() { + true => s.unwrap(), + false => s.unwrap_err(), + }) + .collect(); + + match full_conversion { + true => ConversionResult::Complete(result_vector), + false => ConversionResult::Lossy(result_vector), + } + } + + /// Convenience function for a slimmer interface + fn collect_str_lossy(self) -> ConversionResult { + self.collect_str(InvalidEncodingHandling::ConvertLossy) } } @@ -85,3 +169,83 @@ pub fn args() -> impl Iterator { pub fn args_os() -> impl Iterator { wild::args_os() } + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::OsStr; + + fn make_os_vec(os_str: &OsStr) -> Vec { + vec![ + OsString::from("test"), + OsString::from("สวัสดี"), + os_str.to_os_string(), + ] + } + + fn collect_os_str(vec: Vec, handling: InvalidEncodingHandling) -> ConversionResult { + vec.into_iter().collect_str(handling) + } + + fn test_invalid_utf8_args_lossy(os_str: &OsStr) { + //assert our string is invalid utf8 +
assert!(os_str.to_os_string().into_string().is_err()); + let test_vec = make_os_vec(os_str); + let collected_to_str = + collect_os_str(test_vec.clone(), InvalidEncodingHandling::ConvertLossy) + .expect_lossy("Lossy conversion expected in this test: bad encoding entries should be converted as good as possible"); + //conservation of length - when accepting lossy conversion no arguments may be dropped + assert_eq!(collected_to_str.len(), test_vec.len()); + //first indices identical + for index in 0..2 { + assert_eq!( + collected_to_str.get(index).unwrap(), + test_vec.get(index).unwrap().to_str().unwrap() + ); + } + //lossy conversion for string with illegal encoding is done + assert_eq!( + *collected_to_str.get(2).unwrap(), + os_str.to_os_string().to_string_lossy() + ); + } + + fn test_invalid_utf8_args_ignore(os_str: &OsStr) { + //assert our string is invalid utf8 + assert!(os_str.to_os_string().into_string().is_err()); + let test_vec = make_os_vec(os_str); + let collected_to_str = collect_os_str(test_vec.clone(), InvalidEncodingHandling::Ignore) + .expect_lossy( + "Lossy conversion expected in this test: bad encoding entries should be filtered", + ); + //assert that the broken entry is filtered out + assert_eq!(collected_to_str.len(), test_vec.len() - 1); + //assert that the unbroken indices are converted as expected + for index in 0..2 { + assert_eq!( + collected_to_str.get(index).unwrap(), + test_vec.get(index).unwrap().to_str().unwrap() + ); + } + } + + #[test] + fn valid_utf8_encoding_args() { + //create a vector containing only correct encoding + let test_vec = make_os_vec(&OsString::from("test2")); + //expect complete conversion without losses, even when lossy conversion is accepted + let _ = collect_os_str(test_vec.clone(), InvalidEncodingHandling::ConvertLossy) + .expect_complete("Lossy conversion not expected in this test"); + } + + #[cfg(any(unix, target_os = "redox"))] + #[test] + fn invalid_utf8_args_unix() { + use std::os::unix::ffi::OsStrExt; + + let 
source = [0x66, 0x6f, 0x80, 0x6f]; + let os_str = OsStr::from_bytes(&source[..]); + test_invalid_utf8_args_lossy(os_str); + test_invalid_utf8_args_ignore(os_str); + } +} diff --git a/tests/by-util/test_basename.rs b/tests/by-util/test_basename.rs index 3483e800c..8d32b4008 100644 --- a/tests/by-util/test_basename.rs +++ b/tests/by-util/test_basename.rs @@ -1,4 +1,5 @@ use crate::common::util::*; +use std::ffi::OsStr; #[test] fn test_directory() { @@ -84,3 +85,18 @@ fn test_no_args() { fn test_too_many_args() { expect_error(vec!["a", "b", "c"]); } + +fn test_invalid_utf8_args(os_str: &OsStr) { + let test_vec = vec![os_str.to_os_string()]; + new_ucmd!().args(&test_vec).succeeds().stdout_is("fo�o\n"); +} + +#[cfg(any(unix, target_os = "redox"))] +#[test] +fn invalid_utf8_args_unix() { + use std::os::unix::ffi::OsStrExt; + + let source = [0x66, 0x6f, 0x80, 0x6f]; + let os_str = OsStr::from_bytes(&source[..]); + test_invalid_utf8_args(os_str); +} diff --git a/tests/common/util.rs b/tests/common/util.rs index 93bbccc24..1ade70127 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -20,6 +20,7 @@ use std::str::from_utf8; use std::thread::sleep; use std::time::Duration; use tempfile::TempDir; +use uucore::{Args, InvalidEncodingHandling}; #[cfg(windows)] static PROGNAME: &str = concat!(env!("CARGO_PKG_NAME"), ".exe"); @@ -751,7 +752,8 @@ impl UCommand { panic!("{}", ALREADY_RUN); } self.comm_string.push_str(" "); - self.comm_string.push_str(arg.as_ref().to_str().unwrap()); + self.comm_string + .push_str(arg.as_ref().to_str().unwrap_or_default()); self.raw.arg(arg.as_ref()); self } @@ -762,9 +764,15 @@ impl UCommand { if self.has_run { panic!("{}", MULTIPLE_STDIN_MEANINGLESS); } - for s in args { + let strings = args + .iter() + .map(|s| s.as_ref().to_os_string()) + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); + + for s in strings { self.comm_string.push_str(" "); - self.comm_string.push_str(s.as_ref().to_str().unwrap()); + 
self.comm_string.push_str(&s); } self.raw.args(args.as_ref());