diff --git a/.vscode/cspell.dictionaries/workspace.wordlist.txt b/.vscode/cspell.dictionaries/workspace.wordlist.txt index bee22c2d0..aec4b4a42 100644 --- a/.vscode/cspell.dictionaries/workspace.wordlist.txt +++ b/.vscode/cspell.dictionaries/workspace.wordlist.txt @@ -9,6 +9,7 @@ aho-corasick backtrace blake2b_simd bstr +bytecount byteorder chacha chrono @@ -106,14 +107,18 @@ whoami # * vars/errno errno +EACCES EBADF +EBUSY EEXIST EINVAL ENODATA ENOENT ENOSYS +ENOTEMPTY EOPNOTSUPP EPERM +EROFS # * vars/fcntl F_GETFL diff --git a/Cargo.lock b/Cargo.lock index adc373f85..c632db295 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,6 +188,12 @@ dependencies = [ "utf8-width", ] +[[package]] +name = "bytecount" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" + [[package]] name = "byteorder" version = "1.4.3" @@ -2040,6 +2046,12 @@ dependencies = [ "log", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8-width" version = "0.1.5" @@ -2790,6 +2802,7 @@ name = "uu_rmdir" version = "0.0.7" dependencies = [ "clap", + "libc", "uucore", "uucore_procs", ] @@ -3110,10 +3123,12 @@ dependencies = [ name = "uu_wc" version = "0.0.7" dependencies = [ + "bytecount", "clap", "libc", "nix 0.20.0", - "thiserror", + "unicode-width", + "utf-8", "uucore", "uucore_procs", ] diff --git a/DEVELOPER_INSTRUCTIONS.md b/DEVELOPER_INSTRUCTIONS.md index e0a5cf001..027d4dca1 100644 --- a/DEVELOPER_INSTRUCTIONS.md +++ b/DEVELOPER_INSTRUCTIONS.md @@ -1,3 +1,29 @@ +Documentation +------------- + +The source of the documentation is available on: + +https://uutils.github.io/coreutils-docs/coreutils/ + +The documentation is updated everyday on this repository: + +https://github.com/uutils/coreutils-docs + +Running GNU tests +----------------- + + + +- Check out https://github.com/coreutils/coreutils next to your fork as gnu +- Check out https://github.com/coreutils/gnulib next to your fork as gnulib +- Rename the checkout of your fork to uutils + +At the end you should have uutils, gnu and gnulib checked out next to each other. + +- Run `cd uutils && ./util/build-gnu.sh && cd ..` to get everything ready (this may take a while) +- Finally, you can run `tests with bash uutils/util/run-gnu-test.sh `. Instead of `` insert the test you want to run, e.g. `tests/misc/wc-proc`. + + Code Coverage Report Generation --------------------------------- diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 615093311..78962d9db 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -122,7 +122,7 @@ pub fn base_app<'a>(name: &str, version: &'a str, about: &'a str) -> App<'static pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> Box { match &config.to_read { Some(name) => { - let file_buf = safe_unwrap!(File::open(Path::new(name))); + let file_buf = crash_if_err!(1, File::open(Path::new(name))); Box::new(BufReader::new(file_buf)) // as Box } None => { diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 9a80b18ab..df70ccea8 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -231,8 +231,6 @@ fn usage() -> String { mod options { pub const ARCHIVE: &str = "archive"; pub const ATTRIBUTES_ONLY: &str = "attributes-only"; - pub const BACKUP: &str = "backup"; - pub const BACKUP_NO_ARG: &str = "b"; pub const CLI_SYMBOLIC_LINKS: &str = "cli-symbolic-links"; pub const CONTEXT: &str = "context"; pub const COPY_CONTENTS: &str = "copy-contents"; @@ -257,7 +255,6 @@ mod options { pub const REMOVE_DESTINATION: &str = "remove-destination"; pub const SPARSE: &str = "sparse"; pub const STRIP_TRAILING_SLASHES: &str = "strip-trailing-slashes"; - pub const SUFFIX: &str = "suffix"; pub const SYMBOLIC_LINK: &str = "symbolic-link"; pub const TARGET_DIRECTORY: &str = "target-directory"; pub const UPDATE: &str = "update"; @@ -355,24 +352,9 @@ pub fn uu_app() -> App<'static, 'static> { .conflicts_with(options::FORCE) .help("remove each existing destination file before attempting to open it \ (contrast with --force). On Windows, current only works for writeable files.")) - .arg(Arg::with_name(options::BACKUP) - .long(options::BACKUP) - .help("make a backup of each existing destination file") - .takes_value(true) - .require_equals(true) - .min_values(0) - .value_name("CONTROL") - ) - .arg(Arg::with_name(options::BACKUP_NO_ARG) - .short(options::BACKUP_NO_ARG) - .help("like --backup but does not accept an argument") - ) - .arg(Arg::with_name(options::SUFFIX) - .short("S") - .long(options::SUFFIX) - .takes_value(true) - .value_name("SUFFIX") - .help("override the usual backup suffix")) + .arg(backup_control::arguments::backup()) + .arg(backup_control::arguments::backup_no_args()) + .arg(backup_control::arguments::suffix()) .arg(Arg::with_name(options::UPDATE) .short("u") .long(options::UPDATE) @@ -604,20 +586,12 @@ impl Options { || matches.is_present(options::RECURSIVE_ALIAS) || matches.is_present(options::ARCHIVE); - let backup_mode = backup_control::determine_backup_mode( - matches.is_present(options::BACKUP_NO_ARG), - matches.is_present(options::BACKUP), - matches.value_of(options::BACKUP), - ); - let backup_mode = match backup_mode { - Err(err) => { - return Err(Error::Backup(err)); - } + let backup_mode = match backup_control::determine_backup_mode(matches) { + Err(e) => return Err(Error::Backup(format!("{}", e))), Ok(mode) => mode, }; - let backup_suffix = - backup_control::determine_backup_suffix(matches.value_of(options::SUFFIX)); + let backup_suffix = backup_control::determine_backup_suffix(matches); let overwrite = OverwriteMode::from_matches(matches); diff --git a/src/uu/csplit/src/csplit.rs b/src/uu/csplit/src/csplit.rs index 603d25265..977583a2f 100644 --- a/src/uu/csplit/src/csplit.rs +++ b/src/uu/csplit/src/csplit.rs @@ -725,14 +725,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .unwrap() .map(str::to_string) .collect(); - let patterns = return_if_err!(1, patterns::get_patterns(&patterns[..])); + let patterns = crash_if_err!(1, patterns::get_patterns(&patterns[..])); let options = CsplitOptions::new(&matches); if file_name == "-" { let stdin = io::stdin(); crash_if_err!(1, csplit(&options, patterns, stdin.lock())); } else { - let file = return_if_err!(1, File::open(file_name)); - let file_metadata = return_if_err!(1, file.metadata()); + let file = crash_if_err!(1, File::open(file_name)); + let file_metadata = crash_if_err!(1, file.metadata()); if !file_metadata.is_file() { crash!(1, "'{}' is not a regular file", file_name); } diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 04cb42649..f5a5686f6 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -329,12 +329,15 @@ fn expand(options: Options) { // now dump out either spaces if we're expanding, or a literal tab if we're not if init || !options.iflag { if nts <= options.tspaces.len() { - safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes())); + crash_if_err!( + 1, + output.write_all(options.tspaces[..nts].as_bytes()) + ); } else { - safe_unwrap!(output.write_all(" ".repeat(nts).as_bytes())); + crash_if_err!(1, output.write_all(" ".repeat(nts).as_bytes())); }; } else { - safe_unwrap!(output.write_all(&buf[byte..byte + nbytes])); + crash_if_err!(1, output.write_all(&buf[byte..byte + nbytes])); } } _ => { @@ -352,14 +355,14 @@ fn expand(options: Options) { init = false; } - safe_unwrap!(output.write_all(&buf[byte..byte + nbytes])); + crash_if_err!(1, output.write_all(&buf[byte..byte + nbytes])); } } byte += nbytes; // advance the pointer } - safe_unwrap!(output.flush()); + crash_if_err!(1, output.flush()); buf.truncate(0); // clear the buffer } } diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index c5628125d..c4cc16469 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -119,7 +119,7 @@ fn fold(filenames: Vec, bytes: bool, spaces: bool, width: usize) { stdin_buf = stdin(); &mut stdin_buf as &mut dyn Read } else { - file_buf = safe_unwrap!(File::open(Path::new(filename))); + file_buf = crash_if_err!(1, File::open(Path::new(filename))); &mut file_buf as &mut dyn Read }); diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 384ef1388..f308da300 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -469,7 +469,7 @@ where stdin_buf = stdin(); Box::new(stdin_buf) as Box } else { - file_buf = safe_unwrap!(File::open(filename)); + file_buf = crash_if_err!(1, File::open(filename)); Box::new(file_buf) as Box }); if options.check { @@ -486,19 +486,25 @@ where } else { "+".to_string() }; - let gnu_re = safe_unwrap!(Regex::new(&format!( - r"^(?P[a-fA-F0-9]{}) (?P[ \*])(?P.*)", - modifier, - ))); - let bsd_re = safe_unwrap!(Regex::new(&format!( - r"^{algorithm} \((?P.*)\) = (?P[a-fA-F0-9]{digest_size})", - algorithm = options.algoname, - digest_size = modifier, - ))); + let gnu_re = crash_if_err!( + 1, + Regex::new(&format!( + r"^(?P[a-fA-F0-9]{}) (?P[ \*])(?P.*)", + modifier, + )) + ); + let bsd_re = crash_if_err!( + 1, + Regex::new(&format!( + r"^{algorithm} \((?P.*)\) = (?P[a-fA-F0-9]{digest_size})", + algorithm = options.algoname, + digest_size = modifier, + )) + ); let buffer = file; for (i, line) in buffer.lines().enumerate() { - let line = safe_unwrap!(line); + let line = crash_if_err!(1, line); let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) { Some(caps) => ( caps.name("fileName").unwrap().as_str(), @@ -528,14 +534,17 @@ where } }, }; - let f = safe_unwrap!(File::open(ck_filename)); + let f = crash_if_err!(1, File::open(ck_filename)); let mut ckf = BufReader::new(Box::new(f) as Box); - let real_sum = safe_unwrap!(digest_reader( - &mut *options.digest, - &mut ckf, - binary_check, - options.output_bits - )) + let real_sum = crash_if_err!( + 1, + digest_reader( + &mut *options.digest, + &mut ckf, + binary_check, + options.output_bits + ) + ) .to_ascii_lowercase(); if sum == real_sum { if !options.quiet { @@ -549,12 +558,15 @@ where } } } else { - let sum = safe_unwrap!(digest_reader( - &mut *options.digest, - &mut file, - options.binary, - options.output_bits - )); + let sum = crash_if_err!( + 1, + digest_reader( + &mut *options.digest, + &mut file, + options.binary, + options.output_bits + ) + ); if options.tag { println!("{} ({}) = {}", options.algoname, filename.display(), sum); } else { diff --git a/src/uu/install/src/install.rs b/src/uu/install/src/install.rs index 72b7ddea8..548ffec59 100644 --- a/src/uu/install/src/install.rs +++ b/src/uu/install/src/install.rs @@ -155,8 +155,6 @@ static ABOUT: &str = "Copy SOURCE to DEST or multiple SOURCE(s) to the existing DIRECTORY, while setting permission modes and owner/group"; static OPT_COMPARE: &str = "compare"; -static OPT_BACKUP: &str = "backup"; -static OPT_BACKUP_NO_ARG: &str = "backup2"; static OPT_DIRECTORY: &str = "directory"; static OPT_IGNORED: &str = "ignored"; static OPT_CREATE_LEADING: &str = "create-leading"; @@ -166,7 +164,6 @@ static OPT_OWNER: &str = "owner"; static OPT_PRESERVE_TIMESTAMPS: &str = "preserve-timestamps"; static OPT_STRIP: &str = "strip"; static OPT_STRIP_PROGRAM: &str = "strip-program"; -static OPT_SUFFIX: &str = "suffix"; static OPT_TARGET_DIRECTORY: &str = "target-directory"; static OPT_NO_TARGET_DIRECTORY: &str = "no-target-directory"; static OPT_VERBOSE: &str = "verbose"; @@ -209,19 +206,10 @@ pub fn uu_app() -> App<'static, 'static> { .version(crate_version!()) .about(ABOUT) .arg( - Arg::with_name(OPT_BACKUP) - .long(OPT_BACKUP) - .help("make a backup of each existing destination file") - .takes_value(true) - .require_equals(true) - .min_values(0) - .value_name("CONTROL") + backup_control::arguments::backup() ) .arg( - // TODO implement flag - Arg::with_name(OPT_BACKUP_NO_ARG) - .short("b") - .help("like --backup but does not accept an argument") + backup_control::arguments::backup_no_args() ) .arg( Arg::with_name(OPT_IGNORED) @@ -279,9 +267,9 @@ pub fn uu_app() -> App<'static, 'static> { ) .arg( Arg::with_name(OPT_STRIP) - .short("s") - .long(OPT_STRIP) - .help("strip symbol tables (no action Windows)") + .short("s") + .long(OPT_STRIP) + .help("strip symbol tables (no action Windows)") ) .arg( Arg::with_name(OPT_STRIP_PROGRAM) @@ -290,14 +278,7 @@ pub fn uu_app() -> App<'static, 'static> { .value_name("PROGRAM") ) .arg( - // TODO implement flag - Arg::with_name(OPT_SUFFIX) - .short("S") - .long(OPT_SUFFIX) - .help("override the usual backup suffix") - .value_name("SUFFIX") - .takes_value(true) - .min_values(1) + backup_control::arguments::suffix() ) .arg( // TODO implement flag @@ -387,23 +368,14 @@ fn behavior(matches: &ArgMatches) -> UResult { None }; - let backup_mode = backup_control::determine_backup_mode( - matches.is_present(OPT_BACKUP_NO_ARG), - matches.is_present(OPT_BACKUP), - matches.value_of(OPT_BACKUP), - ); - let backup_mode = match backup_mode { - Err(err) => return Err(USimpleError::new(1, err)), - Ok(mode) => mode, - }; - + let backup_mode = backup_control::determine_backup_mode(matches)?; let target_dir = matches.value_of(OPT_TARGET_DIRECTORY).map(|d| d.to_owned()); Ok(Behavior { main_function, specified_mode, backup_mode, - suffix: backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)), + suffix: backup_control::determine_backup_suffix(matches), owner: matches.value_of(OPT_OWNER).unwrap_or("").to_string(), group: matches.value_of(OPT_GROUP).unwrap_or("").to_string(), verbose: matches.is_present(OPT_VERBOSE), diff --git a/src/uu/ln/src/ln.rs b/src/uu/ln/src/ln.rs index f1689a44b..4eeb637e7 100644 --- a/src/uu/ln/src/ln.rs +++ b/src/uu/ln/src/ln.rs @@ -54,7 +54,6 @@ enum LnError { FailedToLink(String), MissingDestination(String), ExtraOperand(String), - InvalidBackupMode(String), } impl Display for LnError { @@ -72,7 +71,6 @@ impl Display for LnError { s, uucore::execution_phrase() ), - Self::InvalidBackupMode(s) => write!(f, "{}", s), } } } @@ -87,7 +85,6 @@ impl UError for LnError { Self::FailedToLink(_) => 1, Self::MissingDestination(_) => 1, Self::ExtraOperand(_) => 1, - Self::InvalidBackupMode(_) => 1, } } } @@ -119,13 +116,10 @@ fn long_usage() -> String { static ABOUT: &str = "change file owner and group"; mod options { - pub const BACKUP_NO_ARG: &str = "b"; - pub const BACKUP: &str = "backup"; pub const FORCE: &str = "force"; pub const INTERACTIVE: &str = "interactive"; pub const NO_DEREFERENCE: &str = "no-dereference"; pub const SYMBOLIC: &str = "symbolic"; - pub const SUFFIX: &str = "suffix"; pub const TARGET_DIRECTORY: &str = "target-directory"; pub const NO_TARGET_DIRECTORY: &str = "no-target-directory"; pub const RELATIVE: &str = "relative"; @@ -164,19 +158,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { OverwriteMode::NoClobber }; - let backup_mode = backup_control::determine_backup_mode( - matches.is_present(options::BACKUP_NO_ARG), - matches.is_present(options::BACKUP), - matches.value_of(options::BACKUP), - ); - let backup_mode = match backup_mode { - Err(err) => { - return Err(LnError::InvalidBackupMode(err).into()); - } - Ok(mode) => mode, - }; - - let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(options::SUFFIX)); + let backup_mode = backup_control::determine_backup_mode(&matches)?; + let backup_suffix = backup_control::determine_backup_suffix(&matches); let settings = Settings { overwrite: overwrite_mode, @@ -199,20 +182,8 @@ pub fn uu_app() -> App<'static, 'static> { App::new(uucore::util_name()) .version(crate_version!()) .about(ABOUT) - .arg( - Arg::with_name(options::BACKUP) - .long(options::BACKUP) - .help("make a backup of each existing destination file") - .takes_value(true) - .require_equals(true) - .min_values(0) - .value_name("CONTROL"), - ) - .arg( - Arg::with_name(options::BACKUP_NO_ARG) - .short(options::BACKUP_NO_ARG) - .help("like --backup but does not accept an argument"), - ) + .arg(backup_control::arguments::backup()) + .arg(backup_control::arguments::backup_no_args()) // TODO: opts.arg( // Arg::with_name(("d", "directory", "allow users with appropriate privileges to attempt \ // to make hard links to directories"); @@ -250,14 +221,7 @@ pub fn uu_app() -> App<'static, 'static> { // override added for https://github.com/uutils/coreutils/issues/2359 .overrides_with(options::SYMBOLIC), ) - .arg( - Arg::with_name(options::SUFFIX) - .short("S") - .long(options::SUFFIX) - .help("override the usual backup suffix") - .value_name("SUFFIX") - .takes_value(true), - ) + .arg(backup_control::arguments::suffix()) .arg( Arg::with_name(options::TARGET_DIRECTORY) .short("t") diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index da8955152..3d957474c 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1362,8 +1362,8 @@ fn enter_directory(dir: &PathData, config: &Config, out: &mut BufWriter) vec![] }; - let mut temp: Vec<_> = safe_unwrap!(fs::read_dir(&dir.p_buf)) - .map(|res| safe_unwrap!(res)) + let mut temp: Vec<_> = crash_if_err!(1, fs::read_dir(&dir.p_buf)) + .map(|res| crash_if_err!(1, res)) .filter(|e| should_display(e, config)) .map(|e| PathData::new(DirEntry::path(&e), Some(e.file_type()), None, config, false)) .collect(); diff --git a/src/uu/ls/src/quoting_style.rs b/src/uu/ls/src/quoting_style.rs index f9ba55f7b..5f421b2ee 100644 --- a/src/uu/ls/src/quoting_style.rs +++ b/src/uu/ls/src/quoting_style.rs @@ -1,6 +1,9 @@ use std::char::from_digit; -const SPECIAL_SHELL_CHARS: &str = "~`#$&*()|[]{};\\'\"<>?! "; +// These are characters with special meaning in the shell (e.g. bash). +// The first const contains characters that only have a special meaning when they appear at the beginning of a name. +const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; +const SPECIAL_SHELL_CHARS: &str = "`$&*()|[]{};\\'\"<>?! "; pub(super) enum QuotingStyle { Shell { @@ -198,6 +201,8 @@ fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> } } } + + must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); (escaped_str, must_quote) } @@ -246,6 +251,7 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { } } } + must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); (escaped_str, must_quote) } @@ -659,4 +665,29 @@ mod tests { ], ); } + + #[test] + fn test_tilde_and_hash() { + check_names("~", vec![("'~'", "shell"), ("'~'", "shell-escape")]); + check_names( + "~name", + vec![("'~name'", "shell"), ("'~name'", "shell-escape")], + ); + check_names( + "some~name", + vec![("some~name", "shell"), ("some~name", "shell-escape")], + ); + check_names("name~", vec![("name~", "shell"), ("name~", "shell-escape")]); + + check_names("#", vec![("'#'", "shell"), ("'#'", "shell-escape")]); + check_names( + "#name", + vec![("'#name'", "shell"), ("'#name'", "shell-escape")], + ); + check_names( + "some#name", + vec![("some#name", "shell"), ("some#name", "shell-escape")], + ); + check_names("name#", vec![("name#", "shell"), ("name#", "shell-escape")]); + } } diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 5f825113b..e2e2352a0 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -44,13 +44,10 @@ pub enum OverwriteMode { static ABOUT: &str = "Move SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY."; static LONG_HELP: &str = ""; -static OPT_BACKUP: &str = "backup"; -static OPT_BACKUP_NO_ARG: &str = "b"; static OPT_FORCE: &str = "force"; static OPT_INTERACTIVE: &str = "interactive"; static OPT_NO_CLOBBER: &str = "no-clobber"; static OPT_STRIP_TRAILING_SLASHES: &str = "strip-trailing-slashes"; -static OPT_SUFFIX: &str = "suffix"; static OPT_TARGET_DIRECTORY: &str = "target-directory"; static OPT_NO_TARGET_DIRECTORY: &str = "no-target-directory"; static OPT_UPDATE: &str = "update"; @@ -85,14 +82,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .unwrap_or_default(); let overwrite_mode = determine_overwrite_mode(&matches); - let backup_mode = backup_control::determine_backup_mode( - matches.is_present(OPT_BACKUP_NO_ARG), - matches.is_present(OPT_BACKUP), - matches.value_of(OPT_BACKUP), - ); - let backup_mode = match backup_mode { - Err(err) => { - show_usage_error!("{}", err); + let backup_mode = match backup_control::determine_backup_mode(&matches) { + Err(e) => { + show!(e); return 1; } Ok(mode) => mode, @@ -103,7 +95,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { return 1; } - let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)); + let backup_suffix = backup_control::determine_backup_suffix(&matches); let behavior = Behavior { overwrite: overwrite_mode, @@ -137,18 +129,10 @@ pub fn uu_app() -> App<'static, 'static> { .version(crate_version!()) .about(ABOUT) .arg( - Arg::with_name(OPT_BACKUP) - .long(OPT_BACKUP) - .help("make a backup of each existing destination file") - .takes_value(true) - .require_equals(true) - .min_values(0) - .value_name("CONTROL") + backup_control::arguments::backup() ) .arg( - Arg::with_name(OPT_BACKUP_NO_ARG) - .short(OPT_BACKUP_NO_ARG) - .help("like --backup but does not accept an argument") + backup_control::arguments::backup_no_args() ) .arg( Arg::with_name(OPT_FORCE) @@ -173,12 +157,7 @@ pub fn uu_app() -> App<'static, 'static> { .help("remove any trailing slashes from each SOURCE argument") ) .arg( - Arg::with_name(OPT_SUFFIX) - .short("S") - .long(OPT_SUFFIX) - .help("override the usual backup suffix") - .takes_value(true) - .value_name("SUFFIX") + backup_control::arguments::suffix() ) .arg( Arg::with_name(OPT_TARGET_DIRECTORY) diff --git a/src/uu/pinky/src/pinky.rs b/src/uu/pinky/src/pinky.rs index 02573c994..4aa27affa 100644 --- a/src/uu/pinky/src/pinky.rs +++ b/src/uu/pinky/src/pinky.rs @@ -291,7 +291,7 @@ impl Pinky { let mut s = ut.host(); if self.include_where && !s.is_empty() { - s = safe_unwrap!(ut.canon_host()); + s = crash_if_err!(1, ut.canon_host()); print!(" {}", s); } diff --git a/src/uu/rmdir/Cargo.toml b/src/uu/rmdir/Cargo.toml index 27d94ec1d..cdb08f908 100644 --- a/src/uu/rmdir/Cargo.toml +++ b/src/uu/rmdir/Cargo.toml @@ -18,6 +18,7 @@ path = "src/rmdir.rs" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } +libc = "0.2.42" [[bin]] name = "rmdir" diff --git a/src/uu/rmdir/src/rmdir.rs b/src/uu/rmdir/src/rmdir.rs index cafd8e982..d8cad0421 100644 --- a/src/uu/rmdir/src/rmdir.rs +++ b/src/uu/rmdir/src/rmdir.rs @@ -11,8 +11,11 @@ extern crate uucore; use clap::{crate_version, App, Arg}; -use std::fs; +use std::fs::{read_dir, remove_dir}; +use std::io; use std::path::Path; +use uucore::error::{set_exit_code, strip_errno, UResult}; +use uucore::util_name; static ABOUT: &str = "Remove the DIRECTORY(ies), if they are empty."; static OPT_IGNORE_FAIL_NON_EMPTY: &str = "ignore-fail-on-non-empty"; @@ -21,35 +24,158 @@ static OPT_VERBOSE: &str = "verbose"; static ARG_DIRS: &str = "dirs"; -#[cfg(unix)] -static ENOTDIR: i32 = 20; -#[cfg(windows)] -static ENOTDIR: i32 = 267; - fn usage() -> String { format!("{0} [OPTION]... DIRECTORY...", uucore::execution_phrase()) } -pub fn uumain(args: impl uucore::Args) -> i32 { +#[uucore_procs::gen_uumain] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { let usage = usage(); let matches = uu_app().usage(&usage[..]).get_matches_from(args); - let dirs: Vec = matches - .values_of(ARG_DIRS) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); + let opts = Opts { + ignore: matches.is_present(OPT_IGNORE_FAIL_NON_EMPTY), + parents: matches.is_present(OPT_PARENTS), + verbose: matches.is_present(OPT_VERBOSE), + }; - let ignore = matches.is_present(OPT_IGNORE_FAIL_NON_EMPTY); - let parents = matches.is_present(OPT_PARENTS); - let verbose = matches.is_present(OPT_VERBOSE); + for path in matches + .values_of_os(ARG_DIRS) + .unwrap_or_default() + .map(Path::new) + { + if let Err(error) = remove(path, opts) { + let Error { error, path } = error; - match remove(dirs, ignore, parents, verbose) { - Ok(()) => ( /* pass */ ), - Err(e) => return e, + if opts.ignore && dir_not_empty(&error, path) { + continue; + } + + set_exit_code(1); + + // If `foo` is a symlink to a directory then `rmdir foo/` may give + // a "not a directory" error. This is confusing as `rm foo/` says + // "is a directory". + // This differs from system to system. Some don't give an error. + // Windows simply allows calling RemoveDirectory on symlinks so we + // don't need to worry about it here. + // GNU rmdir seems to print "Symbolic link not followed" if: + // - It has a trailing slash + // - It's a symlink + // - It either points to a directory or dangles + #[cfg(unix)] + { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + fn is_symlink(path: &Path) -> io::Result { + Ok(path.symlink_metadata()?.file_type().is_symlink()) + } + + fn points_to_directory(path: &Path) -> io::Result { + Ok(path.metadata()?.file_type().is_dir()) + } + + let path = path.as_os_str().as_bytes(); + if error.raw_os_error() == Some(libc::ENOTDIR) && path.ends_with(b"/") { + // Strip the trailing slash or .symlink_metadata() will follow the symlink + let path: &Path = OsStr::from_bytes(&path[..path.len() - 1]).as_ref(); + if is_symlink(path).unwrap_or(false) + && points_to_directory(path).unwrap_or(true) + { + show_error!( + "failed to remove '{}/': Symbolic link not followed", + path.display() + ); + continue; + } + } + } + + show_error!( + "failed to remove '{}': {}", + path.display(), + strip_errno(&error) + ); + } } - 0 + Ok(()) +} + +struct Error<'a> { + error: io::Error, + path: &'a Path, +} + +fn remove(mut path: &Path, opts: Opts) -> Result<(), Error<'_>> { + remove_single(path, opts)?; + if opts.parents { + while let Some(new) = path.parent() { + path = new; + if path.as_os_str() == "" { + break; + } + remove_single(path, opts)?; + } + } + Ok(()) +} + +fn remove_single(path: &Path, opts: Opts) -> Result<(), Error<'_>> { + if opts.verbose { + println!("{}: removing directory, '{}'", util_name(), path.display()); + } + remove_dir(path).map_err(|error| Error { error, path }) +} + +// POSIX: https://pubs.opengroup.org/onlinepubs/009696799/functions/rmdir.html +#[cfg(not(windows))] +const NOT_EMPTY_CODES: &[i32] = &[libc::ENOTEMPTY, libc::EEXIST]; + +// 145 is ERROR_DIR_NOT_EMPTY, determined experimentally. +#[cfg(windows)] +const NOT_EMPTY_CODES: &[i32] = &[145]; + +// Other error codes you might get for directories that could be found and are +// not empty. +// This is a subset of the error codes listed in rmdir(2) from the Linux man-pages +// project. Maybe other systems have additional codes that apply? +#[cfg(not(windows))] +const PERHAPS_EMPTY_CODES: &[i32] = &[libc::EACCES, libc::EBUSY, libc::EPERM, libc::EROFS]; + +// Probably incomplete, I can't find a list of possible errors for +// RemoveDirectory anywhere. +#[cfg(windows)] +const PERHAPS_EMPTY_CODES: &[i32] = &[ + 5, // ERROR_ACCESS_DENIED, found experimentally. +]; + +fn dir_not_empty(error: &io::Error, path: &Path) -> bool { + if let Some(code) = error.raw_os_error() { + if NOT_EMPTY_CODES.contains(&code) { + return true; + } + // If --ignore-fail-on-non-empty is used then we want to ignore all errors + // for non-empty directories, even if the error was e.g. because there's + // no permission. So we do an additional check. + if PERHAPS_EMPTY_CODES.contains(&code) { + if let Ok(mut iterator) = read_dir(path) { + if iterator.next().is_some() { + return true; + } + } + } + } + false +} + +#[derive(Clone, Copy, Debug)] +struct Opts { + ignore: bool, + parents: bool, + verbose: bool, } pub fn uu_app() -> App<'static, 'static> { @@ -84,57 +210,3 @@ pub fn uu_app() -> App<'static, 'static> { .required(true), ) } - -fn remove(dirs: Vec, ignore: bool, parents: bool, verbose: bool) -> Result<(), i32> { - let mut r = Ok(()); - - for dir in &dirs { - let path = Path::new(&dir[..]); - r = remove_dir(path, ignore, verbose).and(r); - if parents { - let mut p = path; - while let Some(new_p) = p.parent() { - p = new_p; - match p.as_os_str().to_str() { - None => break, - Some(s) => match s { - "" | "." | "/" => break, - _ => (), - }, - }; - r = remove_dir(p, ignore, verbose).and(r); - } - } - } - - r -} - -fn remove_dir(path: &Path, ignore: bool, verbose: bool) -> Result<(), i32> { - let mut read_dir = fs::read_dir(path).map_err(|e| { - if e.raw_os_error() == Some(ENOTDIR) { - show_error!("failed to remove '{}': Not a directory", path.display()); - } else { - show_error!("reading directory '{}': {}", path.display(), e); - } - 1 - })?; - - let mut r = Ok(()); - - if read_dir.next().is_none() { - match fs::remove_dir(path) { - Err(e) => { - show_error!("removing directory '{}': {}", path.display(), e); - r = Err(1); - } - Ok(_) if verbose => println!("removing directory, '{}'", path.display()), - _ => (), - } - } else if !ignore { - show_error!("failed to remove '{}': Directory not empty", path.display()); - r = Err(1); - } - - r -} diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 05388e7a1..2e700f952 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -105,7 +105,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let dec = slice.find('.').unwrap_or(len); largest_dec = len - dec; padding = dec; - return_if_err!(1, slice.parse()) + crash_if_err!(1, slice.parse()) } else { Number::BigInt(BigInt::one()) }; @@ -115,7 +115,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let dec = slice.find('.').unwrap_or(len); largest_dec = cmp::max(largest_dec, len - dec); padding = cmp::max(padding, dec); - return_if_err!(1, slice.parse()) + crash_if_err!(1, slice.parse()) } else { Number::BigInt(BigInt::one()) }; @@ -130,7 +130,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let last = { let slice = numbers[numbers.len() - 1]; padding = cmp::max(padding, slice.find('.').unwrap_or_else(|| slice.len())); - return_if_err!(1, slice.parse()) + crash_if_err!(1, slice.parse()) }; if largest_dec > 0 { largest_dec -= 1; diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 9aae23bb8..abba1e63b 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -45,7 +45,7 @@ use std::path::Path; use std::path::PathBuf; use std::str::Utf8Error; use unicode_width::UnicodeWidthStr; -use uucore::error::{set_exit_code, UError, UResult, USimpleError, UUsageError}; +use uucore::error::{set_exit_code, strip_errno, UError, UResult, USimpleError, UUsageError}; use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::version_cmp::version_cmp; use uucore::InvalidEncodingHandling; @@ -197,27 +197,17 @@ impl Display for SortError { Ok(()) } } - SortError::OpenFailed { path, error } => write!( - f, - "open failed: {}: {}", - path, - strip_errno(&error.to_string()) - ), + SortError::OpenFailed { path, error } => { + write!(f, "open failed: {}: {}", path, strip_errno(error)) + } SortError::ParseKeyError { key, msg } => { write!(f, "failed to parse key `{}`: {}", key, msg) } - SortError::ReadFailed { path, error } => write!( - f, - "cannot read: {}: {}", - path, - strip_errno(&error.to_string()) - ), + SortError::ReadFailed { path, error } => { + write!(f, "cannot read: {}: {}", path, strip_errno(error)) + } SortError::OpenTmpFileFailed { error } => { - write!( - f, - "failed to open temporary file: {}", - strip_errno(&error.to_string()) - ) + write!(f, "failed to open temporary file: {}", strip_errno(error)) } SortError::CompressProgExecutionFailed { code } => { write!(f, "couldn't execute compress program: errno {}", code) @@ -1814,11 +1804,6 @@ fn print_sorted<'a, T: Iterator>>( } } -/// Strips the trailing " (os error XX)" from io error strings. -fn strip_errno(err: &str) -> &str { - &err[..err.find(" (os error ").unwrap_or(err.len())] -} - fn open(path: impl AsRef) -> UResult> { let path = path.as_ref(); if path == "-" { diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index e87b3a225..77d80f777 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -170,7 +170,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let command_params: Vec<&str> = command_values.collect(); let mut tmp_dir = tempdir().unwrap(); - let (preload_env, libstdbuf) = return_if_err!(1, get_preload_env(&mut tmp_dir)); + let (preload_env, libstdbuf) = crash_if_err!(1, get_preload_env(&mut tmp_dir)); command.env(preload_env, libstdbuf); set_command_env(&mut command, "_STDBUF_I", options.stdin); set_command_env(&mut command, "_STDBUF_O", options.stdout); diff --git a/src/uu/timeout/src/timeout.rs b/src/uu/timeout/src/timeout.rs index 89fde82ca..21b0a0c37 100644 --- a/src/uu/timeout/src/timeout.rs +++ b/src/uu/timeout/src/timeout.rs @@ -221,7 +221,7 @@ fn timeout( cmd[0] ); } - return_if_err!(ERR_EXIT_STATUS, process.send_signal(signal)); + crash_if_err!(ERR_EXIT_STATUS, process.send_signal(signal)); if let Some(kill_after) = kill_after { match process.wait_or_timeout(kill_after) { Ok(Some(status)) => { @@ -235,13 +235,13 @@ fn timeout( if verbose { show_error!("sending signal KILL to command '{}'", cmd[0]); } - return_if_err!( + crash_if_err!( ERR_EXIT_STATUS, process.send_signal( uucore::signals::signal_by_name_or_value("KILL").unwrap() ) ); - return_if_err!(ERR_EXIT_STATUS, process.wait()); + crash_if_err!(ERR_EXIT_STATUS, process.wait()); 137 } Err(_) => 124, @@ -251,7 +251,7 @@ fn timeout( } } Err(_) => { - return_if_err!(ERR_EXIT_STATUS, process.send_signal(signal)); + crash_if_err!(ERR_EXIT_STATUS, process.send_signal(signal)); ERR_EXIT_STATUS } } diff --git a/src/uu/uname/src/uname.rs b/src/uu/uname/src/uname.rs index 5f8dfd8a8..2c396081e 100644 --- a/src/uu/uname/src/uname.rs +++ b/src/uu/uname/src/uname.rs @@ -53,7 +53,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let usage = format!("{} [OPTION]...", uucore::execution_phrase()); let matches = uu_app().usage(&usage[..]).get_matches_from(args); - let uname = return_if_err!(1, PlatformInfo::new()); + let uname = crash_if_err!(1, PlatformInfo::new()); let mut output = String::new(); let all = matches.is_present(options::ALL); diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index cb8541048..7fb9b2590 100644 --- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -178,13 +178,13 @@ fn write_tabs( break; } - safe_unwrap!(output.write_all(b"\t")); + crash_if_err!(1, output.write_all(b"\t")); scol += nts; } } while col > scol { - safe_unwrap!(output.write_all(b" ")); + crash_if_err!(1, output.write_all(b" ")); scol += 1; } } @@ -272,7 +272,7 @@ fn unexpand(options: Options) { init, true, ); - safe_unwrap!(output.write_all(&buf[byte..])); + crash_if_err!(1, output.write_all(&buf[byte..])); scol = col; break; } @@ -292,7 +292,7 @@ fn unexpand(options: Options) { }; if !tabs_buffered { - safe_unwrap!(output.write_all(&buf[byte..byte + nbytes])); + crash_if_err!(1, output.write_all(&buf[byte..byte + nbytes])); scol = col; // now printed up to this column } } @@ -317,7 +317,7 @@ fn unexpand(options: Options) { } else { 0 }; - safe_unwrap!(output.write_all(&buf[byte..byte + nbytes])); + crash_if_err!(1, output.write_all(&buf[byte..byte + nbytes])); scol = col; // we've now printed up to this column } } @@ -336,7 +336,7 @@ fn unexpand(options: Options) { init, true, ); - safe_unwrap!(output.flush()); + crash_if_err!(1, output.flush()); buf.truncate(0); // clear out the buffer } } diff --git a/src/uu/wc/BENCHMARKING.md b/src/uu/wc/BENCHMARKING.md new file mode 100644 index 000000000..b9d8cc22d --- /dev/null +++ b/src/uu/wc/BENCHMARKING.md @@ -0,0 +1,124 @@ +# Benchmarking wc + + + +Much of what makes wc fast is avoiding unnecessary work. It has multiple strategies, depending on which data is requested. + +## Strategies + +### Counting bytes + +In the case of `wc -c` the content of the input doesn't have to be inspected at all, only the size has to be known. That enables a few optimizations. + +#### File size + +If it can, wc reads the file size directly. This is not interesting to benchmark, except to see if it still works. Try `wc -c largefile`. + +#### `splice()` + +On Linux `splice()` is used to get the input's length while discarding it directly. + +The best way I've found to generate a fast input to test `splice()` is to pipe the output of uutils `cat` into it. Note that GNU `cat` is slower and therefore less suitable, and that if a file is given as its input directly (as in `wc -c < largefile`) the first strategy kicks in. Try `uucat somefile | wc -c`. + +### Counting lines + +In the case of `wc -l` or `wc -cl` the input doesn't have to be decoded. It's read in chunks and the `bytecount` crate is used to count the newlines. + +It's useful to vary the line length in the input. GNU wc seems particularly bad at short lines. + +### Processing unicode + +This is the most general strategy, and it's necessary for counting words, characters, and line lengths. Individual steps are still switched on and off depending on what must be reported. + +Try varying which of the `-w`, `-m`, `-l` and `-L` flags are used. (The `-c` flag is unlikely to make a difference.) + +Passing no flags is equivalent to passing `-wcl`. That case should perhaps be given special attention as it's the default. + +## Generating files + +To generate a file with many very short lines, run `yes | head -c50000000 > 25Mshortlines`. + +To get a file with less artificial contents, download a book from Project Gutenberg and concatenate it a lot of times: + +``` +wget https://www.gutenberg.org/files/2701/2701-0.txt -O moby.txt +cat moby.txt moby.txt moby.txt moby.txt > moby4.txt +cat moby4.txt moby4.txt moby4.txt moby4.txt > moby16.txt +cat moby16.txt moby16.txt moby16.txt moby16.txt > moby64.txt +``` + +And get one with lots of unicode too: + +``` +wget https://www.gutenberg.org/files/30613/30613-0.txt -O odyssey.txt +cat odyssey.txt odyssey.txt odyssey.txt odyssey.txt > odyssey4.txt +cat odyssey4.txt odyssey4.txt odyssey4.txt odyssey4.txt > odyssey16.txt +cat odyssey16.txt odyssey16.txt odyssey16.txt odyssey16.txt > odyssey64.txt +cat odyssey64.txt odyssey64.txt odyssey64.txt odyssey64.txt > odyssey256.txt +``` + +Finally, it's interesting to try a binary file. Look for one with `du -sh /usr/bin/* | sort -h`. On my system `/usr/bin/docker` is a good candidate as it's fairly large. + +## Running benchmarks + +Use [`hyperfine`](https://github.com/sharkdp/hyperfine) to compare the performance. For example, `hyperfine 'wc somefile' 'uuwc somefile'`. + +If you want to get fancy and exhaustive, generate a table: + +| | moby64.txt | odyssey256.txt | 25Mshortlines | /usr/bin/docker | +|------------------------|--------------|------------------|-----------------|-------------------| +| `wc ` | 1.3965 | 1.6182 | 5.2967 | 2.2294 | +| `wc -c ` | 0.8134 | 1.2774 | 0.7732 | 0.9106 | +| `uucat | wc -c` | 2.7760 | 2.5565 | 2.3769 | 2.3982 | +| `wc -l ` | 1.1441 | 1.2854 | 2.9681 | 1.1493 | +| `wc -L ` | 2.1087 | 1.2551 | 5.4577 | 2.1490 | +| `wc -m ` | 2.7272 | 2.1704 | 7.3371 | 3.4347 | +| `wc -w ` | 1.9007 | 1.5206 | 4.7851 | 2.8529 | +| `wc -lwcmL ` | 1.1687 | 0.9169 | 4.4092 | 2.0663 | + +Beware that: +- Results are fuzzy and change from run to run +- You'll often want to check versions of uutils wc against each other instead of against GNU +- This takes a lot of time to generate +- This only shows the relative speedup, not the absolute time, which may be misleading if the time is very short + +Created by the following Python script: +```python +import json +import subprocess + +from tabulate import tabulate + +bins = ["wc", "uuwc"] +files = ["moby64.txt", "odyssey256.txt", "25Mshortlines", "/usr/bin/docker"] +cmds = [ + "{cmd} {file}", + "{cmd} -c {file}", + "uucat {file} | {cmd} -c", + "{cmd} -l {file}", + "{cmd} -L {file}", + "{cmd} -m {file}", + "{cmd} -w {file}", + "{cmd} -lwcmL {file}", +] + +table = [] +for cmd in cmds: + row = ["`" + cmd.format(cmd="wc", file="") + "`"] + for file in files: + subprocess.run( + [ + "hyperfine", + cmd.format(cmd=bins[0], file=file), + cmd.format(cmd=bins[1], file=file), + "--export-json=out.json", + ], + check=True, + ) + with open("out.json") as f: + res = json.load(f)["results"] + row.append(round(res[0]["mean"] / res[1]["mean"], 4)) + table.append(row) +print(tabulate(table, [""] + files, tablefmt="github")) +``` +(You may have to adjust the `bins` and `files` variables depending on your setup, and please do add other interesting cases to `cmds`.) diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml index 31a7ac7af..80d6014da 100644 --- a/src/uu/wc/Cargo.toml +++ b/src/uu/wc/Cargo.toml @@ -18,7 +18,9 @@ path = "src/wc.rs" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } -thiserror = "1.0" +bytecount = "0.6.2" +utf-8 = "0.7.6" +unicode-width = "0.1.8" [target.'cfg(unix)'.dependencies] nix = "0.20" diff --git a/src/uu/wc/src/count_bytes.rs b/src/uu/wc/src/count_fast.rs similarity index 58% rename from src/uu/wc/src/count_bytes.rs rename to src/uu/wc/src/count_fast.rs index 83cc71ac4..b4078d6be 100644 --- a/src/uu/wc/src/count_bytes.rs +++ b/src/uu/wc/src/count_fast.rs @@ -1,13 +1,15 @@ -use super::{WcResult, WordCountable}; +use crate::word_count::WordCount; + +use super::WordCountable; #[cfg(any(target_os = "linux", target_os = "android"))] use std::fs::{File, OpenOptions}; -use std::io::ErrorKind; +use std::io::{self, ErrorKind, Read}; #[cfg(unix)] use libc::S_IFREG; #[cfg(unix)] -use nix::sys::stat::fstat; +use nix::sys::stat; #[cfg(any(target_os = "linux", target_os = "android"))] use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; @@ -18,7 +20,9 @@ use nix::fcntl::{splice, SpliceFFlags}; #[cfg(any(target_os = "linux", target_os = "android"))] use nix::unistd::pipe; -const BUF_SIZE: usize = 16384; +const BUF_SIZE: usize = 16 * 1024; +#[cfg(any(target_os = "linux", target_os = "android"))] +const SPLICE_SIZE: usize = 128 * 1024; /// Splice wrapper which handles short writes #[cfg(any(target_os = "linux", target_os = "android"))] @@ -37,15 +41,24 @@ fn splice_exact(read_fd: RawFd, write_fd: RawFd, num_bytes: usize) -> nix::Resul /// This is a Linux-specific function to count the number of bytes using the /// `splice` system call, which is faster than using `read`. +/// +/// On error it returns the number of bytes it did manage to read, since the +/// caller will fall back to a simpler method. #[inline] #[cfg(any(target_os = "linux", target_os = "android"))] -fn count_bytes_using_splice(fd: RawFd) -> nix::Result { +fn count_bytes_using_splice(fd: RawFd) -> Result { let null_file = OpenOptions::new() .write(true) .open("/dev/null") - .map_err(|_| nix::Error::last())?; + .map_err(|_| 0_usize)?; let null = null_file.as_raw_fd(); - let (pipe_rd, pipe_wr) = pipe()?; + let null_rdev = stat::fstat(null).map_err(|_| 0_usize)?.st_rdev; + if (stat::major(null_rdev), stat::minor(null_rdev)) != (1, 3) { + // This is not a proper /dev/null, writing to it is probably bad + // Bit of an edge case, but it has been known to happen + return Err(0); + } + let (pipe_rd, pipe_wr) = pipe().map_err(|_| 0_usize)?; // Ensure the pipe is closed when the function returns. // SAFETY: The file descriptors do not have other owners. @@ -53,12 +66,16 @@ fn count_bytes_using_splice(fd: RawFd) -> nix::Result { let mut byte_count = 0; loop { - let res = splice(fd, None, pipe_wr, None, BUF_SIZE, SpliceFFlags::empty())?; - if res == 0 { - break; - } - byte_count += res; - splice_exact(pipe_rd, null, res)?; + match splice(fd, None, pipe_wr, None, SPLICE_SIZE, SpliceFFlags::empty()) { + Ok(0) => break, + Ok(res) => { + byte_count += res; + if splice_exact(pipe_rd, null, res).is_err() { + return Err(byte_count); + } + } + Err(_) => return Err(byte_count), + }; } Ok(byte_count) @@ -72,23 +89,26 @@ fn count_bytes_using_splice(fd: RawFd) -> nix::Result { /// 3. Otherwise, we just read normally, but without the overhead of counting /// other things such as lines and words. #[inline] -pub(crate) fn count_bytes_fast(handle: &mut T) -> WcResult { +pub(crate) fn count_bytes_fast(handle: &mut T) -> (usize, Option) { + let mut byte_count = 0; + #[cfg(unix)] { let fd = handle.as_raw_fd(); - if let Ok(stat) = fstat(fd) { + if let Ok(stat) = stat::fstat(fd) { // If the file is regular, then the `st_size` should hold // the file's size in bytes. if (stat.st_mode & S_IFREG) != 0 { - return Ok(stat.st_size as usize); + return (stat.st_size as usize, None); } #[cfg(any(target_os = "linux", target_os = "android"))] { // Else, if we're on Linux and our file is a FIFO pipe // (or stdin), we use splice to count the number of bytes. if (stat.st_mode & S_IFIFO) != 0 { - if let Ok(n) = count_bytes_using_splice(fd) { - return Ok(n); + match count_bytes_using_splice(fd) { + Ok(n) => return (n, None), + Err(n) => byte_count = n, } } } @@ -97,15 +117,32 @@ pub(crate) fn count_bytes_fast(handle: &mut T) -> WcResult return Ok(byte_count), + Ok(0) => return (byte_count, None), Ok(n) => { byte_count += n; } Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, - Err(e) => return Err(e.into()), + Err(e) => return (byte_count, Some(e)), + } + } +} + +pub(crate) fn count_bytes_and_lines_fast( + handle: &mut R, +) -> (WordCount, Option) { + let mut total = WordCount::default(); + let mut buf = [0; BUF_SIZE]; + loop { + match handle.read(&mut buf) { + Ok(0) => return (total, None), + Ok(n) => { + total.bytes += n; + total.lines += bytecount::count(&buf[..n], b'\n'); + } + Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return (total, Some(e)), } } } diff --git a/src/uu/wc/src/countable.rs b/src/uu/wc/src/countable.rs index 3da910a03..a14623559 100644 --- a/src/uu/wc/src/countable.rs +++ b/src/uu/wc/src/countable.rs @@ -4,7 +4,7 @@ //! for some common file-like objects. Use the [`WordCountable::lines`] //! method to get an iterator over lines of a file-like object. use std::fs::File; -use std::io::{self, BufRead, BufReader, Read, StdinLock}; +use std::io::{BufRead, BufReader, Read, StdinLock}; #[cfg(unix)] use std::os::unix::io::AsRawFd; @@ -12,61 +12,26 @@ use std::os::unix::io::AsRawFd; #[cfg(unix)] pub trait WordCountable: AsRawFd + Read { type Buffered: BufRead; - fn lines(self) -> Lines; + fn buffered(self) -> Self::Buffered; } #[cfg(not(unix))] pub trait WordCountable: Read { type Buffered: BufRead; - fn lines(self) -> Lines; + fn buffered(self) -> Self::Buffered; } impl WordCountable for StdinLock<'_> { type Buffered = Self; - fn lines(self) -> Lines - where - Self: Sized, - { - Lines { buf: self } + fn buffered(self) -> Self::Buffered { + self } } impl WordCountable for File { type Buffered = BufReader; - fn lines(self) -> Lines - where - Self: Sized, - { - Lines { - buf: BufReader::new(self), - } - } -} - -/// An iterator over the lines of an instance of `BufRead`. -/// -/// Similar to [`io::Lines`] but yields each line as a `Vec` and -/// includes the newline character (`\n`, the `0xA` byte) that -/// terminates the line. -/// -/// [`io::Lines`]:: io::Lines -pub struct Lines { - buf: B, -} - -impl Iterator for Lines { - type Item = io::Result>; - - fn next(&mut self) -> Option { - let mut line = Vec::new(); - - // reading from a TTY seems to raise a condition on, rather than return Some(0) like a file. - // hence the option wrapped in a result here - match self.buf.read_until(b'\n', &mut line) { - Ok(0) => None, - Ok(_n) => Some(Ok(line)), - Err(e) => Some(Err(e)), - } + fn buffered(self) -> Self::Buffered { + BufReader::new(self) } } diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index d77cd6b4b..01c3d8fdc 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -8,33 +8,25 @@ #[macro_use] extern crate uucore; -mod count_bytes; +mod count_fast; mod countable; mod word_count; -use count_bytes::count_bytes_fast; +use count_fast::{count_bytes_and_lines_fast, count_bytes_fast}; use countable::WordCountable; +use unicode_width::UnicodeWidthChar; +use utf8::{BufReadDecoder, BufReadDecoderError}; use word_count::{TitledWordCount, WordCount}; use clap::{crate_version, App, Arg, ArgMatches}; -use thiserror::Error; +use std::cmp::max; use std::fs::{self, File}; -use std::io::{self, ErrorKind, Write}; -use std::path::Path; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; -#[derive(Error, Debug)] -pub enum WcError { - #[error("{0}")] - Io(#[from] io::Error), - #[error("Expected a file, found directory {0}")] - IsDirectory(String), -} - -type WcResult = Result; - struct Settings { show_bytes: bool, show_chars: bool, @@ -114,7 +106,7 @@ enum StdinKind { /// Supported inputs. enum Input { /// A regular file. - Path(String), + Path(PathBuf), /// Standard input. Stdin(StdinKind), @@ -122,13 +114,20 @@ enum Input { impl Input { /// Converts input to title that appears in stats. - fn to_title(&self) -> Option<&str> { + fn to_title(&self) -> Option<&Path> { match self { Input::Path(path) => Some(path), - Input::Stdin(StdinKind::Explicit) => Some("-"), + Input::Stdin(StdinKind::Explicit) => Some("-".as_ref()), Input::Stdin(StdinKind::Implicit) => None, } } + + fn path_display(&self) -> std::path::Display<'_> { + match self { + Input::Path(path) => path.display(), + Input::Stdin(_) => Path::display("'standard input'".as_ref()), + } + } } pub fn uumain(args: impl uucore::Args) -> i32 { @@ -137,13 +136,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let matches = uu_app().usage(&usage[..]).get_matches_from(args); let mut inputs: Vec = matches - .values_of(ARG_FILES) + .values_of_os(ARG_FILES) .map(|v| { v.map(|i| { if i == "-" { Input::Stdin(StdinKind::Explicit) } else { - Input::Path(ToString::to_string(i)) + Input::Path(i.into()) } }) .collect() @@ -203,55 +202,125 @@ pub fn uu_app() -> App<'static, 'static> { fn word_count_from_reader( mut reader: T, settings: &Settings, - path: &str, -) -> WcResult { +) -> (WordCount, Option) { let only_count_bytes = settings.show_bytes && (!(settings.show_chars || settings.show_lines || settings.show_max_line_length || settings.show_words)); if only_count_bytes { - return Ok(WordCount { - bytes: count_bytes_fast(&mut reader)?, - ..WordCount::default() - }); + let (bytes, error) = count_bytes_fast(&mut reader); + return ( + WordCount { + bytes, + ..WordCount::default() + }, + error, + ); } // we do not need to decode the byte stream if we're only counting bytes/newlines let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length; - // Sum the WordCount for each line. Show a warning for each line - // that results in an IO error when trying to read it. - let total = reader - .lines() - .filter_map(|res| match res { - Ok(line) => Some(line), - Err(e) => { - show_warning!("Error while reading {}: {}", path, e); - None + if !decode_chars { + return count_bytes_and_lines_fast(&mut reader); + } + + let mut total = WordCount::default(); + let mut reader = BufReadDecoder::new(reader.buffered()); + let mut in_word = false; + let mut current_len = 0; + + while let Some(chunk) = reader.next_strict() { + match chunk { + Ok(text) => { + for ch in text.chars() { + if settings.show_words { + if ch.is_whitespace() { + in_word = false; + } else if ch.is_ascii_control() { + // These count as characters but do not affect the word state + } else if !in_word { + in_word = true; + total.words += 1; + } + } + if settings.show_max_line_length { + match ch { + '\n' => { + total.max_line_length = max(current_len, total.max_line_length); + current_len = 0; + } + // '\x0c' = '\f' + '\r' | '\x0c' => { + total.max_line_length = max(current_len, total.max_line_length); + current_len = 0; + } + '\t' => { + current_len -= current_len % 8; + current_len += 8; + } + _ => { + current_len += ch.width().unwrap_or(0); + } + } + } + if settings.show_lines && ch == '\n' { + total.lines += 1; + } + if settings.show_chars { + total.chars += 1; + } + } + total.bytes += text.len(); } - }) - .map(|line| WordCount::from_line(&line, decode_chars)) - .sum(); - Ok(total) + Err(BufReadDecoderError::InvalidByteSequence(bytes)) => { + // GNU wc treats invalid data as neither word nor char nor whitespace, + // so no other counters are affected + total.bytes += bytes.len(); + } + Err(BufReadDecoderError::Io(e)) => { + return (total, Some(e)); + } + } + } + + total.max_line_length = max(current_len, total.max_line_length); + + (total, None) } -fn word_count_from_input(input: &Input, settings: &Settings) -> WcResult { +enum CountResult { + /// Nothing went wrong. + Success(WordCount), + /// Managed to open but failed to read. + Interrupted(WordCount, io::Error), + /// Didn't even manage to open. + Failure(io::Error), +} + +/// If we fail opening a file we only show the error. If we fail reading it +/// we show a count for what we managed to read. +/// +/// Therefore the reading implementations always return a total and sometimes +/// return an error: (WordCount, Option). +fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult { match input { Input::Stdin(_) => { let stdin = io::stdin(); let stdin_lock = stdin.lock(); - word_count_from_reader(stdin_lock, settings, "-") - } - Input::Path(path) => { - let path_obj = Path::new(path); - if path_obj.is_dir() { - Err(WcError::IsDirectory(path.to_owned())) - } else { - let file = File::open(path)?; - word_count_from_reader(file, settings, path) + match word_count_from_reader(stdin_lock, settings) { + (total, Some(error)) => CountResult::Interrupted(total, error), + (total, None) => CountResult::Success(total), } } + Input::Path(path) => match File::open(path) { + Err(error) => CountResult::Failure(error), + Ok(file) => match word_count_from_reader(file, settings) { + (total, Some(error)) => CountResult::Interrupted(total, error), + (total, None) => CountResult::Success(total), + }, + }, } } @@ -264,18 +333,8 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> WcResult { - show_error_custom_description!(path, "Is a directory"); - } - (Input::Path(path), WcError::Io(e)) if e.kind() == ErrorKind::NotFound => { - show_error_custom_description!(path, "No such file or directory"); - } - (_, e) => { - show_error!("{}", e); - } - }; +fn show_error(input: &Input, err: io::Error) { + show_error!("{}: {}", input.path_display(), err); } /// Compute the number of digits needed to represent any count for this input. @@ -297,9 +356,9 @@ fn show_error(input: &Input, err: WcError) { /// let input = Input::Stdin(StdinKind::Explicit); /// assert_eq!(7, digit_width(input)); /// ``` -fn digit_width(input: &Input) -> WcResult> { +fn digit_width(input: &Input) -> io::Result { match input { - Input::Stdin(_) => Ok(Some(MINIMUM_WIDTH)), + Input::Stdin(_) => Ok(MINIMUM_WIDTH), Input::Path(filename) => { let path = Path::new(filename); let metadata = fs::metadata(path)?; @@ -310,9 +369,9 @@ fn digit_width(input: &Input) -> WcResult> { // instead). See GitHub issue #2201. let num_bytes = metadata.len(); let num_digits = num_bytes.to_string().len(); - Ok(Some(num_digits)) + Ok(num_digits) } else { - Ok(None) + Ok(MINIMUM_WIDTH) } } } @@ -350,7 +409,7 @@ fn digit_width(input: &Input) -> WcResult> { fn max_width(inputs: &[Input]) -> usize { let mut result = 1; for input in inputs { - if let Ok(Some(n)) = digit_width(input) { + if let Ok(n) = digit_width(input) { result = result.max(n); } } @@ -363,7 +422,7 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { // The width is the number of digits needed to print the number of // bytes in the largest file. This is true regardless of whether // the `settings` indicate that the bytes will be displayed. - let mut error_count = 0; + let mut failure = false; let max_width = max_width(&inputs); let mut total_word_count = WordCount::default(); @@ -371,35 +430,43 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { let num_inputs = inputs.len(); for input in &inputs { - let word_count = word_count_from_input(input, settings).unwrap_or_else(|err| { - show_error(input, err); - error_count += 1; - WordCount::default() - }); + let word_count = match word_count_from_input(input, settings) { + CountResult::Success(word_count) => word_count, + CountResult::Interrupted(word_count, error) => { + show_error(input, error); + failure = true; + word_count + } + CountResult::Failure(error) => { + show_error(input, error); + failure = true; + continue; + } + }; total_word_count += word_count; let result = word_count.with_title(input.to_title()); if let Err(err) = print_stats(settings, &result, max_width) { show_warning!( "failed to print result for {}: {}", - result.title.unwrap_or(""), + result.title.unwrap_or_else(|| "".as_ref()).display(), err ); - error_count += 1; + failure = true; } } if num_inputs > 1 { - let total_result = total_word_count.with_title(Some("total")); + let total_result = total_word_count.with_title(Some("total".as_ref())); if let Err(err) = print_stats(settings, &total_result, max_width) { show_warning!("failed to print total: {}", err); - error_count += 1; + failure = true; } } - if error_count == 0 { - Ok(()) + if failure { + Err(1) } else { - Err(error_count) + Ok(()) } } @@ -407,7 +474,7 @@ fn print_stats( settings: &Settings, result: &TitledWordCount, mut min_width: usize, -) -> WcResult<()> { +) -> io::Result<()> { let stdout = io::stdout(); let mut stdout_lock = stdout.lock(); @@ -433,13 +500,6 @@ fn print_stats( write!(stdout_lock, "{:1$}", result.count.words, min_width)?; is_first = false; } - if settings.show_bytes { - if !is_first { - write!(stdout_lock, " ")?; - } - write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; - is_first = false; - } if settings.show_chars { if !is_first { write!(stdout_lock, " ")?; @@ -447,6 +507,13 @@ fn print_stats( write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; is_first = false; } + if settings.show_bytes { + if !is_first { + write!(stdout_lock, " ")?; + } + write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; + is_first = false; + } if settings.show_max_line_length { if !is_first { write!(stdout_lock, " ")?; @@ -459,7 +526,7 @@ fn print_stats( } if let Some(title) = result.title { - writeln!(stdout_lock, " {}", title)?; + writeln!(stdout_lock, " {}", title.display())?; } else { writeln!(stdout_lock)?; } diff --git a/src/uu/wc/src/word_count.rs b/src/uu/wc/src/word_count.rs index bdb510f58..617b811fc 100644 --- a/src/uu/wc/src/word_count.rs +++ b/src/uu/wc/src/word_count.rs @@ -1,19 +1,6 @@ use std::cmp::max; -use std::iter::Sum; use std::ops::{Add, AddAssign}; -use std::str::from_utf8; - -const CR: u8 = b'\r'; -const LF: u8 = b'\n'; -const SPACE: u8 = b' '; -const TAB: u8 = b'\t'; -const SYN: u8 = 0x16_u8; -const FF: u8 = 0x0C_u8; - -#[inline(always)] -fn is_word_separator(byte: u8) -> bool { - byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF -} +use std::path::Path; #[derive(Debug, Default, Copy, Clone)] pub struct WordCount { @@ -44,80 +31,10 @@ impl AddAssign for WordCount { } } -impl Sum for WordCount { - fn sum(iter: I) -> WordCount - where - I: Iterator, - { - iter.fold(WordCount::default(), |acc, x| acc + x) - } -} - impl WordCount { - /// Count the characters and whitespace-separated words in the given bytes. - /// - /// `line` is a slice of bytes that will be decoded as ASCII characters. - fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) { - let word_count = line.split(|&x| is_word_separator(x)).count(); - let char_count = line.iter().filter(|c| c.is_ascii()).count(); - (word_count, char_count) - } - - /// Create a [`WordCount`] from a sequence of bytes representing a line. - /// - /// If the last byte of `line` encodes a newline character (`\n`), - /// then the [`lines`] field will be set to 1. Otherwise, it will - /// be set to 0. The [`bytes`] field is simply the length of - /// `line`. - /// - /// If `decode_chars` is `false`, the [`chars`] and [`words`] - /// fields will be set to 0. If it is `true`, this function will - /// attempt to decode the bytes first as UTF-8, and failing that, - /// as ASCII. - pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount { - // GNU 'wc' only counts lines that end in LF as lines - let lines = (*line.last().unwrap() == LF) as usize; - let bytes = line.len(); - let (words, chars) = if decode_chars { - WordCount::word_and_char_count(line) - } else { - (0, 0) - }; - // -L is a GNU 'wc' extension so same behavior on LF - let max_line_length = if chars > 0 { chars - lines } else { 0 }; - WordCount { - bytes, - chars, - lines, - words, - max_line_length, - } - } - - /// Count the UTF-8 characters and words in the given string slice. - /// - /// `s` is a string slice that is assumed to be a UTF-8 string. - fn utf8_word_and_char_count(s: &str) -> (usize, usize) { - let word_count = s.split_whitespace().count(); - let char_count = s.chars().count(); - (word_count, char_count) - } - - pub fn with_title(self, title: Option<&str>) -> TitledWordCount { + pub fn with_title(self, title: Option<&Path>) -> TitledWordCount { TitledWordCount { title, count: self } } - - /// Count the characters and words in the given slice of bytes. - /// - /// `line` is a slice of bytes that will be decoded as UTF-8 - /// characters, or if that fails, as ASCII characters. - fn word_and_char_count(line: &[u8]) -> (usize, usize) { - // try and convert the bytes to UTF-8 first - match from_utf8(line) { - Ok(s) => WordCount::utf8_word_and_char_count(s), - Err(..) => WordCount::ascii_word_and_char_count(line), - } - } } /// This struct supplements the actual word count with an optional title that is @@ -126,6 +43,6 @@ impl WordCount { /// it would result in unnecessary copying of `String`. #[derive(Debug, Default, Clone)] pub struct TitledWordCount<'a> { - pub title: Option<&'a str>, + pub title: Option<&'a Path>, pub count: WordCount, } diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index b0ef7b3fa..d61747127 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -492,7 +492,7 @@ impl Who { }; let s = if self.do_lookup { - safe_unwrap!(ut.canon_host()) + crash_if_err!(1, ut.canon_host()) } else { ut.host() }; diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 6d27ecad4..e45c9e5b5 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -16,6 +16,7 @@ edition = "2018" path="src/lib/lib.rs" [dependencies] +clap = "2.33.3" dns-lookup = { version="1.0.5", optional=true } dunce = "1.0.0" getopts = "<= 0.2.21" diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index ea77a3506..0b8079a5c 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -6,6 +6,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +//! Set of functions to manage files and symlinks + #[cfg(unix)] use libc::{ mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP, @@ -15,6 +17,7 @@ use libc::{ use std::borrow::Cow; use std::env; use std::fs; +use std::io::Error as IOError; use std::io::Result as IOResult; use std::io::{Error, ErrorKind}; #[cfg(any(unix, target_os = "redox"))] @@ -109,26 +112,42 @@ pub fn normalize_path(path: &Path) -> PathBuf { ret } -fn resolve>(original: P) -> IOResult { +fn resolve>(original: P) -> Result { const MAX_LINKS_FOLLOWED: u32 = 255; let mut followed = 0; let mut result = original.as_ref().to_path_buf(); + + let mut first_resolution = None; loop { if followed == MAX_LINKS_FOLLOWED { - return Err(Error::new( - ErrorKind::InvalidInput, - "maximum links followed", + return Err(( + // When we hit MAX_LINKS_FOLLOWED we should return the first resolution (that's what GNU does - for whatever reason) + first_resolution.unwrap(), + Error::new(ErrorKind::InvalidInput, "maximum links followed"), )); } - if !fs::symlink_metadata(&result)?.file_type().is_symlink() { - break; + match fs::symlink_metadata(&result) { + Ok(meta) => { + if !meta.file_type().is_symlink() { + break; + } + } + Err(e) => return Err((result, e)), } followed += 1; - let path = fs::read_link(&result)?; - result.pop(); - result.push(path); + match fs::read_link(&result) { + Ok(path) => { + result.pop(); + result.push(path); + } + Err(e) => return Err((result, e)), + } + + if first_resolution.is_none() { + first_resolution = Some(result.clone()); + } } Ok(result) } @@ -214,11 +233,10 @@ pub fn canonicalize>( } match resolve(&result) { - Err(_) if miss_mode == MissingHandling::Missing => continue, - Err(e) => return Err(e), + Err((path, _)) if miss_mode == MissingHandling::Missing => result = path, + Err((_, e)) => return Err(e), Ok(path) => { - result.pop(); - result.push(path); + result = path; } } } @@ -230,14 +248,12 @@ pub fn canonicalize>( } match resolve(&result) { - Err(e) if miss_mode == MissingHandling::Existing => { + Err((_, e)) if miss_mode == MissingHandling::Existing => { return Err(e); } - Ok(path) => { - result.pop(); - result.push(path); + Ok(path) | Err((path, _)) => { + result = path; } - Err(_) => (), } if res_mode == ResolveMode::Physical { result = normalize_path(&result); diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 75375c7e2..72471ff9e 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -7,6 +7,8 @@ // For the full copyright and license information, please view the LICENSE file // that was distributed with this source code. +//! Set of functions to manage file systems + // spell-checker:ignore (arch) bitrig ; (fs) cifs smbfs extern crate time; diff --git a/src/uucore/src/lib/features/mode.rs b/src/uucore/src/lib/features/mode.rs index 794cda418..02e78ba9b 100644 --- a/src/uucore/src/lib/features/mode.rs +++ b/src/uucore/src/lib/features/mode.rs @@ -5,6 +5,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +//! Set of functions to parse modes + // spell-checker:ignore (vars) fperm srwx use libc::{mode_t, umask, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR}; diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index 4d2a2afad..a5b2522b9 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +//! Common functions to manage permissions + use crate::error::UResult; pub use crate::features::entries; use crate::fs::resolve_relative_path; diff --git a/src/uucore/src/lib/features/process.rs b/src/uucore/src/lib/features/process.rs index 21bfa992c..fc4b7ed48 100644 --- a/src/uucore/src/lib/features/process.rs +++ b/src/uucore/src/lib/features/process.rs @@ -9,6 +9,8 @@ // spell-checker:ignore (vars) cvar exitstatus // spell-checker:ignore (sys/unix) WIFSIGNALED +//! Set of functions to manage IDs + use libc::{gid_t, pid_t, uid_t}; use std::fmt; use std::io; diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index d16f04504..c7b15dba3 100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -99,24 +99,6 @@ macro_rules! crash_if_err( //==== -/// Unwraps the Result. Instead of panicking, it shows the error and then -/// returns from the function with the provided exit code. -/// Assumes the current function returns an i32 value. -#[macro_export] -macro_rules! return_if_err( - ($exit_code:expr, $exp:expr) => ( - match $exp { - Ok(m) => m, - Err(f) => { - $crate::show_error!("{}", f); - return $exit_code; - } - } - ) -); - -//==== - #[macro_export] macro_rules! safe_write( ($fd:expr, $($args:tt)+) => ( @@ -137,18 +119,6 @@ macro_rules! safe_writeln( ) ); -/// Unwraps the Result. Instead of panicking, it exists the program with exit -/// code 1. -#[macro_export] -macro_rules! safe_unwrap( - ($exp:expr) => ( - match $exp { - Ok(m) => m, - Err(f) => $crate::crash!(1, "{}", f.to_string()) - } - ) -); - //-- message templates //-- message templates : (join utility sub-macros) diff --git a/src/uucore/src/lib/mods/backup_control.rs b/src/uucore/src/lib/mods/backup_control.rs index 6fa48d308..83617f8ed 100644 --- a/src/uucore/src/lib/mods/backup_control.rs +++ b/src/uucore/src/lib/mods/backup_control.rs @@ -1,5 +1,89 @@ +//! Implement GNU-style backup functionality. +//! +//! This module implements the backup functionality as described in the [GNU +//! manual][1]. It provides +//! +//! - pre-defined [`clap`-Arguments][2] for inclusion in utilities that +//! implement backups +//! - determination of the [backup mode][3] +//! - determination of the [backup suffix][4] +//! - [backup target path construction][5] +//! - [Error types][6] for backup-related errors +//! - GNU-compliant [help texts][7] for backup-related errors +//! +//! Backup-functionality is implemented by the following utilities: +//! +//! - `cp` +//! - `install` +//! - `ln` +//! - `mv` +//! +//! +//! [1]: https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html +//! [2]: arguments +//! [3]: `determine_backup_mode()` +//! [4]: `determine_backup_suffix()` +//! [5]: `get_backup_path()` +//! [6]: `BackupError` +//! [7]: `BACKUP_CONTROL_LONG_HELP` +//! +//! +//! # Usage example +//! +//! ``` +//! #[macro_use] +//! extern crate uucore; +//! +//! use clap::{App, Arg, ArgMatches}; +//! use std::path::{Path, PathBuf}; +//! use uucore::backup_control::{self, BackupMode}; +//! use uucore::error::{UError, UResult}; +//! +//! fn main() { +//! let usage = String::from("app [OPTION]... ARG"); +//! let long_usage = String::from("And here's a detailed explanation"); +//! +//! let matches = App::new("app") +//! .arg(backup_control::arguments::backup()) +//! .arg(backup_control::arguments::backup_no_args()) +//! .arg(backup_control::arguments::suffix()) +//! .usage(&usage[..]) +//! .after_help(&*format!( +//! "{}\n{}", +//! long_usage, +//! backup_control::BACKUP_CONTROL_LONG_HELP +//! )) +//! .get_matches_from(vec![ +//! "app", "--backup=t", "--suffix=bak~" +//! ]); +//! +//! let backup_mode = match backup_control::determine_backup_mode(&matches) { +//! Err(e) => { +//! show!(e); +//! return; +//! }, +//! Ok(mode) => mode, +//! }; +//! let backup_suffix = backup_control::determine_backup_suffix(&matches); +//! let target_path = Path::new("/tmp/example"); +//! +//! let backup_path = backup_control::get_backup_path( +//! backup_mode, target_path, &backup_suffix +//! ); +//! +//! // Perform your backups here. +//! +//! } +//! ``` + +// spell-checker:ignore backupopt + +use crate::error::{UError, UResult}; +use clap::ArgMatches; use std::{ env, + error::Error, + fmt::{Debug, Display}, path::{Path, PathBuf}, }; @@ -17,15 +101,144 @@ the VERSION_CONTROL environment variable. Here are the values: existing, nil numbered if numbered backups exist, simple otherwise simple, never always make simple backups"; +static VALID_ARGS_HELP: &str = "Valid arguments are: + - 'none', 'off' + - 'simple', 'never' + - 'existing', 'nil' + - 'numbered', 't'"; + +/// Available backup modes. +/// +/// The mapping of the backup modes to the CLI arguments is annotated on the +/// enum variants. #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum BackupMode { + /// Argument 'none', 'off' NoBackup, + /// Argument 'simple', 'never' SimpleBackup, + /// Argument 'numbered', 't' NumberedBackup, + /// Argument 'existing', 'nil' ExistingBackup, } -pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { +/// Backup error types. +/// +/// Errors are currently raised by [`determine_backup_mode`] only. All errors +/// are implemented as [`UCustomError`] for uniform handling across utilities. +#[derive(Debug, Eq, PartialEq)] +pub enum BackupError { + /// An invalid argument (e.g. 'foo') was given as backup type. First + /// parameter is the argument, second is the arguments origin (CLI or + /// ENV-var) + InvalidArgument(String, String), + /// An ambiguous argument (e.g. 'n') was given as backup type. First + /// parameter is the argument, second is the arguments origin (CLI or + /// ENV-var) + AmbiguousArgument(String, String), + /// Currently unused + BackupImpossible(), + // BackupFailed(PathBuf, PathBuf, std::io::Error), +} + +impl UError for BackupError { + fn code(&self) -> i32 { + match self { + BackupError::BackupImpossible() => 2, + _ => 1, + } + } + + fn usage(&self) -> bool { + // Suggested by clippy. + matches!( + self, + BackupError::InvalidArgument(_, _) | BackupError::AmbiguousArgument(_, _) + ) + } +} + +impl Error for BackupError {} + +impl Display for BackupError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use BackupError as BE; + match self { + BE::InvalidArgument(arg, origin) => write!( + f, + "invalid argument '{}' for '{}'\n{}", + arg, origin, VALID_ARGS_HELP + ), + BE::AmbiguousArgument(arg, origin) => write!( + f, + "ambiguous argument '{}' for '{}'\n{}", + arg, origin, VALID_ARGS_HELP + ), + BE::BackupImpossible() => write!(f, "cannot create backup"), + // Placeholder for later + // BE::BackupFailed(from, to, e) => Display::fmt( + // &uio_error!(e, "failed to backup '{}' to '{}'", from.display(), to.display()), + // f + // ), + } + } +} + +/// Arguments for backup-related functionality. +/// +/// Rather than implementing the `clap`-Arguments for every utility, it is +/// recommended to include the `clap` arguments via the functions provided here. +/// This way the backup-specific arguments are handled uniformly across +/// utilities and can be maintained in one central place. +pub mod arguments { + extern crate clap; + + pub static OPT_BACKUP: &str = "backupopt_backup"; + pub static OPT_BACKUP_NO_ARG: &str = "backupopt_b"; + pub static OPT_SUFFIX: &str = "backupopt_suffix"; + + /// '--backup' argument + pub fn backup() -> clap::Arg<'static, 'static> { + clap::Arg::with_name(OPT_BACKUP) + .long("backup") + .help("make a backup of each existing destination file") + .takes_value(true) + .require_equals(true) + .min_values(0) + .value_name("CONTROL") + } + + /// '-b' argument + pub fn backup_no_args() -> clap::Arg<'static, 'static> { + clap::Arg::with_name(OPT_BACKUP_NO_ARG) + .short("b") + .help("like --backup but does not accept an argument") + } + + /// '-S, --suffix' argument + pub fn suffix() -> clap::Arg<'static, 'static> { + clap::Arg::with_name(OPT_SUFFIX) + .short("S") + .long("suffix") + .help("override the usual backup suffix") + .takes_value(true) + .value_name("SUFFIX") + } +} + +/// Obtain the suffix to use for a backup. +/// +/// In order of precedence, this function obtains the backup suffix +/// +/// 1. From the '-S' or '--suffix' CLI argument, if present +/// 2. From the "SIMPLE_BACKUP_SUFFIX" environment variable, if present +/// 3. By using the default '~' if none of the others apply +/// +/// This function directly takes [`clap::ArgMatches`] as argument and looks for +/// the '-S' and '--suffix' arguments itself. +pub fn determine_backup_suffix(matches: &ArgMatches) -> String { + let supplied_suffix = matches.value_of(arguments::OPT_SUFFIX); if let Some(suffix) = supplied_suffix { String::from(suffix) } else { @@ -38,7 +251,13 @@ pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { /// Parses the backup options according to the [GNU manual][1], and converts /// them to an instance of `BackupMode` for further processing. /// -/// For an explanation of what the arguments mean, refer to the examples below. +/// Takes [`clap::ArgMatches`] as argument which **must** contain the options +/// from [`arguments::backup()`] and [`arguments::backup_no_args()`]. Otherwise +/// the `NoBackup` mode is returned unconditionally. +/// +/// It is recommended for anyone who would like to implement the +/// backup-functionality to use the arguments prepared in the `arguments` +/// submodule (see examples) /// /// [1]: https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html /// @@ -47,9 +266,11 @@ pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { /// /// If an argument supplied directly to the long `backup` option, or read in /// through the `VERSION CONTROL` env var is ambiguous (i.e. may resolve to -/// multiple backup modes) or invalid, an error is returned. The error contains -/// the formatted error string which may then be passed to the -/// [`show_usage_error`] macro. +/// multiple backup modes) or invalid, an [`InvalidArgument`][10] or +/// [`AmbiguousArgument`][11] error is returned, respectively. +/// +/// [10]: BackupError::InvalidArgument +/// [11]: BackupError::AmbiguousArgument /// /// /// # Examples @@ -61,34 +282,18 @@ pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { /// #[macro_use] /// extern crate uucore; /// use uucore::backup_control::{self, BackupMode}; -/// use clap::{App, Arg}; +/// use clap::{App, Arg, ArgMatches}; /// /// fn main() { -/// let OPT_BACKUP: &str = "backup"; -/// let OPT_BACKUP_NO_ARG: &str = "b"; /// let matches = App::new("app") -/// .arg(Arg::with_name(OPT_BACKUP_NO_ARG) -/// .short(OPT_BACKUP_NO_ARG)) -/// .arg(Arg::with_name(OPT_BACKUP) -/// .long(OPT_BACKUP) -/// .takes_value(true) -/// .require_equals(true) -/// .min_values(0)) +/// .arg(backup_control::arguments::backup()) +/// .arg(backup_control::arguments::backup_no_args()) /// .get_matches_from(vec![ /// "app", "-b", "--backup=t" /// ]); -/// -/// let backup_mode = backup_control::determine_backup_mode( -/// matches.is_present(OPT_BACKUP_NO_ARG), matches.is_present(OPT_BACKUP), -/// matches.value_of(OPT_BACKUP) -/// ); -/// let backup_mode = match backup_mode { -/// Err(err) => { -/// show_usage_error!("{}", err); -/// return; -/// }, -/// Ok(mode) => mode, -/// }; +/// +/// let backup_mode = backup_control::determine_backup_mode(&matches).unwrap(); +/// assert_eq!(backup_mode, BackupMode::NumberedBackup) /// } /// ``` /// @@ -99,57 +304,43 @@ pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { /// ``` /// #[macro_use] /// extern crate uucore; -/// use uucore::backup_control::{self, BackupMode}; -/// use clap::{crate_version, App, Arg, ArgMatches}; +/// use uucore::backup_control::{self, BackupMode, BackupError}; +/// use clap::{App, Arg, ArgMatches}; /// /// fn main() { -/// let OPT_BACKUP: &str = "backup"; -/// let OPT_BACKUP_NO_ARG: &str = "b"; /// let matches = App::new("app") -/// .arg(Arg::with_name(OPT_BACKUP_NO_ARG) -/// .short(OPT_BACKUP_NO_ARG)) -/// .arg(Arg::with_name(OPT_BACKUP) -/// .long(OPT_BACKUP) -/// .takes_value(true) -/// .require_equals(true) -/// .min_values(0)) +/// .arg(backup_control::arguments::backup()) +/// .arg(backup_control::arguments::backup_no_args()) /// .get_matches_from(vec![ /// "app", "-b", "--backup=n" /// ]); /// -/// let backup_mode = backup_control::determine_backup_mode( -/// matches.is_present(OPT_BACKUP_NO_ARG), matches.is_present(OPT_BACKUP), -/// matches.value_of(OPT_BACKUP) -/// ); -/// let backup_mode = match backup_mode { -/// Err(err) => { -/// show_usage_error!("{}", err); -/// return; -/// }, -/// Ok(mode) => mode, -/// }; +/// let backup_mode = backup_control::determine_backup_mode(&matches); +/// +/// assert!(backup_mode.is_err()); +/// let err = backup_mode.unwrap_err(); +/// // assert_eq!(err, BackupError::AmbiguousArgument); +/// // Use uucore functionality to show the error to the user +/// show!(err); /// } /// ``` -pub fn determine_backup_mode( - short_opt_present: bool, - long_opt_present: bool, - long_opt_value: Option<&str>, -) -> Result { - if long_opt_present { +pub fn determine_backup_mode(matches: &ArgMatches) -> UResult { + if matches.is_present(arguments::OPT_BACKUP) { // Use method to determine the type of backups to make. When this option // is used but method is not specified, then the value of the // VERSION_CONTROL environment variable is used. And if VERSION_CONTROL - // is not set, the default backup type is ‘existing’. - if let Some(method) = long_opt_value { + // is not set, the default backup type is 'existing'. + if let Some(method) = matches.value_of(arguments::OPT_BACKUP) { // Second argument is for the error string that is returned. match_method(method, "backup type") } else if let Ok(method) = env::var("VERSION_CONTROL") { // Second argument is for the error string that is returned. match_method(&method, "$VERSION_CONTROL") } else { + // Default if no argument is provided to '--backup' Ok(BackupMode::ExistingBackup) } - } else if short_opt_present { + } else if matches.is_present(arguments::OPT_BACKUP_NO_ARG) { // the short form of this option, -b does not accept any argument. // Using -b is equivalent to using --backup=existing. Ok(BackupMode::ExistingBackup) @@ -172,10 +363,13 @@ pub fn determine_backup_mode( /// /// # Errors /// -/// If `method` is ambiguous (i.e. may resolve to multiple backup modes) or -/// invalid, an error is returned. The error contains the formatted error string -/// which may then be passed to the [`show_usage_error`] macro. -fn match_method(method: &str, origin: &str) -> Result { +/// If `method` is invalid or ambiguous (i.e. may resolve to multiple backup +/// modes), an [`InvalidArgument`][10] or [`AmbiguousArgument`][11] error is +/// returned, respectively. +/// +/// [10]: BackupError::InvalidArgument +/// [11]: BackupError::AmbiguousArgument +fn match_method(method: &str, origin: &str) -> UResult { let matches: Vec<&&str> = BACKUP_CONTROL_VALUES .iter() .filter(|val| val.starts_with(method)) @@ -189,21 +383,10 @@ fn match_method(method: &str, origin: &str) -> Result { _ => unreachable!(), // cannot happen as we must have exactly one match // from the list above. } + } else if matches.is_empty() { + Err(BackupError::InvalidArgument(method.to_string(), origin.to_string()).into()) } else { - let error_type = if matches.is_empty() { - "invalid" - } else { - "ambiguous" - }; - Err(format!( - "{0} argument ‘{1}’ for ‘{2}’ -Valid arguments are: - - ‘none’, ‘off’ - - ‘simple’, ‘never’ - - ‘existing’, ‘nil’ - - ‘numbered’, ‘t’", - error_type, method, origin - )) + Err(BackupError::AmbiguousArgument(method.to_string(), origin.to_string()).into()) } } @@ -255,6 +438,7 @@ mod tests { use super::*; use std::env; // Required to instantiate mutex in shared context + use clap::App; use lazy_static::lazy_static; use std::sync::Mutex; @@ -271,16 +455,20 @@ mod tests { // Environment variable for "VERSION_CONTROL" static ENV_VERSION_CONTROL: &str = "VERSION_CONTROL"; + fn make_app() -> clap::App<'static, 'static> { + App::new("app") + .arg(arguments::backup()) + .arg(arguments::backup_no_args()) + .arg(arguments::suffix()) + } + // Defaults to --backup=existing #[test] fn test_backup_mode_short_only() { - let short_opt_present = true; - let long_opt_present = false; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "-b"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); } @@ -288,13 +476,10 @@ mod tests { // --backup takes precedence over -b #[test] fn test_backup_mode_long_preferred_over_short() { - let short_opt_present = true; - let long_opt_present = true; - let long_opt_value = Some("none"); let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "-b", "--backup=none"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NoBackup); } @@ -302,13 +487,10 @@ mod tests { // --backup can be passed without an argument #[test] fn test_backup_mode_long_without_args_no_env() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "--backup"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); } @@ -316,13 +498,10 @@ mod tests { // --backup can be passed with an argument only #[test] fn test_backup_mode_long_with_args() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = Some("simple"); let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "--backup=simple"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); } @@ -330,43 +509,36 @@ mod tests { // --backup errors on invalid argument #[test] fn test_backup_mode_long_with_args_invalid() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = Some("foobar"); let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "--backup=foobar"]); - let result = determine_backup_mode(short_opt_present, long_opt_present, long_opt_value); + let result = determine_backup_mode(&matches); assert!(result.is_err()); - let text = result.unwrap_err(); - assert!(text.contains("invalid argument ‘foobar’ for ‘backup type’")); + let text = format!("{}", result.unwrap_err()); + assert!(text.contains("invalid argument 'foobar' for 'backup type'")); } // --backup errors on ambiguous argument #[test] fn test_backup_mode_long_with_args_ambiguous() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = Some("n"); let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "--backup=n"]); - let result = determine_backup_mode(short_opt_present, long_opt_present, long_opt_value); + let result = determine_backup_mode(&matches); assert!(result.is_err()); - let text = result.unwrap_err(); - assert!(text.contains("ambiguous argument ‘n’ for ‘backup type’")); + let text = format!("{}", result.unwrap_err()); + assert!(text.contains("ambiguous argument 'n' for 'backup type'")); } // --backup accepts shortened arguments (si for simple) #[test] fn test_backup_mode_long_with_arg_shortened() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = Some("si"); let _dummy = TEST_MUTEX.lock().unwrap(); + let matches = make_app().get_matches_from(vec!["app", "--backup=si"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); } @@ -374,14 +546,11 @@ mod tests { // -b ignores the "VERSION_CONTROL" environment variable #[test] fn test_backup_mode_short_only_ignore_env() { - let short_opt_present = true; - let long_opt_present = false; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "none"); + let matches = make_app().get_matches_from(vec!["app", "-b"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); env::remove_var(ENV_VERSION_CONTROL); @@ -390,14 +559,11 @@ mod tests { // --backup can be passed without an argument, but reads env var if existent #[test] fn test_backup_mode_long_without_args_with_env() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "none"); + let matches = make_app().get_matches_from(vec!["app", "--backup"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NoBackup); env::remove_var(ENV_VERSION_CONTROL); @@ -406,48 +572,41 @@ mod tests { // --backup errors on invalid VERSION_CONTROL env var #[test] fn test_backup_mode_long_with_env_var_invalid() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "foobar"); + let matches = make_app().get_matches_from(vec!["app", "--backup"]); - let result = determine_backup_mode(short_opt_present, long_opt_present, long_opt_value); + let result = determine_backup_mode(&matches); assert!(result.is_err()); - let text = result.unwrap_err(); - assert!(text.contains("invalid argument ‘foobar’ for ‘$VERSION_CONTROL’")); + let text = format!("{}", result.unwrap_err()); + assert!(text.contains("invalid argument 'foobar' for '$VERSION_CONTROL'")); env::remove_var(ENV_VERSION_CONTROL); } // --backup errors on ambiguous VERSION_CONTROL env var #[test] fn test_backup_mode_long_with_env_var_ambiguous() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "n"); + let matches = make_app().get_matches_from(vec!["app", "--backup"]); - let result = determine_backup_mode(short_opt_present, long_opt_present, long_opt_value); + let result = determine_backup_mode(&matches); assert!(result.is_err()); - let text = result.unwrap_err(); - assert!(text.contains("ambiguous argument ‘n’ for ‘$VERSION_CONTROL’")); + let text = format!("{}", result.unwrap_err()); + assert!(text.contains("ambiguous argument 'n' for '$VERSION_CONTROL'")); env::remove_var(ENV_VERSION_CONTROL); } // --backup accepts shortened env vars (si for simple) #[test] fn test_backup_mode_long_with_env_var_shortened() { - let short_opt_present = false; - let long_opt_present = true; - let long_opt_value = None; let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "si"); + let matches = make_app().get_matches_from(vec!["app", "--backup"]); - let result = - determine_backup_mode(short_opt_present, long_opt_present, long_opt_value).unwrap(); + let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); env::remove_var(ENV_VERSION_CONTROL); diff --git a/src/uucore/src/lib/mods/error.rs b/src/uucore/src/lib/mods/error.rs index c2b3069c8..6af934d3e 100644 --- a/src/uucore/src/lib/mods/error.rs +++ b/src/uucore/src/lib/mods/error.rs @@ -409,6 +409,15 @@ impl Display for UIoError { } } +/// Strip the trailing " (os error XX)" from io error strings. +pub fn strip_errno(err: &std::io::Error) -> String { + let mut msg = err.to_string(); + if let Some(pos) = msg.find(" (os error ") { + msg.drain(pos..); + } + msg +} + /// Enables the conversion from [`std::io::Error`] to [`UError`] and from [`std::io::Result`] to /// [`UResult`]. pub trait FromIo { diff --git a/tests/by-util/test_realpath.rs b/tests/by-util/test_realpath.rs index 8cb1551f0..7b6da5d36 100644 --- a/tests/by-util/test_realpath.rs +++ b/tests/by-util/test_realpath.rs @@ -139,3 +139,23 @@ fn test_realpath_logical_mode() { .succeeds() .stdout_contains("dir1\n"); } + +#[test] +fn test_realpath_dangling() { + let (at, mut ucmd) = at_and_ucmd!(); + at.symlink_file("nonexistent-file", "link"); + ucmd.arg("link") + .succeeds() + .stdout_only(at.plus_as_string("nonexistent-file\n")); +} + +#[test] +fn test_realpath_loop() { + let (at, mut ucmd) = at_and_ucmd!(); + at.symlink_file("2", "1"); + at.symlink_file("3", "2"); + at.symlink_file("1", "3"); + ucmd.arg("1") + .succeeds() + .stdout_only(at.plus_as_string("2\n")); +} diff --git a/tests/by-util/test_rmdir.rs b/tests/by-util/test_rmdir.rs index 4b74b2522..c8f22aa6c 100644 --- a/tests/by-util/test_rmdir.rs +++ b/tests/by-util/test_rmdir.rs @@ -1,126 +1,238 @@ use crate::common::util::*; +const DIR: &str = "dir"; +const DIR_FILE: &str = "dir/file"; +const NESTED_DIR: &str = "dir/ect/ory"; +const NESTED_DIR_FILE: &str = "dir/ect/ory/file"; + +#[cfg(windows)] +const NOT_FOUND: &str = "The system cannot find the file specified."; +#[cfg(not(windows))] +const NOT_FOUND: &str = "No such file or directory"; + +#[cfg(windows)] +const NOT_EMPTY: &str = "The directory is not empty."; +#[cfg(not(windows))] +const NOT_EMPTY: &str = "Directory not empty"; + +#[cfg(windows)] +const NOT_A_DIRECTORY: &str = "The directory name is invalid."; +#[cfg(not(windows))] +const NOT_A_DIRECTORY: &str = "Not a directory"; + #[test] fn test_rmdir_empty_directory_no_parents() { let (at, mut ucmd) = at_and_ucmd!(); - let dir = "test_rmdir_empty_no_parents"; - at.mkdir(dir); - assert!(at.dir_exists(dir)); + at.mkdir(DIR); - ucmd.arg(dir).succeeds().no_stderr(); + ucmd.arg(DIR).succeeds().no_stderr(); - assert!(!at.dir_exists(dir)); + assert!(!at.dir_exists(DIR)); } #[test] fn test_rmdir_empty_directory_with_parents() { let (at, mut ucmd) = at_and_ucmd!(); - let dir = "test_rmdir_empty/with/parents"; - at.mkdir_all(dir); - assert!(at.dir_exists(dir)); + at.mkdir_all(NESTED_DIR); - ucmd.arg("-p").arg(dir).succeeds().no_stderr(); + ucmd.arg("-p").arg(NESTED_DIR).succeeds().no_stderr(); - assert!(!at.dir_exists(dir)); + assert!(!at.dir_exists(NESTED_DIR)); + assert!(!at.dir_exists(DIR)); } #[test] fn test_rmdir_nonempty_directory_no_parents() { let (at, mut ucmd) = at_and_ucmd!(); - let dir = "test_rmdir_nonempty_no_parents"; - let file = "test_rmdir_nonempty_no_parents/foo"; - at.mkdir(dir); - assert!(at.dir_exists(dir)); + at.mkdir(DIR); + at.touch(DIR_FILE); - at.touch(file); - assert!(at.file_exists(file)); + ucmd.arg(DIR) + .fails() + .stderr_is(format!("rmdir: failed to remove 'dir': {}", NOT_EMPTY)); - ucmd.arg(dir).fails().stderr_is( - "rmdir: failed to remove 'test_rmdir_nonempty_no_parents': Directory not \ - empty\n", - ); - - assert!(at.dir_exists(dir)); + assert!(at.dir_exists(DIR)); } #[test] fn test_rmdir_nonempty_directory_with_parents() { let (at, mut ucmd) = at_and_ucmd!(); - let dir = "test_rmdir_nonempty/with/parents"; - let file = "test_rmdir_nonempty/with/parents/foo"; - at.mkdir_all(dir); - assert!(at.dir_exists(dir)); + at.mkdir_all(NESTED_DIR); + at.touch(NESTED_DIR_FILE); - at.touch(file); - assert!(at.file_exists(file)); + ucmd.arg("-p").arg(NESTED_DIR).fails().stderr_is(format!( + "rmdir: failed to remove 'dir/ect/ory': {}", + NOT_EMPTY + )); - ucmd.arg("-p").arg(dir).fails().stderr_is( - "rmdir: failed to remove 'test_rmdir_nonempty/with/parents': Directory not \ - empty\nrmdir: failed to remove 'test_rmdir_nonempty/with': Directory not \ - empty\nrmdir: failed to remove 'test_rmdir_nonempty': Directory not \ - empty\n", - ); - - assert!(at.dir_exists(dir)); + assert!(at.dir_exists(NESTED_DIR)); } #[test] fn test_rmdir_ignore_nonempty_directory_no_parents() { let (at, mut ucmd) = at_and_ucmd!(); - let dir = "test_rmdir_ignore_nonempty_no_parents"; - let file = "test_rmdir_ignore_nonempty_no_parents/foo"; - at.mkdir(dir); - assert!(at.dir_exists(dir)); - - at.touch(file); - assert!(at.file_exists(file)); + at.mkdir(DIR); + at.touch(DIR_FILE); ucmd.arg("--ignore-fail-on-non-empty") - .arg(dir) + .arg(DIR) .succeeds() .no_stderr(); - assert!(at.dir_exists(dir)); + assert!(at.dir_exists(DIR)); } #[test] fn test_rmdir_ignore_nonempty_directory_with_parents() { let (at, mut ucmd) = at_and_ucmd!(); - let dir = "test_rmdir_ignore_nonempty/with/parents"; - let file = "test_rmdir_ignore_nonempty/with/parents/foo"; - at.mkdir_all(dir); - assert!(at.dir_exists(dir)); - - at.touch(file); - assert!(at.file_exists(file)); + at.mkdir_all(NESTED_DIR); + at.touch(NESTED_DIR_FILE); ucmd.arg("--ignore-fail-on-non-empty") .arg("-p") - .arg(dir) + .arg(NESTED_DIR) .succeeds() .no_stderr(); - assert!(at.dir_exists(dir)); + assert!(at.dir_exists(NESTED_DIR)); } #[test] -fn test_rmdir_remove_symlink_match_gnu_error() { +fn test_rmdir_not_a_directory() { let (at, mut ucmd) = at_and_ucmd!(); - let file = "file"; - let fl = "fl"; - at.touch(file); - assert!(at.file_exists(file)); - at.symlink_file(file, fl); - assert!(at.file_exists(fl)); + at.touch("file"); - ucmd.arg("fl/") + ucmd.arg("--ignore-fail-on-non-empty") + .arg("file") .fails() - .stderr_is("rmdir: failed to remove 'fl/': Not a directory"); + .no_stdout() + .stderr_is(format!( + "rmdir: failed to remove 'file': {}", + NOT_A_DIRECTORY + )); +} + +#[test] +fn test_verbose_single() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir(DIR); + + ucmd.arg("-v") + .arg(DIR) + .succeeds() + .no_stderr() + .stdout_is("rmdir: removing directory, 'dir'\n"); +} + +#[test] +fn test_verbose_multi() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir(DIR); + + ucmd.arg("-v") + .arg("does_not_exist") + .arg(DIR) + .fails() + .stdout_is( + "rmdir: removing directory, 'does_not_exist'\n\ + rmdir: removing directory, 'dir'\n", + ) + .stderr_is(format!( + "rmdir: failed to remove 'does_not_exist': {}", + NOT_FOUND + )); +} + +#[test] +fn test_verbose_nested_failure() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir_all(NESTED_DIR); + at.touch("dir/ect/file"); + + ucmd.arg("-pv") + .arg(NESTED_DIR) + .fails() + .stdout_is( + "rmdir: removing directory, 'dir/ect/ory'\n\ + rmdir: removing directory, 'dir/ect'\n", + ) + .stderr_is(format!("rmdir: failed to remove 'dir/ect': {}", NOT_EMPTY)); +} + +#[cfg(unix)] +#[test] +fn test_rmdir_ignore_nonempty_no_permissions() { + use std::fs; + + let (at, mut ucmd) = at_and_ucmd!(); + + // We make the *parent* dir read-only to prevent deleting the dir in it. + at.mkdir_all("dir/ect/ory"); + at.touch("dir/ect/ory/file"); + let dir_ect = at.plus("dir/ect"); + let mut perms = fs::metadata(&dir_ect).unwrap().permissions(); + perms.set_readonly(true); + fs::set_permissions(&dir_ect, perms.clone()).unwrap(); + + // rmdir should now get a permissions error that it interprets as + // a non-empty error. + ucmd.arg("--ignore-fail-on-non-empty") + .arg("dir/ect/ory") + .succeeds() + .no_stderr(); + + assert!(at.dir_exists("dir/ect/ory")); + + // Politely restore permissions for cleanup + perms.set_readonly(false); + fs::set_permissions(&dir_ect, perms).unwrap(); +} + +#[test] +fn test_rmdir_remove_symlink_file() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.touch("file"); + at.symlink_file("file", "fl"); + + ucmd.arg("fl/").fails().stderr_is(format!( + "rmdir: failed to remove 'fl/': {}", + NOT_A_DIRECTORY + )); +} + +// This behavior is known to happen on Linux but not all Unixes +#[cfg(any(target_os = "linux", target_os = "android"))] +#[test] +fn test_rmdir_remove_symlink_dir() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("dir"); + at.symlink_dir("dir", "dl"); + + ucmd.arg("dl/") + .fails() + .stderr_is("rmdir: failed to remove 'dl/': Symbolic link not followed"); +} + +#[cfg(any(target_os = "linux", target_os = "android"))] +#[test] +fn test_rmdir_remove_symlink_dangling() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.symlink_dir("dir", "dl"); + + ucmd.arg("dl/") + .fails() + .stderr_is("rmdir: failed to remove 'dl/': Symbolic link not followed"); } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 88c65c997..eabaf58eb 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -1,6 +1,6 @@ use crate::common::util::*; -// spell-checker:ignore (flags) lwmcL clmwL ; (path) bogusfile emptyfile manyemptylines moby notrailingnewline onelongemptyline onelongword +// spell-checker:ignore (flags) lwmcL clmwL ; (path) bogusfile emptyfile manyemptylines moby notrailingnewline onelongemptyline onelongword weirdchars #[test] fn test_count_bytes_large_stdin() { @@ -53,11 +53,16 @@ fn test_utf8() { .args(&["-lwmcL"]) .pipe_in_fixture("UTF_8_test.txt") .run() - .stdout_is(" 300 4969 22781 22213 79\n"); - // GNU returns " 300 2086 22219 22781 79" - // - // TODO: we should fix the word, character, and byte count to - // match the behavior of GNU wc + .stdout_is(" 303 2119 22457 23025 79\n"); +} + +#[test] +fn test_utf8_extra() { + new_ucmd!() + .arg("-lwmcL") + .pipe_in_fixture("UTF_8_weirdchars.txt") + .run() + .stdout_is(" 25 87 442 513 48\n"); } #[test] @@ -200,22 +205,33 @@ fn test_file_bytes_dictate_width() { /// Test that getting counts from a directory is an error. #[test] fn test_read_from_directory_error() { - // TODO To match GNU `wc`, the `stdout` should be: - // - // " 0 0 0 .\n" - // + #[cfg(not(windows))] + const STDERR: &str = ".: Is a directory"; + #[cfg(windows)] + const STDERR: &str = ".: Access is denied"; + + #[cfg(not(windows))] + const STDOUT: &str = " 0 0 0 .\n"; + #[cfg(windows)] + const STDOUT: &str = ""; + new_ucmd!() .args(&["."]) .fails() - .stderr_contains(".: Is a directory\n") - .stdout_is("0 0 0 .\n"); + .stderr_contains(STDERR) + .stdout_is(STDOUT); } /// Test that getting counts from nonexistent file is an error. #[test] fn test_read_from_nonexistent_file() { + #[cfg(not(windows))] + const MSG: &str = "bogusfile: No such file or directory"; + #[cfg(windows)] + const MSG: &str = "bogusfile: The system cannot find the file specified"; new_ucmd!() .args(&["bogusfile"]) .fails() - .stderr_contains("bogusfile: No such file or directory\n"); + .stderr_contains(MSG) + .stdout_is(""); } diff --git a/tests/fixtures/wc/UTF_8_test.txt b/tests/fixtures/wc/UTF_8_test.txt index a5b5d50e6..cd0474c82 100644 Binary files a/tests/fixtures/wc/UTF_8_test.txt and b/tests/fixtures/wc/UTF_8_test.txt differ diff --git a/tests/fixtures/wc/UTF_8_weirdchars.txt b/tests/fixtures/wc/UTF_8_weirdchars.txt new file mode 100644 index 000000000..0c7670f5e --- /dev/null +++ b/tests/fixtures/wc/UTF_8_weirdchars.txt @@ -0,0 +1,25 @@ +zero-width space inbetween these: x​x +and inbetween two spaces: [ ​ ] +and at the end of the line: ​ + +non-breaking space: x x [   ]   + +simple unicode: xµx [ µ ] µ + +wide: xwx [ w ] w + +simple emoji: x👩x [ 👩 ] 👩 + +complex emoji: x👩‍🔬x [ 👩‍🔬 ] 👩‍🔬 + +Hello, world! + +line feed: x x [ ] + +vertical tab: x x [ ] + +horizontal tab: x x [ ] +this should be the longest line: +1234567 12345678 123456781234567812345678 + +Control character: xx [  ]  diff --git a/util/GHA-delete-GNU-workflow-logs.sh b/util/GHA-delete-GNU-workflow-logs.sh old mode 100644 new mode 100755 diff --git a/util/build-code_coverage.sh b/util/build-code_coverage.sh old mode 100644 new mode 100755 diff --git a/util/build-gnu.sh b/util/build-gnu.sh old mode 100644 new mode 100755 diff --git a/util/compare_gnu_result.py b/util/compare_gnu_result.py old mode 100644 new mode 100755 diff --git a/util/publish.sh b/util/publish.sh old mode 100644 new mode 100755 diff --git a/util/run-gnu-test.sh b/util/run-gnu-test.sh old mode 100644 new mode 100755 index 9d51a983e..483fc1be9 --- a/util/run-gnu-test.sh +++ b/util/run-gnu-test.sh @@ -1,5 +1,6 @@ #!/bin/bash # spell-checker:ignore (env/vars) BUILDDIR GNULIB SUBDIRS +cd "$(dirname "${BASH_SOURCE[0]}")/../.." set -e BUILDDIR="${PWD}/uutils/target/release" GNULIB_DIR="${PWD}/gnulib" diff --git a/util/show-code_coverage.sh b/util/show-code_coverage.sh old mode 100644 new mode 100755 diff --git a/util/update-version.sh b/util/update-version.sh old mode 100644 new mode 100755