diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index b2d77a5a4..8a1e142df 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -13,8 +13,8 @@ env: PROJECT_NAME: coreutils PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_AUTH: "uutils" - RUST_MIN_SRV: "1.43.1" ## v1.43.0 - RUST_COV_SRV: "2020-08-01" ## (~v1.47.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel + RUST_MIN_SRV: "1.47.0" ## MSRV v1.47.0 + RUST_COV_SRV: "2021-05-06" ## (~v1.52.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel on: [push, pull_request] @@ -249,6 +249,8 @@ jobs: # { os, target, cargo-options, features, use-cross, toolchain } - { os: ubuntu-latest , target: arm-unknown-linux-gnueabihf , features: feat_os_unix_gnueabihf , use-cross: use-cross } - { os: ubuntu-latest , target: aarch64-unknown-linux-gnu , features: feat_os_unix_gnueabihf , use-cross: use-cross } + - { os: ubuntu-latest , target: x86_64-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross } + # - { os: ubuntu-latest , target: x86_64-unknown-linux-gnu , features: feat_selinux , use-cross: use-cross } # - { os: ubuntu-18.04 , target: i586-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross } ## note: older windows platform; not required, dev-FYI only # - { os: ubuntu-18.04 , target: i586-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross } ## note: older windows platform; not required, dev-FYI only - { os: ubuntu-18.04 , target: i686-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross } diff --git a/.vscode/cspell.dictionaries/people.wordlist.txt b/.vscode/cspell.dictionaries/people.wordlist.txt index d7665585b..405733836 100644 --- a/.vscode/cspell.dictionaries/people.wordlist.txt +++ b/.vscode/cspell.dictionaries/people.wordlist.txt @@ -151,6 +151,9 @@ Sylvestre Ledru T Jameson Little Jameson Little +Thomas Queiroz + Thomas + Queiroz Tobias Bohumir Schottdorf Tobias Bohumir diff --git a/Cargo.lock b/Cargo.lock index 7a34ad49b..a9ec6d726 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,29 @@ dependencies = [ "compare", ] +[[package]] +name = "bindgen" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453c49e5950bb0eb63bb3df640e31618846c89d5b7faa54040d76e98e0134375" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "clap", + "env_logger 0.8.4", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote 1.0.9", + "regex", + "rustc-hash", + "shlex", + "which", +] + [[package]] name = "bit-set" version = "0.5.2" @@ -94,6 +117,18 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "blake2b_simd" version = "0.5.11" @@ -149,9 +184,18 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cc" -version = "1.0.68" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787" +checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" + +[[package]] +name = "cexpr" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db507a7679252d2276ed0dd8113c6875ec56d3089f9225b2b42c30cc1f8e5c89" +dependencies = [ + "nom", +] [[package]] name = "cfg-if" @@ -178,6 +222,17 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "clang-sys" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c" +dependencies = [ + "glob 0.3.0", + "libc", + "libloading", +] + [[package]] name = "clap" version = "2.33.3" @@ -241,6 +296,7 @@ dependencies = [ "rand 0.7.3", "regex", "rlimit", + "selinux", "sha1", "tempfile", "textwrap", @@ -464,9 +520,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", @@ -592,6 +648,19 @@ dependencies = [ "regex", ] +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + [[package]] name = "fake-simd" version = "0.1.2" @@ -634,6 +703,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + [[package]] name = "gcd" version = "2.0.1" @@ -747,6 +822,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "if_rust_version" version = "1.0.0" @@ -802,12 +883,28 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.85" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3" +[[package]] +name = "libloading" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" +dependencies = [ + "cfg-if 1.0.0", + "winapi 0.3.9", +] + [[package]] name = "locale" version = "0.2.2" @@ -939,6 +1036,18 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec", + "funty", + "memchr 2.4.0", + "version_check", +] + [[package]] name = "ntapi" version = "0.3.6" @@ -1104,6 +1213,12 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "pkg-config" version = "0.3.19" @@ -1170,9 +1285,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" +checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" dependencies = [ "unicode-xid 0.2.2", ] @@ -1195,7 +1310,7 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f" dependencies = [ - "env_logger", + "env_logger 0.7.1", "log", "rand 0.7.3", "rand_core 0.5.1", @@ -1216,6 +1331,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + [[package]] name = "rand" version = "0.5.6" @@ -1384,6 +1505,12 @@ dependencies = [ "redox_syscall 0.2.9", ] +[[package]] +name = "reference-counted-singleton" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef445213a92fdddc4bc69d9111156d20ffd50704a86ad82b372aab701a0d3a9a" + [[package]] name = "regex" version = "1.5.4" @@ -1438,6 +1565,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "same-file" version = "1.0.6" @@ -1453,6 +1586,32 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "selinux" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd525eeb189eb26c8471463186bba87644e3d8a9c7ae392adaf9ec45ede574bc" +dependencies = [ + "bitflags", + "libc", + "once_cell", + "reference-counted-singleton", + "selinux-sys", + "thiserror", +] + +[[package]] +name = "selinux-sys" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d842d177120716580c4c6cb56dfe3c5f3a3e3dcec635091f1b2034b6c0be4c6" +dependencies = [ + "bindgen", + "cc", + "dunce", + "walkdir", +] + [[package]] name = "sha1" version = "0.6.0" @@ -1484,6 +1643,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "shlex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d" + [[package]] name = "signal-hook" version = "0.3.9" @@ -1572,15 +1737,21 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.73" +version = "1.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" +checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" dependencies = [ "proc-macro2", "quote 1.0.9", "unicode-xid 0.2.2", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.2.0" @@ -1614,6 +1785,15 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "termcolor" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +dependencies = [ + "winapi-util", +] + [[package]] name = "termion" version = "1.5.6" @@ -2111,6 +2291,7 @@ name = "uu_id" version = "0.0.7" dependencies = [ "clap", + "selinux", "uucore", "uucore_procs", ] @@ -2603,6 +2784,7 @@ version = "0.0.7" dependencies = [ "clap", "libc", + "nix 0.20.0", "redox_syscall 0.1.57", "uucore", "uucore_procs", @@ -2872,6 +3054,15 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +[[package]] +name = "which" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724" +dependencies = [ + "libc", +] + [[package]] name = "wild" version = "2.0.4" @@ -2924,6 +3115,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" + [[package]] name = "xattr" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index c3819560e..eb3f99190 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ # coreutils (uutils) # * see the repository LICENSE, README, and CONTRIBUTING files for more information +# spell-checker:ignore (libs) libselinux + [package] name = "coreutils" version = "0.0.7" @@ -139,6 +141,11 @@ feat_os_unix_musl = [ # "feat_require_unix", ] +# "feat_selinux" == set of utilities providing support for SELinux Security Context if enabled with `--features feat_selinux`. +# NOTE: +# The selinux(-sys) crate requires `libselinux` headers and shared library to be accessible in the C toolchain at compile time. +# Running a uutils compiled with `feat_selinux` requires an SELinux enabled Kernel at run time. +feat_selinux = ["id/selinux", "selinux"] ## feature sets with requirements (restricting cross-platform availability) # # ** NOTE: these `feat_require_...` sets should be minimized as much as possible to encourage cross-platform availability of utilities @@ -230,6 +237,7 @@ clap = { version = "2.33", features = ["wrap_help"] } lazy_static = { version="1.3" } textwrap = { version="=0.11.0", features=["term_size"] } # !maint: [2020-05-10; rivy] unstable crate using undocumented features; pinned currently, will review uucore = { version=">=0.0.9", package="uucore", path="src/uucore" } +selinux = { version="0.1.3", optional = true } # * uutils uu_test = { optional=true, version="0.0.7", package="uu_test", path="src/uu/test" } # diff --git a/README.md b/README.md index 379825c16..23e4c8cd3 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ to compile anywhere, and this is as good a way as any to try and learn it. ### Rust Version uutils follows Rust's release channels and is tested against stable, beta and nightly. -The current oldest supported version of the Rust compiler is `1.43.1`. +The current oldest supported version of the Rust compiler is `1.47`. On both Windows and Redox, only the nightly version is tested currently. diff --git a/build.rs b/build.rs index e9fe129eb..4fbb27cce 100644 --- a/build.rs +++ b/build.rs @@ -28,8 +28,8 @@ pub fn main() { if val == "1" && key.starts_with(env_feature_prefix) { let krate = key[env_feature_prefix.len()..].to_lowercase(); match krate.as_ref() { - "default" | "macos" | "unix" | "windows" => continue, // common/standard feature names - "nightly" | "test_unimplemented" => continue, // crate-local custom features + "default" | "macos" | "unix" | "windows" | "selinux" => continue, // common/standard feature names + "nightly" | "test_unimplemented" => continue, // crate-local custom features "test" => continue, // over-ridden with 'uu_test' to avoid collision with rust core crate 'test' s if s.starts_with(feature_prefix) => continue, // crate feature sets _ => {} // util feature name diff --git a/clippy.toml b/clippy.toml index 0a0a69a41..dbabdab50 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1 +1 @@ -msrv = "1.43.1" +msrv = "1.47.0" diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index 1092938df..baa5fe292 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -8,6 +8,8 @@ #[macro_use] extern crate uucore; +use uucore::error::UCustomError; +use uucore::error::UResult; #[cfg(unix)] use uucore::fsext::statfs_fn; use uucore::fsext::{read_fs_list, FsUsage, MountInfo}; @@ -19,8 +21,10 @@ use std::cell::Cell; use std::collections::HashMap; use std::collections::HashSet; +use std::error::Error; #[cfg(unix)] use std::ffi::CString; +use std::fmt::Display; #[cfg(unix)] use std::mem; @@ -33,9 +37,6 @@ use std::path::Path; static ABOUT: &str = "Show information about the file system on which each FILE resides,\n\ or all file systems by default."; -static EXIT_OK: i32 = 0; -static EXIT_ERR: i32 = 1; - static OPT_ALL: &str = "all"; static OPT_BLOCKSIZE: &str = "blocksize"; static OPT_DIRECT: &str = "direct"; @@ -226,8 +227,8 @@ fn filter_mount_list(vmi: Vec, paths: &[String], opt: &Options) -> Ve /// Convert `value` to a human readable string based on `base`. /// e.g. It returns 1G when value is 1 * 1024 * 1024 * 1024 and base is 1024. /// Note: It returns `value` if `base` isn't positive. -fn human_readable(value: u64, base: i64) -> String { - match base { +fn human_readable(value: u64, base: i64) -> UResult { + let base_str = match base { d if d < 0 => value.to_string(), // ref: [Binary prefix](https://en.wikipedia.org/wiki/Binary_prefix) @@ @@ -242,8 +243,10 @@ fn human_readable(value: u64, base: i64) -> String { NumberPrefix::Prefixed(prefix, bytes) => format!("{:.1}{}", bytes, prefix.symbol()), }, - _ => crash!(EXIT_ERR, "Internal error: Unknown base value {}", base), - } + _ => return Err(DfError::InvalidBaseValue(base.to_string()).into()), + }; + + Ok(base_str) } fn use_size(free_size: u64, total_size: u64) -> String { @@ -256,7 +259,31 @@ fn use_size(free_size: u64, total_size: u64) -> String { ); } -pub fn uumain(args: impl uucore::Args) -> i32 { +#[derive(Debug)] +enum DfError { + InvalidBaseValue(String), +} + +impl Display for DfError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DfError::InvalidBaseValue(s) => write!(f, "Internal error: Unknown base value {}", s), + } + } +} + +impl Error for DfError {} + +impl UCustomError for DfError { + fn code(&self) -> i32 { + match self { + DfError::InvalidBaseValue(_) => 1, + } + } +} + +#[uucore_procs::gen_uumain] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { let usage = get_usage(); let matches = uu_app().usage(&usage[..]).get_matches_from(args); @@ -269,7 +296,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { { if matches.is_present(OPT_INODES) { println!("{}: doesn't support -i option", executable!()); - return EXIT_OK; + return Ok(()); } } @@ -353,15 +380,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if opt.show_inode_instead { print!( "{0: >12} ", - human_readable(fs.usage.files, opt.human_readable_base) + human_readable(fs.usage.files, opt.human_readable_base)? ); print!( "{0: >12} ", - human_readable(fs.usage.files - fs.usage.ffree, opt.human_readable_base) + human_readable(fs.usage.files - fs.usage.ffree, opt.human_readable_base)? ); print!( "{0: >12} ", - human_readable(fs.usage.ffree, opt.human_readable_base) + human_readable(fs.usage.ffree, opt.human_readable_base)? ); print!( "{0: >5} ", @@ -375,15 +402,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let free_size = fs.usage.blocksize * fs.usage.bfree; print!( "{0: >12} ", - human_readable(total_size, opt.human_readable_base) + human_readable(total_size, opt.human_readable_base)? ); print!( "{0: >12} ", - human_readable(total_size - free_size, opt.human_readable_base) + human_readable(total_size - free_size, opt.human_readable_base)? ); print!( "{0: >12} ", - human_readable(free_size, opt.human_readable_base) + human_readable(free_size, opt.human_readable_base)? ); if cfg!(target_os = "macos") { let used = fs.usage.blocks - fs.usage.bfree; @@ -396,7 +423,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { println!(); } - EXIT_OK + Ok(()) } pub fn uu_app() -> App<'static, 'static> { diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 66c3eb259..d5c37ce21 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -36,28 +36,86 @@ fn get_usage() -> String { format!("{0} [OPTION]... [FILE]...", executable!()) } -fn tabstops_parse(s: String) -> Vec { - let words = s.split(','); +/// The mode to use when replacing tabs beyond the last one specified in +/// the `--tabs` argument. +enum RemainingMode { + None, + Slash, + Plus, +} - let nums = words - .map(|sn| { - sn.parse::() - .unwrap_or_else(|_| crash!(1, "{}\n", "tab size contains invalid character(s)")) - }) - .collect::>(); +/// Decide whether the character is either a space or a comma. +/// +/// # Examples +/// +/// ```rust,ignore +/// assert!(is_space_or_comma(' ')) +/// assert!(is_space_or_comma(',')) +/// assert!(!is_space_or_comma('a')) +/// ``` +fn is_space_or_comma(c: char) -> bool { + c == ' ' || c == ',' +} - if nums.iter().any(|&n| n == 0) { - crash!(1, "{}\n", "tab size cannot be 0"); +/// Parse a list of tabstops from a `--tabs` argument. +/// +/// This function returns both the vector of numbers appearing in the +/// comma- or space-separated list, and also an optional mode, specified +/// by either a "/" or a "+" character appearing before the final number +/// in the list. This mode defines the strategy to use for computing the +/// number of spaces to use for columns beyond the end of the tab stop +/// list specified here. +fn tabstops_parse(s: String) -> (RemainingMode, Vec) { + // Leading commas and spaces are ignored. + let s = s.trim_start_matches(is_space_or_comma); + + // If there were only commas and spaces in the string, just use the + // default tabstops. + if s.is_empty() { + return (RemainingMode::None, vec![DEFAULT_TABSTOP]); } - if let (false, _) = nums - .iter() - .fold((true, 0), |(acc, last), &n| (acc && last <= n, n)) - { - crash!(1, "{}\n", "tab sizes must be ascending"); - } + let mut nums = vec![]; + let mut remaining_mode = RemainingMode::None; + for word in s.split(is_space_or_comma) { + let bytes = word.as_bytes(); + for i in 0..bytes.len() { + match bytes[i] { + b'+' => { + remaining_mode = RemainingMode::Plus; + } + b'/' => { + remaining_mode = RemainingMode::Slash; + } + _ => { + // Parse a number from the byte sequence. + let num = from_utf8(&bytes[i..]).unwrap().parse::().unwrap(); - nums + // Tab size must be positive. + if num == 0 { + crash!(1, "{}\n", "tab size cannot be 0"); + } + + // Tab sizes must be ascending. + if let Some(last_stop) = nums.last() { + if *last_stop >= num { + crash!(1, "tab sizes must be ascending"); + } + } + + // Append this tab stop to the list of all tabstops. + nums.push(num); + break; + } + } + } + } + // If no numbers could be parsed (for example, if `s` were "+,+,+"), + // then just use the default tabstops. + if nums.is_empty() { + nums = vec![DEFAULT_TABSTOP]; + } + (remaining_mode, nums) } struct Options { @@ -66,13 +124,17 @@ struct Options { tspaces: String, iflag: bool, uflag: bool, + + /// Strategy for expanding tabs for columns beyond those specified + /// in `tabstops`. + remaining_mode: RemainingMode, } impl Options { fn new(matches: &ArgMatches) -> Options { - let tabstops = match matches.value_of(options::TABS) { + let (remaining_mode, tabstops) = match matches.value_of(options::TABS) { Some(s) => tabstops_parse(s.to_string()), - None => vec![DEFAULT_TABSTOP], + None => (RemainingMode::None, vec![DEFAULT_TABSTOP]), }; let iflag = matches.is_present(options::INITIAL); @@ -102,6 +164,7 @@ impl Options { tspaces, iflag, uflag, + remaining_mode, } } } @@ -159,13 +222,41 @@ fn open(path: String) -> BufReader> { } } -fn next_tabstop(tabstops: &[usize], col: usize) -> usize { - if tabstops.len() == 1 { - tabstops[0] - col % tabstops[0] - } else { - match tabstops.iter().find(|&&t| t > col) { +/// Compute the number of spaces to the next tabstop. +/// +/// `tabstops` is the sequence of tabstop locations. +/// +/// `col` is the index of the current cursor in the line being written. +/// +/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry +/// in the `tabstops` slice is interpreted as a relative number of +/// spaces, which this function will return for every input value of +/// `col` beyond the end of the second-to-last element of `tabstops`. +/// +/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry +/// in the `tabstops` slice is interpreted as a relative number of +/// spaces, which this function will return for every input value of +/// `col` beyond the end of the second-to-last element of `tabstops`. +fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) -> usize { + let num_tabstops = tabstops.len(); + match remaining_mode { + RemainingMode::Plus => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) { Some(t) => t - col, - None => 1, + None => tabstops[num_tabstops - 1] - 1, + }, + RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) { + Some(t) => t - col, + None => tabstops[num_tabstops - 1] - col % tabstops[num_tabstops - 1], + }, + RemainingMode::None => { + if num_tabstops == 1 { + tabstops[0] - col % tabstops[0] + } else { + match tabstops.iter().find(|&&t| t > col) { + Some(t) => t - col, + None => 1, + } + } } } } @@ -232,12 +323,16 @@ fn expand(options: Options) { match ctype { Tab => { // figure out how many spaces to the next tabstop - let nts = next_tabstop(ts, col); + let nts = next_tabstop(ts, col, &options.remaining_mode); col += nts; // now dump out either spaces if we're expanding, or a literal tab if we're not if init || !options.iflag { - safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes())); + if nts <= options.tspaces.len() { + safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes())); + } else { + safe_unwrap!(output.write_all(" ".repeat(nts).as_bytes())); + }; } else { safe_unwrap!(output.write_all(&buf[byte..byte + nbytes])); } @@ -269,3 +364,30 @@ fn expand(options: Options) { } } } + +#[cfg(test)] +mod tests { + use super::next_tabstop; + use super::RemainingMode; + + #[test] + fn test_next_tabstop_remaining_mode_none() { + assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::None), 1); + assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::None), 2); + assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::None), 1); + } + + #[test] + fn test_next_tabstop_remaining_mode_plus() { + assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Plus), 1); + assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Plus), 4); + assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Plus), 4); + } + + #[test] + fn test_next_tabstop_remaining_mode_slash() { + assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Slash), 1); + assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Slash), 2); + assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Slash), 4); + } +} diff --git a/src/uu/id/Cargo.toml b/src/uu/id/Cargo.toml index 367fb553c..518de2729 100644 --- a/src/uu/id/Cargo.toml +++ b/src/uu/id/Cargo.toml @@ -18,7 +18,11 @@ path = "src/id.rs" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore", features=["entries", "process"] } uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } +selinux = { version="0.1.3", optional = true } [[bin]] name = "id" path = "src/main.rs" + +[features] +feat_selinux = ["selinux"] diff --git a/src/uu/id/src/id.rs b/src/uu/id/src/id.rs index d5acc97f3..9f92a4561 100644 --- a/src/uu/id/src/id.rs +++ b/src/uu/id/src/id.rs @@ -5,7 +5,10 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// + +// spell-checker:ignore (ToDO) asid auditid auditinfo auid cstr egid emod euid getaudit getlogin gflag nflag pline rflag termid uflag gsflag zflag cflag + +// README: // This was originally based on BSD's `id` // (noticeable in functionality, usage text, options text, etc.) // and synced with: @@ -25,8 +28,10 @@ // // * Help text based on BSD's `id` manpage and GNU's `id` manpage. // - -// spell-checker:ignore (ToDO) asid auditid auditinfo auid cstr egid emod euid getaudit getlogin gflag nflag pline rflag termid uflag gsflag zflag +// * This passes GNU's coreutils Test suite (8.32) for "tests/id/context.sh" if compiled with +// `--features feat_selinux`. It should also pass "tests/id/no-context.sh", but that depends on +// `uu_ls -Z` being implemented and therefore fails at the moment +// #![allow(non_camel_case_types)] #![allow(dead_code)] @@ -35,6 +40,8 @@ extern crate uucore; use clap::{crate_version, App, Arg}; +#[cfg(all(target_os = "linux", feature = "selinux"))] +use selinux; use std::ffi::CStr; use uucore::entries::{self, Group, Locate, Passwd}; pub use uucore::libc; @@ -50,6 +57,11 @@ macro_rules! cstr2cow { static ABOUT: &str = "Print user and group information for each specified USER, or (when USER omitted) for the current user."; +#[cfg(not(feature = "selinux"))] +static CONTEXT_HELP_TEXT: &str = "print only the security context of the process (not enabled)"; +#[cfg(feature = "selinux")] +static CONTEXT_HELP_TEXT: &str = "print only the security context of the process"; + mod options { pub const OPT_AUDIT: &str = "audit"; // GNU's id does not have this pub const OPT_CONTEXT: &str = "context"; @@ -93,6 +105,8 @@ struct State { gsflag: bool, // --groups rflag: bool, // --real zflag: bool, // --zero + cflag: bool, // --context + selinux_supported: bool, ids: Option, // The behavior for calling GNU's `id` and calling GNU's `id $USER` is similar but different. // * The SELinux context is only displayed without a specified user. @@ -109,6 +123,7 @@ struct State { // 1000 10 968 975 // +++ exited with 0 +++ user_specified: bool, + exit_code: i32, } pub fn uumain(args: impl uucore::Args) -> i32 { @@ -132,8 +147,21 @@ pub fn uumain(args: impl uucore::Args) -> i32 { gsflag: matches.is_present(options::OPT_GROUPS), rflag: matches.is_present(options::OPT_REAL_ID), zflag: matches.is_present(options::OPT_ZERO), + cflag: matches.is_present(options::OPT_CONTEXT), + + selinux_supported: { + #[cfg(feature = "selinux")] + { + selinux::kernel_support() != selinux::KernelSupport::Unsupported + } + #[cfg(not(feature = "selinux"))] + { + false + } + }, user_specified: !users.is_empty(), ids: None, + exit_code: 0, }; let default_format = { @@ -141,13 +169,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 { !(state.uflag || state.gflag || state.gsflag) }; - if (state.nflag || state.rflag) && default_format { + if (state.nflag || state.rflag) && default_format && !state.cflag { crash!(1, "cannot print only names or real IDs in default format"); } - if (state.zflag) && default_format { + if state.zflag && default_format && !state.cflag { // NOTE: GNU test suite "id/zero.sh" needs this stderr output: crash!(1, "option --zero not permitted in default format"); } + if state.user_specified && state.cflag { + crash!(1, "cannot print security context when user specified"); + } let delimiter = { if state.zflag { @@ -163,7 +194,23 @@ pub fn uumain(args: impl uucore::Args) -> i32 { '\n' } }; - let mut exit_code = 0; + + if state.cflag { + if state.selinux_supported { + // print SElinux context and exit + #[cfg(all(target_os = "linux", feature = "selinux"))] + if let Ok(context) = selinux::SecurityContext::current(false) { + let bytes = context.as_bytes(); + print!("{}{}", String::from_utf8_lossy(bytes), line_ending); + } else { + // print error because `cflag` was explicitly requested + crash!(1, "can't get process context"); + } + return state.exit_code; + } else { + crash!(1, "--context (-Z) works only on an SELinux-enabled kernel"); + } + } for i in 0..=users.len() { let possible_pw = if !state.user_specified { @@ -173,7 +220,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { Ok(p) => Some(p), Err(_) => { show_error!("'{}': no such user", users[i]); - exit_code = 1; + state.exit_code = 1; if i + 1 >= users.len() { break; } else { @@ -187,17 +234,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if matches.is_present(options::OPT_PASSWORD) { // BSD's `id` ignores all but the first specified user pline(possible_pw.map(|v| v.uid())); - return exit_code; + return state.exit_code; }; if matches.is_present(options::OPT_HUMAN_READABLE) { // BSD's `id` ignores all but the first specified user pretty(possible_pw); - return exit_code; + return state.exit_code; } if matches.is_present(options::OPT_AUDIT) { // BSD's `id` ignores specified users auditid(); - return exit_code; + return state.exit_code; } let (uid, gid) = possible_pw.map(|p| (p.uid(), p.gid())).unwrap_or(( @@ -217,7 +264,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if state.nflag { entries::gid2grp(gid).unwrap_or_else(|_| { show_error!("cannot find name for group ID {}", gid); - exit_code = 1; + state.exit_code = 1; gid.to_string() }) } else { @@ -232,7 +279,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if state.nflag { entries::uid2usr(uid).unwrap_or_else(|_| { show_error!("cannot find name for user ID {}", uid); - exit_code = 1; + state.exit_code = 1; uid.to_string() }) } else { @@ -257,7 +304,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if state.nflag { entries::gid2grp(id).unwrap_or_else(|_| { show_error!("cannot find name for group ID {}", id); - exit_code = 1; + state.exit_code = 1; id.to_string() }) } else { @@ -276,7 +323,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } if default_format { - id_print(&state, groups); + id_print(&mut state, groups); } print!("{}", line_ending); @@ -285,7 +332,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } - exit_code + state.exit_code } pub fn uu_app() -> App<'static, 'static> { @@ -319,6 +366,7 @@ pub fn uu_app() -> App<'static, 'static> { Arg::with_name(options::OPT_GROUP) .short("g") .long(options::OPT_GROUP) + .conflicts_with(options::OPT_EFFECTIVE_USER) .help("Display only the effective group ID as a number"), ) .arg( @@ -328,6 +376,7 @@ pub fn uu_app() -> App<'static, 'static> { .conflicts_with_all(&[ options::OPT_GROUP, options::OPT_EFFECTIVE_USER, + options::OPT_CONTEXT, options::OPT_HUMAN_READABLE, options::OPT_PASSWORD, options::OPT_AUDIT, @@ -379,7 +428,8 @@ pub fn uu_app() -> App<'static, 'static> { Arg::with_name(options::OPT_CONTEXT) .short("Z") .long(options::OPT_CONTEXT) - .help("NotImplemented: print only the security context of the process"), + .conflicts_with_all(&[options::OPT_GROUP, options::OPT_EFFECTIVE_USER]) + .help(CONTEXT_HELP_TEXT), ) .arg( Arg::with_name(options::ARG_USERS) @@ -499,34 +549,80 @@ fn auditid() { println!("asid={}", auditinfo.ai_asid); } -fn id_print(state: &State, groups: Vec) { +fn id_print(state: &mut State, groups: Vec) { let uid = state.ids.as_ref().unwrap().uid; let gid = state.ids.as_ref().unwrap().gid; let euid = state.ids.as_ref().unwrap().euid; let egid = state.ids.as_ref().unwrap().egid; - print!("uid={}({})", uid, entries::uid2usr(uid).unwrap()); - print!(" gid={}({})", gid, entries::gid2grp(gid).unwrap()); + print!( + "uid={}({})", + uid, + entries::uid2usr(uid).unwrap_or_else(|_| { + show_error!("cannot find name for user ID {}", uid); + state.exit_code = 1; + uid.to_string() + }) + ); + print!( + " gid={}({})", + gid, + entries::gid2grp(gid).unwrap_or_else(|_| { + show_error!("cannot find name for group ID {}", gid); + state.exit_code = 1; + gid.to_string() + }) + ); if !state.user_specified && (euid != uid) { - print!(" euid={}({})", euid, entries::uid2usr(euid).unwrap()); + print!( + " euid={}({})", + euid, + entries::uid2usr(euid).unwrap_or_else(|_| { + show_error!("cannot find name for user ID {}", euid); + state.exit_code = 1; + euid.to_string() + }) + ); } if !state.user_specified && (egid != gid) { - print!(" egid={}({})", euid, entries::gid2grp(egid).unwrap()); + print!( + " egid={}({})", + euid, + entries::gid2grp(egid).unwrap_or_else(|_| { + show_error!("cannot find name for group ID {}", egid); + state.exit_code = 1; + egid.to_string() + }) + ); } print!( " groups={}", groups .iter() - .map(|&gr| format!("{}({})", gr, entries::gid2grp(gr).unwrap())) + .map(|&gr| format!( + "{}({})", + gr, + entries::gid2grp(gr).unwrap_or_else(|_| { + show_error!("cannot find name for group ID {}", gr); + state.exit_code = 1; + gr.to_string() + }) + )) .collect::>() .join(",") ); - // NOTE: (SELinux NotImplemented) placeholder: - // if !state.user_specified { - // // print SElinux context (does not depend on "-Z") - // print!(" context={}", get_selinux_contexts().join(":")); - // } + #[cfg(all(target_os = "linux", feature = "selinux"))] + if state.selinux_supported + && !state.user_specified + && std::env::var_os("POSIXLY_CORRECT").is_none() + { + // print SElinux context (does not depend on "-Z") + if let Ok(context) = selinux::SecurityContext::current(false) { + let bytes = context.as_bytes(); + print!(" context={}", String::from_utf8_lossy(bytes)); + } + } } #[cfg(not(target_os = "linux"))] diff --git a/src/uu/ln/src/ln.rs b/src/uu/ln/src/ln.rs index d354acce9..65bdcf36c 100644 --- a/src/uu/ln/src/ln.rs +++ b/src/uu/ln/src/ln.rs @@ -11,9 +11,12 @@ extern crate uucore; use clap::{crate_version, App, Arg}; +use uucore::error::{UCustomError, UResult}; use std::borrow::Cow; +use std::error::Error; use std::ffi::OsStr; +use std::fmt::Display; use std::fs; use std::io::{stdin, Result}; @@ -44,6 +47,51 @@ pub enum OverwriteMode { Force, } +#[derive(Debug)] +enum LnError { + TargetIsDirectory(String), + SomeLinksFailed, + FailedToLink(String), + MissingDestination(String), + ExtraOperand(String), + InvalidBackupMode(String), +} + +impl Display for LnError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::TargetIsDirectory(s) => write!(f, "target '{}' is not a directory", s), + Self::FailedToLink(s) => write!(f, "failed to link '{}'", s), + Self::SomeLinksFailed => write!(f, "some links failed to create"), + Self::MissingDestination(s) => { + write!(f, "missing destination file operand after '{}'", s) + } + Self::ExtraOperand(s) => write!( + f, + "extra operand '{}'\nTry '{} --help' for more information.", + s, + executable!() + ), + Self::InvalidBackupMode(s) => write!(f, "{}", s), + } + } +} + +impl Error for LnError {} + +impl UCustomError for LnError { + fn code(&self) -> i32 { + match self { + Self::TargetIsDirectory(_) => 1, + Self::SomeLinksFailed => 1, + Self::FailedToLink(_) => 1, + Self::MissingDestination(_) => 1, + Self::ExtraOperand(_) => 1, + Self::InvalidBackupMode(_) => 1, + } + } +} + fn get_usage() -> String { format!( "{0} [OPTION]... [-T] TARGET LINK_NAME (1st form) @@ -86,7 +134,8 @@ mod options { static ARG_FILES: &str = "files"; -pub fn uumain(args: impl uucore::Args) -> i32 { +#[uucore_procs::gen_uumain] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { let usage = get_usage(); let long_usage = get_long_usage(); @@ -122,8 +171,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { ); let backup_mode = match backup_mode { Err(err) => { - show_usage_error!("{}", err); - return 1; + return Err(LnError::InvalidBackupMode(err).into()); } Ok(mode) => mode, }; @@ -246,7 +294,7 @@ pub fn uu_app() -> App<'static, 'static> { ) } -fn exec(files: &[PathBuf], settings: &Settings) -> i32 { +fn exec(files: &[PathBuf], settings: &Settings) -> UResult<()> { // Handle cases where we create links in a directory first. if let Some(ref name) = settings.target_dir { // 4th form: a directory is specified by -t. @@ -267,35 +315,22 @@ fn exec(files: &[PathBuf], settings: &Settings) -> i32 { // 1st form. Now there should be only two operands, but if -T is // specified we may have a wrong number of operands. if files.len() == 1 { - show_error!( - "missing destination file operand after '{}'", - files[0].to_string_lossy() - ); - return 1; + return Err(LnError::MissingDestination(files[0].to_string_lossy().into()).into()); } if files.len() > 2 { - show_error!( - "extra operand '{}'\nTry '{} --help' for more information.", - files[2].display(), - executable!() - ); - return 1; + return Err(LnError::ExtraOperand(files[2].display().to_string()).into()); } assert!(!files.is_empty()); match link(&files[0], &files[1], settings) { - Ok(_) => 0, - Err(e) => { - show_error!("{}", e); - 1 - } + Ok(_) => Ok(()), + Err(e) => Err(LnError::FailedToLink(e.to_string()).into()), } } -fn link_files_in_dir(files: &[PathBuf], target_dir: &Path, settings: &Settings) -> i32 { +fn link_files_in_dir(files: &[PathBuf], target_dir: &Path, settings: &Settings) -> UResult<()> { if !target_dir.is_dir() { - show_error!("target '{}' is not a directory", target_dir.display()); - return 1; + return Err(LnError::TargetIsDirectory(target_dir.display().to_string()).into()); } let mut all_successful = true; @@ -354,9 +389,9 @@ fn link_files_in_dir(files: &[PathBuf], target_dir: &Path, settings: &Settings) } } if all_successful { - 0 + Ok(()) } else { - 1 + Err(LnError::SomeLinksFailed.into()) } } diff --git a/src/uu/sort/src/check.rs b/src/uu/sort/src/check.rs index f1cd22686..5be752be0 100644 --- a/src/uu/sort/src/check.rs +++ b/src/uu/sort/src/check.rs @@ -9,24 +9,33 @@ use crate::{ chunks::{self, Chunk, RecycledChunk}, - compare_by, open, GlobalSettings, + compare_by, open, GlobalSettings, SortError, }; use itertools::Itertools; use std::{ cmp::Ordering, + ffi::OsStr, io::Read, iter, sync::mpsc::{sync_channel, Receiver, SyncSender}, thread, }; +use uucore::error::UResult; /// Check if the file at `path` is ordered. /// /// # Returns /// /// The code we should exit with. -pub fn check(path: &str, settings: &GlobalSettings) -> i32 { - let file = open(path); +pub fn check(path: &OsStr, settings: &GlobalSettings) -> UResult<()> { + let max_allowed_cmp = if settings.unique { + // If `unique` is enabled, the previous line must compare _less_ to the next one. + Ordering::Less + } else { + // Otherwise, the line previous line must compare _less or equal_ to the next one. + Ordering::Equal + }; + let file = open(path)?; let (recycled_sender, recycled_receiver) = sync_channel(2); let (loaded_sender, loaded_receiver) = sync_channel(2); thread::spawn({ @@ -34,7 +43,13 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 { move || reader(file, recycled_receiver, loaded_sender, &settings) }); for _ in 0..2 { - let _ = recycled_sender.send(RecycledChunk::new(100 * 1024)); + let _ = recycled_sender.send(RecycledChunk::new(if settings.buffer_size < 100 * 1024 { + // when the buffer size is smaller than 100KiB we choose it instead of the default. + // this improves testability. + settings.buffer_size + } else { + 100 * 1024 + })); } let mut prev_chunk: Option = None; @@ -53,30 +68,35 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 { settings, prev_chunk.line_data(), chunk.line_data(), - ) == Ordering::Greater + ) > max_allowed_cmp { - if !settings.check_silent { - println!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line); + return Err(SortError::Disorder { + file: path.to_owned(), + line_number: line_idx, + line: new_first.line.to_owned(), + silent: settings.check_silent, } - return 1; + .into()); } let _ = recycled_sender.send(prev_chunk.recycle()); } for (a, b) in chunk.lines().iter().tuple_windows() { line_idx += 1; - if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) == Ordering::Greater - { - if !settings.check_silent { - println!("sort: {}:{}: disorder: {}", path, line_idx, b.line); + if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) > max_allowed_cmp { + return Err(SortError::Disorder { + file: path.to_owned(), + line_number: line_idx, + line: b.line.to_owned(), + silent: settings.check_silent, } - return 1; + .into()); } } prev_chunk = Some(chunk); } - 0 + Ok(()) } /// The function running on the reader thread. @@ -85,7 +105,7 @@ fn reader( receiver: Receiver, sender: SyncSender, settings: &GlobalSettings, -) { +) -> UResult<()> { let mut carry_over = vec![]; for recycled_chunk in receiver.iter() { let should_continue = chunks::read( @@ -101,9 +121,10 @@ fn reader( b'\n' }, settings, - ); + )?; if !should_continue { break; } } + Ok(()) } diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 9e9d212c2..80d6060d4 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -14,8 +14,9 @@ use std::{ use memchr::memchr_iter; use ouroboros::self_referencing; +use uucore::error::{UResult, USimpleError}; -use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line}; +use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line, SortError}; /// The chunk that is passed around between threads. /// `lines` consist of slices into `buffer`. @@ -137,10 +138,10 @@ pub fn read( max_buffer_size: Option, carry_over: &mut Vec, file: &mut T, - next_files: &mut impl Iterator, + next_files: &mut impl Iterator>, separator: u8, settings: &GlobalSettings, -) -> bool { +) -> UResult { let RecycledChunk { lines, selections, @@ -159,12 +160,12 @@ pub fn read( max_buffer_size, carry_over.len(), separator, - ); + )?; carry_over.clear(); carry_over.extend_from_slice(&buffer[read..]); if read != 0 { - let payload = Chunk::new(buffer, |buffer| { + let payload: UResult = Chunk::try_new(buffer, |buffer| { let selections = unsafe { // SAFETY: It is safe to transmute to an empty vector of selections with shorter lifetime. // It was only temporarily transmuted to a Vec> to make recycling possible. @@ -175,18 +176,19 @@ pub fn read( // because it was only temporarily transmuted to a Vec> to make recycling possible. std::mem::transmute::>, Vec>>(lines) }; - let read = crash_if_err!(1, std::str::from_utf8(&buffer[..read])); + let read = std::str::from_utf8(&buffer[..read]) + .map_err(|error| SortError::Uft8Error { error })?; let mut line_data = LineData { selections, num_infos, parsed_floats, }; parse_lines(read, &mut lines, &mut line_data, separator, settings); - ChunkContents { lines, line_data } + Ok(ChunkContents { lines, line_data }) }); - sender.send(payload).unwrap(); + sender.send(payload?).unwrap(); } - should_continue + Ok(should_continue) } /// Split `read` into `Line`s, and add them to `lines`. @@ -242,12 +244,12 @@ fn parse_lines<'a>( /// * Whether this function should be called again. fn read_to_buffer( file: &mut T, - next_files: &mut impl Iterator, + next_files: &mut impl Iterator>, buffer: &mut Vec, max_buffer_size: Option, start_offset: usize, separator: u8, -) -> (usize, bool) { +) -> UResult<(usize, bool)> { let mut read_target = &mut buffer[start_offset..]; let mut last_file_target_size = read_target.len(); loop { @@ -274,7 +276,7 @@ fn read_to_buffer( // We read enough lines. let end = last_line_end.unwrap(); // We want to include the separator here, because it shouldn't be carried over. - return (end + 1, true); + return Ok((end + 1, true)); } else { // We need to read more lines let len = buffer.len(); @@ -299,11 +301,11 @@ fn read_to_buffer( if let Some(next_file) = next_files.next() { // There is another file. last_file_target_size = leftover_len; - *file = next_file; + *file = next_file?; } else { // This was the last file. let read_len = buffer.len() - leftover_len; - return (read_len, false); + return Ok((read_len, false)); } } } @@ -313,7 +315,7 @@ fn read_to_buffer( Err(e) if e.kind() == ErrorKind::Interrupted => { // retry } - Err(e) => crash!(1, "{}", e), + Err(e) => return Err(USimpleError::new(2, e.to_string())), } } } diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index e0814b7a2..8ff5665fd 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -22,12 +22,15 @@ use std::{ }; use itertools::Itertools; +use uucore::error::UResult; use crate::chunks::RecycledChunk; use crate::merge::ClosedTmpFile; use crate::merge::WriteableCompressedTmpFile; use crate::merge::WriteablePlainTmpFile; use crate::merge::WriteableTmpFile; +use crate::Output; +use crate::SortError; use crate::{ chunks::{self, Chunk}, compare_by, merge, sort_by, GlobalSettings, @@ -38,7 +41,11 @@ use tempfile::TempDir; const START_BUFFER_SIZE: usize = 8_000; /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. -pub fn ext_sort(files: &mut impl Iterator>, settings: &GlobalSettings) { +pub fn ext_sort( + files: &mut impl Iterator>>, + settings: &GlobalSettings, + output: Output, +) -> UResult<()> { let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1); let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1); thread::spawn({ @@ -51,23 +58,29 @@ pub fn ext_sort(files: &mut impl Iterator>, settings settings, sorted_receiver, recycled_sender, - ); + output, + ) } else { reader_writer::<_, WriteablePlainTmpFile>( files, settings, sorted_receiver, recycled_sender, - ); + output, + ) } } -fn reader_writer>, Tmp: WriteableTmpFile + 'static>( +fn reader_writer< + F: Iterator>>, + Tmp: WriteableTmpFile + 'static, +>( files: F, settings: &GlobalSettings, receiver: Receiver, sender: SyncSender, -) { + output: Output, +) -> UResult<()> { let separator = if settings.zero_terminated { b'\0' } else { @@ -81,22 +94,20 @@ fn reader_writer>, Tmp: WriteableTmpFile files, &settings.tmp_dir, separator, - // Heuristically chosen: Dividing by 10 seems to keep our memory usage roughly - // around settings.buffer_size as a whole. buffer_size, settings, receiver, sender, - ); + )?; match read_result { ReadResult::WroteChunksToFile { tmp_files, tmp_dir } => { let tmp_dir_size = tmp_files.len(); - let mut merger = merge::merge_with_file_limit::<_, _, Tmp>( + let merger = merge::merge_with_file_limit::<_, _, Tmp>( tmp_files.into_iter().map(|c| c.reopen()), settings, Some((tmp_dir, tmp_dir_size)), - ); - merger.write_all(settings); + )?; + merger.write_all(settings, output)?; } ReadResult::SortedSingleChunk(chunk) => { if settings.unique { @@ -106,9 +117,10 @@ fn reader_writer>, Tmp: WriteableTmpFile == Ordering::Equal }), settings, + output, ); } else { - print_sorted(chunk.lines().iter(), settings); + print_sorted(chunk.lines().iter(), settings, output); } } ReadResult::SortedTwoChunks([a, b]) => { @@ -128,15 +140,17 @@ fn reader_writer>, Tmp: WriteableTmpFile }) .map(|(line, _)| line), settings, + output, ); } else { - print_sorted(merged_iter.map(|(line, _)| line), settings); + print_sorted(merged_iter.map(|(line, _)| line), settings, output); } } ReadResult::EmptyInput => { // don't output anything } } + Ok(()) } /// The function that is executed on the sorter thread. @@ -145,7 +159,11 @@ fn sorter(receiver: Receiver, sender: SyncSender, settings: Global payload.with_contents_mut(|contents| { sort_by(&mut contents.lines, &settings, &contents.line_data) }); - sender.send(payload).unwrap(); + if sender.send(payload).is_err() { + // The receiver has gone away, likely because the other thread hit an error. + // We stop silently because the actual error is printed by the other thread. + return; + } } } @@ -165,15 +183,15 @@ enum ReadResult { } /// The function that is executed on the reader/writer thread. fn read_write_loop( - mut files: impl Iterator>, + mut files: impl Iterator>>, tmp_dir_parent: &Path, separator: u8, buffer_size: usize, settings: &GlobalSettings, receiver: Receiver, sender: SyncSender, -) -> ReadResult { - let mut file = files.next().unwrap(); +) -> UResult> { + let mut file = files.next().unwrap()?; let mut carry_over = vec![]; // kick things off with two reads @@ -191,14 +209,14 @@ fn read_write_loop( &mut files, separator, settings, - ); + )?; if !should_continue { drop(sender); // We have already read the whole input. Since we are in our first two reads, // this means that we can fit the whole input into memory. Bypass writing below and // handle this case in a more straightforward way. - return if let Ok(first_chunk) = receiver.recv() { + return Ok(if let Ok(first_chunk) = receiver.recv() { if let Ok(second_chunk) = receiver.recv() { ReadResult::SortedTwoChunks([first_chunk, second_chunk]) } else { @@ -206,16 +224,14 @@ fn read_write_loop( } } else { ReadResult::EmptyInput - }; + }); } } - let tmp_dir = crash_if_err!( - 1, - tempfile::Builder::new() - .prefix("uutils_sort") - .tempdir_in(tmp_dir_parent) - ); + let tmp_dir = tempfile::Builder::new() + .prefix("uutils_sort") + .tempdir_in(tmp_dir_parent) + .map_err(|_| SortError::TmpDirCreationFailed)?; let mut sender_option = Some(sender); let mut file_number = 0; @@ -224,7 +240,7 @@ fn read_write_loop( let mut chunk = match receiver.recv() { Ok(it) => it, _ => { - return ReadResult::WroteChunksToFile { tmp_files, tmp_dir }; + return Ok(ReadResult::WroteChunksToFile { tmp_files, tmp_dir }); } }; @@ -233,7 +249,7 @@ fn read_write_loop( tmp_dir.path().join(file_number.to_string()), settings.compress_prog.as_deref(), separator, - ); + )?; tmp_files.push(tmp_file); file_number += 1; @@ -250,7 +266,7 @@ fn read_write_loop( &mut files, separator, settings, - ); + )?; if !should_continue { sender_option = None; } @@ -265,8 +281,8 @@ fn write( file: PathBuf, compress_prog: Option<&str>, separator: u8, -) -> I::Closed { - let mut tmp_file = I::create(file, compress_prog); +) -> UResult { + let mut tmp_file = I::create(file, compress_prog)?; write_lines(chunk.lines(), tmp_file.as_write(), separator); tmp_file.finished_writing() } diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index 12d7a9b9b..fad966f64 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -9,44 +9,85 @@ use std::{ cmp::Ordering, + ffi::OsString, fs::{self, File}, io::{BufWriter, Read, Write}, iter, - path::PathBuf, + path::{Path, PathBuf}, process::{Child, ChildStdin, ChildStdout, Command, Stdio}, rc::Rc, sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender}, - thread, + thread::{self, JoinHandle}, }; use compare::Compare; use itertools::Itertools; use tempfile::TempDir; +use uucore::error::UResult; use crate::{ chunks::{self, Chunk, RecycledChunk}, - compare_by, GlobalSettings, + compare_by, open, GlobalSettings, Output, SortError, }; +/// If the output file occurs in the input files as well, copy the contents of the output file +/// and replace its occurrences in the inputs with that copy. +fn replace_output_file_in_input_files( + files: &mut [OsString], + settings: &GlobalSettings, + output: Option<&str>, +) -> UResult> { + let mut copy: Option<(TempDir, PathBuf)> = None; + if let Some(Ok(output_path)) = output.map(|path| Path::new(path).canonicalize()) { + for file in files { + if let Ok(file_path) = Path::new(file).canonicalize() { + if file_path == output_path { + if let Some((_dir, copy)) = © { + *file = copy.clone().into_os_string(); + } else { + let tmp_dir = tempfile::Builder::new() + .prefix("uutils_sort") + .tempdir_in(&settings.tmp_dir) + .map_err(|_| SortError::TmpDirCreationFailed)?; + let copy_path = tmp_dir.path().join("0"); + std::fs::copy(file_path, ©_path) + .map_err(|error| SortError::OpenTmpFileFailed { error })?; + *file = copy_path.clone().into_os_string(); + copy = Some((tmp_dir, copy_path)) + } + } + } + } + } + // if we created a TempDir its size must be one. + Ok(copy.map(|(dir, _copy)| (dir, 1))) +} + /// Merge pre-sorted `Box`s. /// /// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used. /// If `settings.compress_prog` is `Some`, intermediate files will be compressed with it. -pub fn merge>>( - files: Files, - settings: &GlobalSettings, -) -> FileMerger { +pub fn merge<'a>( + files: &mut [OsString], + settings: &'a GlobalSettings, + output: Option<&str>, +) -> UResult> { + let tmp_dir = replace_output_file_in_input_files(files, settings, output)?; if settings.compress_prog.is_none() { merge_with_file_limit::<_, _, WriteablePlainTmpFile>( - files.map(|file| PlainMergeInput { inner: file }), + files + .iter() + .map(|file| open(file).map(|file| PlainMergeInput { inner: file })), settings, - None, + tmp_dir, ) } else { merge_with_file_limit::<_, _, WriteableCompressedTmpFile>( - files.map(|file| PlainMergeInput { inner: file }), + files + .iter() + .map(|file| open(file).map(|file| PlainMergeInput { inner: file })), settings, - None, + tmp_dir, ) } } @@ -54,24 +95,25 @@ pub fn merge>>( // Merge already sorted `MergeInput`s. pub fn merge_with_file_limit< M: MergeInput + 'static, - F: ExactSizeIterator, + F: ExactSizeIterator>, Tmp: WriteableTmpFile + 'static, >( files: F, settings: &GlobalSettings, tmp_dir: Option<(TempDir, usize)>, -) -> FileMerger { +) -> UResult { if files.len() > settings.merge_batch_size { // If we did not get a tmp_dir, create one. - let (tmp_dir, mut tmp_dir_size) = tmp_dir.unwrap_or_else(|| { - ( + let (tmp_dir, mut tmp_dir_size) = match tmp_dir { + Some(x) => x, + None => ( tempfile::Builder::new() .prefix("uutils_sort") .tempdir_in(&settings.tmp_dir) - .unwrap(), + .map_err(|_| SortError::TmpDirCreationFailed)?, 0, - ) - }); + ), + }; let mut remaining_files = files.len(); let batches = files.chunks(settings.merge_batch_size); let mut batches = batches.into_iter(); @@ -79,14 +121,14 @@ pub fn merge_with_file_limit< while remaining_files != 0 { // Work around the fact that `Chunks` is not an `ExactSizeIterator`. remaining_files = remaining_files.saturating_sub(settings.merge_batch_size); - let mut merger = merge_without_limit(batches.next().unwrap(), settings); + let merger = merge_without_limit(batches.next().unwrap(), settings)?; let mut tmp_file = Tmp::create( tmp_dir.path().join(tmp_dir_size.to_string()), settings.compress_prog.as_deref(), - ); + )?; tmp_dir_size += 1; - merger.write_all_to(settings, tmp_file.as_write()); - temporary_files.push(tmp_file.finished_writing()); + merger.write_all_to(settings, tmp_file.as_write())?; + temporary_files.push(tmp_file.finished_writing()?); } assert!(batches.next().is_none()); merge_with_file_limit::<_, _, Tmp>( @@ -94,7 +136,7 @@ pub fn merge_with_file_limit< .into_iter() .map(Box::new(|c: Tmp::Closed| c.reopen()) as Box< - dyn FnMut(Tmp::Closed) -> ::Reopened, + dyn FnMut(Tmp::Closed) -> UResult<::Reopened>, >), settings, Some((tmp_dir, tmp_dir_size)), @@ -108,10 +150,10 @@ pub fn merge_with_file_limit< /// /// It is the responsibility of the caller to ensure that `files` yields only /// as many files as we are allowed to open concurrently. -fn merge_without_limit>( +fn merge_without_limit>>( files: F, settings: &GlobalSettings, -) -> FileMerger { +) -> UResult { let (request_sender, request_receiver) = channel(); let mut reader_files = Vec::with_capacity(files.size_hint().0); let mut loaded_receivers = Vec::with_capacity(files.size_hint().0); @@ -119,7 +161,7 @@ fn merge_without_limit>( let (sender, receiver) = sync_channel(2); loaded_receivers.push(receiver); reader_files.push(Some(ReaderFile { - file, + file: file?, sender, carry_over: vec![], })); @@ -136,7 +178,7 @@ fn merge_without_limit>( .unwrap(); } - thread::spawn({ + let reader_join_handle = thread::spawn({ let settings = settings.clone(); move || { reader( @@ -155,22 +197,25 @@ fn merge_without_limit>( let mut mergeable_files = vec![]; for (file_number, receiver) in loaded_receivers.into_iter().enumerate() { - mergeable_files.push(MergeableFile { - current_chunk: Rc::new(receiver.recv().unwrap()), - file_number, - line_idx: 0, - receiver, - }) + if let Ok(chunk) = receiver.recv() { + mergeable_files.push(MergeableFile { + current_chunk: Rc::new(chunk), + file_number, + line_idx: 0, + receiver, + }) + } } - FileMerger { + Ok(FileMerger { heap: binary_heap_plus::BinaryHeap::from_vec_cmp( mergeable_files, FileComparator { settings }, ), request_sender, prev: None, - } + reader_join_handle, + }) } /// The struct on the reader thread representing an input file struct ReaderFile { @@ -185,7 +230,7 @@ fn reader( files: &mut [Option>], settings: &GlobalSettings, separator: u8, -) { +) -> UResult<()> { for (file_idx, recycled_chunk) in recycled_receiver.iter() { if let Some(ReaderFile { file, @@ -202,15 +247,16 @@ fn reader( &mut iter::empty(), separator, settings, - ); + )?; if !should_continue { // Remove the file from the list by replacing it with `None`. let ReaderFile { file, .. } = files[file_idx].take().unwrap(); // Depending on the kind of the `MergeInput`, this may delete the file: - file.finished_reading(); + file.finished_reading()?; } } } + Ok(()) } /// The struct on the main thread representing an input file pub struct MergeableFile { @@ -234,17 +280,20 @@ pub struct FileMerger<'a> { heap: binary_heap_plus::BinaryHeap>, request_sender: Sender<(usize, RecycledChunk)>, prev: Option, + reader_join_handle: JoinHandle>, } impl<'a> FileMerger<'a> { /// Write the merged contents to the output file. - pub fn write_all(&mut self, settings: &GlobalSettings) { - let mut out = settings.out_writer(); - self.write_all_to(settings, &mut out); + pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> { + let mut out = output.into_write(); + self.write_all_to(settings, &mut out) } - pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) { + pub fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> { while self.write_next(settings, out) {} + drop(self.request_sender); + self.reader_join_handle.join().unwrap() } fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool { @@ -328,36 +377,41 @@ impl<'a> Compare for FileComparator<'a> { } // Wait for the child to exit and check its exit code. -fn assert_child_success(mut child: Child, program: &str) { +fn check_child_success(mut child: Child, program: &str) -> UResult<()> { if !matches!( child.wait().map(|e| e.code()), Ok(Some(0)) | Ok(None) | Err(_) ) { - crash!(2, "'{}' terminated abnormally", program) + Err(SortError::CompressProgTerminatedAbnormally { + prog: program.to_owned(), + } + .into()) + } else { + Ok(()) } } /// A temporary file that can be written to. -pub trait WriteableTmpFile { +pub trait WriteableTmpFile: Sized { type Closed: ClosedTmpFile; type InnerWrite: Write; - fn create(path: PathBuf, compress_prog: Option<&str>) -> Self; + fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult; /// Closes the temporary file. - fn finished_writing(self) -> Self::Closed; + fn finished_writing(self) -> UResult; fn as_write(&mut self) -> &mut Self::InnerWrite; } /// A temporary file that is (temporarily) closed, but can be reopened. pub trait ClosedTmpFile { type Reopened: MergeInput; /// Reopens the temporary file. - fn reopen(self) -> Self::Reopened; + fn reopen(self) -> UResult; } /// A pre-sorted input for merging. pub trait MergeInput: Send { type InnerRead: Read; /// Cleans this `MergeInput` up. /// Implementations may delete the backing file. - fn finished_reading(self); + fn finished_reading(self) -> UResult<()>; fn as_read(&mut self) -> &mut Self::InnerRead; } @@ -376,15 +430,17 @@ impl WriteableTmpFile for WriteablePlainTmpFile { type Closed = ClosedPlainTmpFile; type InnerWrite = BufWriter; - fn create(path: PathBuf, _: Option<&str>) -> Self { - WriteablePlainTmpFile { - file: BufWriter::new(File::create(&path).unwrap()), + fn create(path: PathBuf, _: Option<&str>) -> UResult { + Ok(WriteablePlainTmpFile { + file: BufWriter::new( + File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?, + ), path, - } + }) } - fn finished_writing(self) -> Self::Closed { - ClosedPlainTmpFile { path: self.path } + fn finished_writing(self) -> UResult { + Ok(ClosedPlainTmpFile { path: self.path }) } fn as_write(&mut self) -> &mut Self::InnerWrite { @@ -393,18 +449,22 @@ impl WriteableTmpFile for WriteablePlainTmpFile { } impl ClosedTmpFile for ClosedPlainTmpFile { type Reopened = PlainTmpMergeInput; - fn reopen(self) -> Self::Reopened { - PlainTmpMergeInput { - file: File::open(&self.path).unwrap(), + fn reopen(self) -> UResult { + Ok(PlainTmpMergeInput { + file: File::open(&self.path).map_err(|error| SortError::OpenTmpFileFailed { error })?, path: self.path, - } + }) } } impl MergeInput for PlainTmpMergeInput { type InnerRead = File; - fn finished_reading(self) { - fs::remove_file(self.path).ok(); + fn finished_reading(self) -> UResult<()> { + // we ignore failures to delete the temporary file, + // because there is a race at the end of the execution and the whole + // temporary directory might already be gone. + let _ = fs::remove_file(self.path); + Ok(()) } fn as_read(&mut self) -> &mut Self::InnerRead { @@ -432,35 +492,33 @@ impl WriteableTmpFile for WriteableCompressedTmpFile { type Closed = ClosedCompressedTmpFile; type InnerWrite = BufWriter; - fn create(path: PathBuf, compress_prog: Option<&str>) -> Self { + fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult { let compress_prog = compress_prog.unwrap(); let mut command = Command::new(compress_prog); - command - .stdin(Stdio::piped()) - .stdout(File::create(&path).unwrap()); - let mut child = crash_if_err!( - 2, - command.spawn().map_err(|err| format!( - "couldn't execute compress program: errno {}", - err.raw_os_error().unwrap() - )) - ); + let tmp_file = + File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?; + command.stdin(Stdio::piped()).stdout(tmp_file); + let mut child = command + .spawn() + .map_err(|err| SortError::CompressProgExecutionFailed { + code: err.raw_os_error().unwrap(), + })?; let child_stdin = child.stdin.take().unwrap(); - WriteableCompressedTmpFile { + Ok(WriteableCompressedTmpFile { path, compress_prog: compress_prog.to_owned(), child, child_stdin: BufWriter::new(child_stdin), - } + }) } - fn finished_writing(self) -> Self::Closed { + fn finished_writing(self) -> UResult { drop(self.child_stdin); - assert_child_success(self.child, &self.compress_prog); - ClosedCompressedTmpFile { + check_child_success(self.child, &self.compress_prog)?; + Ok(ClosedCompressedTmpFile { path: self.path, compress_prog: self.compress_prog, - } + }) } fn as_write(&mut self) -> &mut Self::InnerWrite { @@ -470,33 +528,32 @@ impl WriteableTmpFile for WriteableCompressedTmpFile { impl ClosedTmpFile for ClosedCompressedTmpFile { type Reopened = CompressedTmpMergeInput; - fn reopen(self) -> Self::Reopened { + fn reopen(self) -> UResult { let mut command = Command::new(&self.compress_prog); let file = File::open(&self.path).unwrap(); command.stdin(file).stdout(Stdio::piped()).arg("-d"); - let mut child = crash_if_err!( - 2, - command.spawn().map_err(|err| format!( - "couldn't execute compress program: errno {}", - err.raw_os_error().unwrap() - )) - ); + let mut child = command + .spawn() + .map_err(|err| SortError::CompressProgExecutionFailed { + code: err.raw_os_error().unwrap(), + })?; let child_stdout = child.stdout.take().unwrap(); - CompressedTmpMergeInput { + Ok(CompressedTmpMergeInput { path: self.path, compress_prog: self.compress_prog, child, child_stdout, - } + }) } } impl MergeInput for CompressedTmpMergeInput { type InnerRead = ChildStdout; - fn finished_reading(self) { + fn finished_reading(self) -> UResult<()> { drop(self.child_stdout); - assert_child_success(self.child, &self.compress_prog); - fs::remove_file(self.path).ok(); + check_child_success(self.child, &self.compress_prog)?; + let _ = fs::remove_file(self.path); + Ok(()) } fn as_read(&mut self) -> &mut Self::InnerRead { @@ -509,7 +566,9 @@ pub struct PlainMergeInput { } impl MergeInput for PlainMergeInput { type InnerRead = R; - fn finished_reading(self) {} + fn finished_reading(self) -> UResult<()> { + Ok(()) + } fn as_read(&mut self) -> &mut Self::InnerRead { &mut self.inner } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 55bcdb77b..be9510aef 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -29,19 +29,22 @@ use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings}; -use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; use std::cmp::Ordering; use std::env; -use std::ffi::OsStr; -use std::fs::File; +use std::error::Error; +use std::ffi::{OsStr, OsString}; +use std::fmt::Display; +use std::fs::{File, OpenOptions}; use std::hash::{Hash, Hasher}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::ops::Range; use std::path::Path; use std::path::PathBuf; +use std::str::Utf8Error; use unicode_width::UnicodeWidthStr; +use uucore::error::{set_exit_code, UCustomError, UResult, USimpleError, UUsageError}; use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::version_cmp::version_cmp; use uucore::InvalidEncodingHandling; @@ -121,6 +124,111 @@ const POSITIVE: char = '+'; // available memory into consideration, instead of relying on this constant only. const DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB +#[derive(Debug)] +enum SortError { + Disorder { + file: OsString, + line_number: usize, + line: String, + silent: bool, + }, + OpenFailed { + path: String, + error: std::io::Error, + }, + ReadFailed { + path: String, + error: std::io::Error, + }, + ParseKeyError { + key: String, + msg: String, + }, + OpenTmpFileFailed { + error: std::io::Error, + }, + CompressProgExecutionFailed { + code: i32, + }, + CompressProgTerminatedAbnormally { + prog: String, + }, + TmpDirCreationFailed, + Uft8Error { + error: Utf8Error, + }, +} + +impl Error for SortError {} + +impl UCustomError for SortError { + fn code(&self) -> i32 { + match self { + SortError::Disorder { .. } => 1, + _ => 2, + } + } + + fn usage(&self) -> bool { + false + } +} + +impl Display for SortError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SortError::Disorder { + file, + line_number, + line, + silent, + } => { + if !silent { + write!( + f, + "{}:{}: disorder: {}", + file.to_string_lossy(), + line_number, + line + ) + } else { + Ok(()) + } + } + SortError::OpenFailed { path, error } => write!( + f, + "open failed: {}: {}", + path, + strip_errno(&error.to_string()) + ), + SortError::ParseKeyError { key, msg } => { + write!(f, "failed to parse key `{}`: {}", key, msg) + } + SortError::ReadFailed { path, error } => write!( + f, + "cannot read: {}: {}", + path, + strip_errno(&error.to_string()) + ), + SortError::OpenTmpFileFailed { error } => { + write!( + f, + "failed to open temporary file: {}", + strip_errno(&error.to_string()) + ) + } + SortError::CompressProgExecutionFailed { code } => { + write!(f, "couldn't execute compress program: errno {}", code) + } + SortError::CompressProgTerminatedAbnormally { prog } => { + write!(f, "'{}' terminated abnormally", prog) + } + SortError::TmpDirCreationFailed => write!(f, "could not create temporary directory"), + SortError::Uft8Error { error } => write!(f, "{}", error), + } + } +} + #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)] enum SortMode { Numeric, @@ -146,6 +254,49 @@ impl SortMode { } } +pub struct Output { + file: Option<(String, File)>, +} + +impl Output { + fn new(name: Option<&str>) -> UResult { + let file = if let Some(name) = name { + // This is different from `File::create()` because we don't truncate the output yet. + // This allows using the output file as an input file. + let file = OpenOptions::new() + .write(true) + .create(true) + .open(name) + .map_err(|e| SortError::OpenFailed { + path: name.to_owned(), + error: e, + })?; + Some((name.to_owned(), file)) + } else { + None + }; + Ok(Self { file }) + } + + fn into_write(self) -> BufWriter> { + BufWriter::new(match self.file { + Some((_name, file)) => { + // truncate the file + let _ = file.set_len(0); + Box::new(file) + } + None => Box::new(stdout()), + }) + } + + fn as_output_name(&self) -> Option<&str> { + match &self.file { + Some((name, _file)) => Some(name), + None => None, + } + } +} + #[derive(Clone)] pub struct GlobalSettings { mode: SortMode, @@ -156,12 +307,11 @@ pub struct GlobalSettings { ignore_non_printing: bool, merge: bool, reverse: bool, - output_file: Option, stable: bool, unique: bool, check: bool, check_silent: bool, - salt: String, + salt: Option<[u8; 16]>, selectors: Vec, separator: Option, threads: String, @@ -209,19 +359,6 @@ impl GlobalSettings { } } - fn out_writer(&self) -> BufWriter> { - match self.output_file { - Some(ref filename) => match File::create(Path::new(&filename)) { - Ok(f) => BufWriter::new(Box::new(f) as Box), - Err(e) => { - show_error!("{0}: {1}", filename, e.to_string()); - panic!("Could not open output file"); - } - }, - None => BufWriter::new(Box::new(stdout()) as Box), - } - } - /// Precompute some data needed for sorting. /// This function **must** be called before starting to sort, and `GlobalSettings` may not be altered /// afterwards. @@ -253,12 +390,11 @@ impl Default for GlobalSettings { ignore_non_printing: false, merge: false, reverse: false, - output_file: None, stable: false, unique: false, check: false, check_silent: false, - salt: String::new(), + salt: None, selectors: vec![], separator: None, threads: String::new(), @@ -697,33 +833,37 @@ impl FieldSelector { } } - fn parse(key: &str, global_settings: &GlobalSettings) -> Self { + fn parse(key: &str, global_settings: &GlobalSettings) -> UResult { let mut from_to = key.split(','); let (from, from_options) = Self::split_key_options(from_to.next().unwrap()); let to = from_to.next().map(|to| Self::split_key_options(to)); let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, ""))); - crash_if_err!( - 2, - if options_are_empty { - // Inherit the global settings if there are no options attached to this key. - (|| { - // This would be ideal for a try block, I think. In the meantime this closure allows - // to use the `?` operator here. - Self::new( - KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?, - to.map(|(to, _)| { - KeyPosition::new(to, 0, global_settings.ignore_leading_blanks) - }) - .transpose()?, - KeySettings::from(global_settings), - ) - })() - } else { - // Do not inherit from `global_settings`, as there are options attached to this key. - Self::parse_with_options((from, from_options), to) + + if options_are_empty { + // Inherit the global settings if there are no options attached to this key. + (|| { + // This would be ideal for a try block, I think. In the meantime this closure allows + // to use the `?` operator here. + Self::new( + KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?, + to.map(|(to, _)| { + KeyPosition::new(to, 0, global_settings.ignore_leading_blanks) + }) + .transpose()?, + KeySettings::from(global_settings), + ) + })() + } else { + // Do not inherit from `global_settings`, as there are options attached to this key. + Self::parse_with_options((from, from_options), to) + } + .map_err(|msg| { + SortError::ParseKeyError { + key: key.to_owned(), + msg, } - .map_err(|e| format!("failed to parse key `{}`: {}", key, e)) - ) + .into() + }) } fn parse_with_options( @@ -916,9 +1056,7 @@ impl FieldSelector { fn get_usage() -> String { format!( - "{0} -Usage: - {0} [OPTION]... [FILE]... + "{0} [OPTION]... [FILE]... Write the sorted concatenation of all FILE(s) to standard output. Mandatory arguments for long options are mandatory for short options too. With no FILE, or when FILE is -, read standard input.", @@ -937,41 +1075,57 @@ fn make_sort_mode_arg<'a, 'b>(mode: &'a str, short: &'b str, help: &'b str) -> A arg } -pub fn uumain(args: impl uucore::Args) -> i32 { +#[uucore_procs::gen_uumain] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); let usage = get_usage(); let mut settings: GlobalSettings = Default::default(); - let matches = uu_app().usage(&usage[..]).get_matches_from(args); + let matches = match uu_app().usage(&usage[..]).get_matches_from_safe(args) { + Ok(t) => t, + Err(e) => { + // not all clap "Errors" are because of a failure to parse arguments. + // "--version" also causes an Error to be returned, but we should not print to stderr + // nor return with a non-zero exit code in this case (we should print to stdout and return 0). + // This logic is similar to the code in clap, but we return 2 as the exit code in case of real failure + // (clap returns 1). + if e.use_stderr() { + eprintln!("{}", e.message); + set_exit_code(2); + } else { + println!("{}", e.message); + } + return Ok(()); + } + }; settings.debug = matches.is_present(options::DEBUG); // check whether user specified a zero terminated list of files for input, otherwise read files from args - let mut files: Vec = if matches.is_present(options::FILES0_FROM) { - let files0_from: Vec = matches - .values_of(options::FILES0_FROM) - .map(|v| v.map(ToString::to_string).collect()) + let mut files: Vec = if matches.is_present(options::FILES0_FROM) { + let files0_from: Vec = matches + .values_of_os(options::FILES0_FROM) + .map(|v| v.map(ToOwned::to_owned).collect()) .unwrap_or_default(); let mut files = Vec::new(); for path in &files0_from { - let reader = open(path.as_str()); + let reader = open(&path)?; let buf_reader = BufReader::new(reader); for line in buf_reader.split(b'\0').flatten() { - files.push( + files.push(OsString::from( std::str::from_utf8(&line) - .expect("Could not parse string from zero terminated input.") - .to_string(), - ); + .expect("Could not parse string from zero terminated input."), + )); } } files } else { matches - .values_of(options::FILES) - .map(|v| v.map(ToString::to_string).collect()) + .values_of_os(options::FILES) + .map(|v| v.map(ToOwned::to_owned).collect()) .unwrap_or_default() }; @@ -998,7 +1152,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } else if matches.is_present(options::modes::RANDOM) || matches.value_of(options::modes::SORT) == Some("random") { - settings.salt = get_rand_string(); + settings.salt = Some(get_rand_string()); SortMode::Random } else { SortMode::Default @@ -1015,12 +1169,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { env::set_var("RAYON_NUM_THREADS", &settings.threads); } - settings.buffer_size = matches - .value_of(options::BUF_SIZE) - .map_or(DEFAULT_BUF_SIZE, |s| { - GlobalSettings::parse_byte_count(s) - .unwrap_or_else(|e| crash!(2, "{}", format_error_message(e, s, options::BUF_SIZE))) - }); + settings.buffer_size = + matches + .value_of(options::BUF_SIZE) + .map_or(Ok(DEFAULT_BUF_SIZE), |s| { + GlobalSettings::parse_byte_count(s).map_err(|e| { + USimpleError::new(2, format_error_message(e, s, options::BUF_SIZE)) + }) + })?; settings.tmp_dir = matches .value_of(options::TMP_DIR) @@ -1030,9 +1186,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.compress_prog = matches.value_of(options::COMPRESS_PROG).map(String::from); if let Some(n_merge) = matches.value_of(options::BATCH_SIZE) { - settings.merge_batch_size = n_merge - .parse() - .unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge)); + settings.merge_batch_size = n_merge.parse().map_err(|_| { + UUsageError::new(2, format!("invalid --batch-size argument '{}'", n_merge)) + })?; } settings.zero_terminated = matches.is_present(options::ZERO_TERMINATED); @@ -1053,32 +1209,45 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.ignore_leading_blanks = matches.is_present(options::IGNORE_LEADING_BLANKS); - settings.output_file = matches.value_of(options::OUTPUT).map(String::from); settings.reverse = matches.is_present(options::REVERSE); settings.stable = matches.is_present(options::STABLE); settings.unique = matches.is_present(options::UNIQUE); if files.is_empty() { /* if no file, default to stdin */ - files.push("-".to_owned()); + files.push("-".to_string().into()); } else if settings.check && files.len() != 1 { - crash!(1, "extra operand `{}' not allowed with -c", files[1]) + return Err(UUsageError::new( + 2, + format!( + "extra operand `{}' not allowed with -c", + files[1].to_string_lossy() + ), + )); } if let Some(arg) = matches.args.get(options::SEPARATOR) { let separator = arg.vals[0].to_string_lossy(); - let separator = separator; + let mut separator = separator.as_ref(); + if separator == "\\0" { + separator = "\0"; + } if separator.len() != 1 { - crash!(1, "separator must be exactly one character long"); + return Err(UUsageError::new( + 2, + "separator must be exactly one character long".into(), + )); } settings.separator = Some(separator.chars().next().unwrap()) } if let Some(values) = matches.values_of(options::KEY) { for value in values { - settings - .selectors - .push(FieldSelector::parse(value, &settings)); + let selector = FieldSelector::parse(value, &settings)?; + if selector.settings.mode == SortMode::Random && settings.salt.is_none() { + settings.salt = Some(get_rand_string()); + } + settings.selectors.push(selector); } } @@ -1099,9 +1268,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 { ); } + // Verify that we can open all input files. + // It is the correct behavior to close all files afterwards, + // and to reopen them at a later point. This is different from how the output file is handled, + // probably to prevent running out of file descriptors. + for file in &files { + open(file)?; + } + + let output = Output::new(matches.value_of(options::OUTPUT))?; + settings.init_precomputed(); - exec(&files, &settings) + exec(&mut files, &settings, output) } pub fn uu_app() -> App<'static, 'static> { @@ -1112,73 +1291,57 @@ pub fn uu_app() -> App<'static, 'static> { Arg::with_name(options::modes::SORT) .long(options::modes::SORT) .takes_value(true) - .possible_values( - &[ - "general-numeric", - "human-numeric", - "month", - "numeric", - "version", - "random", - ] - ) - .conflicts_with_all(&options::modes::ALL_SORT_MODES) - ) - .arg( - make_sort_mode_arg( - options::modes::HUMAN_NUMERIC, - "h", - "compare according to human readable sizes, eg 1M > 100k" - ), - ) - .arg( - make_sort_mode_arg( - options::modes::MONTH, - "M", - "compare according to month name abbreviation" - ), - ) - .arg( - make_sort_mode_arg( - options::modes::NUMERIC, - "n", - "compare according to string numerical value" - ), - ) - .arg( - make_sort_mode_arg( - options::modes::GENERAL_NUMERIC, - "g", - "compare according to string general numerical value" - ), - ) - .arg( - make_sort_mode_arg( - options::modes::VERSION, - "V", - "Sort by SemVer version number, eg 1.12.2 > 1.1.2", - ), - ) - .arg( - make_sort_mode_arg( - options::modes::RANDOM, - "R", - "shuffle in random order", - ), + .possible_values(&[ + "general-numeric", + "human-numeric", + "month", + "numeric", + "version", + "random", + ]) + .conflicts_with_all(&options::modes::ALL_SORT_MODES), ) + .arg(make_sort_mode_arg( + options::modes::HUMAN_NUMERIC, + "h", + "compare according to human readable sizes, eg 1M > 100k", + )) + .arg(make_sort_mode_arg( + options::modes::MONTH, + "M", + "compare according to month name abbreviation", + )) + .arg(make_sort_mode_arg( + options::modes::NUMERIC, + "n", + "compare according to string numerical value", + )) + .arg(make_sort_mode_arg( + options::modes::GENERAL_NUMERIC, + "g", + "compare according to string general numerical value", + )) + .arg(make_sort_mode_arg( + options::modes::VERSION, + "V", + "Sort by SemVer version number, eg 1.12.2 > 1.1.2", + )) + .arg(make_sort_mode_arg( + options::modes::RANDOM, + "R", + "shuffle in random order", + )) .arg( Arg::with_name(options::DICTIONARY_ORDER) .short("d") .long(options::DICTIONARY_ORDER) .help("consider only blanks and alphanumeric characters") - .conflicts_with_all( - &[ - options::modes::NUMERIC, - options::modes::GENERAL_NUMERIC, - options::modes::HUMAN_NUMERIC, - options::modes::MONTH, - ] - ), + .conflicts_with_all(&[ + options::modes::NUMERIC, + options::modes::GENERAL_NUMERIC, + options::modes::HUMAN_NUMERIC, + options::modes::MONTH, + ]), ) .arg( Arg::with_name(options::MERGE) @@ -1206,7 +1369,10 @@ pub fn uu_app() -> App<'static, 'static> { .short("C") .long(options::check::CHECK_SILENT) .conflicts_with(options::OUTPUT) - .help("exit successfully if the given file is already sorted, and exit with status 1 otherwise."), + .help( + "exit successfully if the given file is already sorted,\ + and exit with status 1 otherwise.", + ), ) .arg( Arg::with_name(options::IGNORE_CASE) @@ -1219,14 +1385,12 @@ pub fn uu_app() -> App<'static, 'static> { .short("i") .long(options::IGNORE_NONPRINTING) .help("ignore nonprinting characters") - .conflicts_with_all( - &[ - options::modes::NUMERIC, - options::modes::GENERAL_NUMERIC, - options::modes::HUMAN_NUMERIC, - options::modes::MONTH - ] - ), + .conflicts_with_all(&[ + options::modes::NUMERIC, + options::modes::GENERAL_NUMERIC, + options::modes::HUMAN_NUMERIC, + options::modes::MONTH, + ]), ) .arg( Arg::with_name(options::IGNORE_LEADING_BLANKS) @@ -1275,7 +1439,8 @@ pub fn uu_app() -> App<'static, 'static> { .short("t") .long(options::SEPARATOR) .help("custom separator for -k") - .takes_value(true)) + .takes_value(true), + ) .arg( Arg::with_name(options::ZERO_TERMINATED) .short("z") @@ -1310,13 +1475,13 @@ pub fn uu_app() -> App<'static, 'static> { .long(options::COMPRESS_PROG) .help("compress temporary files with PROG, decompress with PROG -d") .long_help("PROG has to take input from stdin and output to stdout") - .value_name("PROG") + .value_name("PROG"), ) .arg( Arg::with_name(options::BATCH_SIZE) .long(options::BATCH_SIZE) .help("Merge at most N_MERGE inputs at once.") - .value_name("N_MERGE") + .value_name("N_MERGE"), ) .arg( Arg::with_name(options::FILES0_FROM) @@ -1331,24 +1496,27 @@ pub fn uu_app() -> App<'static, 'static> { .long(options::DEBUG) .help("underline the parts of the line that are actually used for sorting"), ) - .arg(Arg::with_name(options::FILES).multiple(true).takes_value(true)) + .arg( + Arg::with_name(options::FILES) + .multiple(true) + .takes_value(true), + ) } -fn exec(files: &[String], settings: &GlobalSettings) -> i32 { +fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> UResult<()> { if settings.merge { - let mut file_merger = merge::merge(files.iter().map(open), settings); - file_merger.write_all(settings); + let file_merger = merge::merge(files, settings, output.as_output_name())?; + file_merger.write_all(settings, output) } else if settings.check { if files.len() > 1 { - crash!(1, "only one file allowed with -c"); + Err(UUsageError::new(2, "only one file allowed with -c".into())) + } else { + check::check(files.first().unwrap(), settings) } - return check::check(files.first().unwrap(), settings); } else { let mut lines = files.iter().map(open); - - ext_sort(&mut lines, settings); + ext_sort(&mut lines, settings, output) } - 0 } fn sort_by<'a>(unsorted: &mut Vec>, settings: &GlobalSettings, line_data: &LineData<'a>) { @@ -1387,7 +1555,22 @@ fn compare_by<'a>( let settings = &selector.settings; let cmp: Ordering = match settings.mode { - SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt), + SortMode::Random => { + // check if the two strings are equal + if custom_str_cmp( + a_str, + b_str, + settings.ignore_non_printing, + settings.dictionary_order, + settings.ignore_case, + ) == Ordering::Equal + { + Ordering::Equal + } else { + // Only if they are not equal compare by the hash + random_shuffle(a_str, b_str, &global_settings.salt.unwrap()) + } + } SortMode::Numeric => { let a_num_info = &a_line_data.num_infos [a.index * global_settings.precomputed.num_infos_per_line + num_info_index]; @@ -1536,12 +1719,8 @@ fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult) a.partial_cmp(b).unwrap() } -fn get_rand_string() -> String { - thread_rng() - .sample_iter(&Alphanumeric) - .take(16) - .map(char::from) - .collect::() +fn get_rand_string() -> [u8; 16] { + thread_rng().sample(rand::distributions::Standard) } fn get_hash(t: &T) -> u64 { @@ -1550,10 +1729,9 @@ fn get_hash(t: &T) -> u64 { s.finish() } -fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering { - let da = get_hash(&[a, salt].concat()); - let db = get_hash(&[b, salt].concat()); - +fn random_shuffle(a: &str, b: &str, salt: &[u8]) -> Ordering { + let da = get_hash(&(a, salt)); + let db = get_hash(&(b, salt)); da.cmp(&db) } @@ -1618,26 +1796,38 @@ fn month_compare(a: &str, b: &str) -> Ordering { } } -fn print_sorted<'a, T: Iterator>>(iter: T, settings: &GlobalSettings) { - let mut writer = settings.out_writer(); +fn print_sorted<'a, T: Iterator>>( + iter: T, + settings: &GlobalSettings, + output: Output, +) { + let mut writer = output.into_write(); for line in iter { line.print(&mut writer, settings); } } -// from cat.rs -fn open(path: impl AsRef) -> Box { +/// Strips the trailing " (os error XX)" from io error strings. +fn strip_errno(err: &str) -> &str { + &err[..err.find(" (os error ").unwrap_or(err.len())] +} + +fn open(path: impl AsRef) -> UResult> { let path = path.as_ref(); if path == "-" { let stdin = stdin(); - return Box::new(stdin) as Box; + return Ok(Box::new(stdin) as Box); } - match File::open(Path::new(path)) { - Ok(f) => Box::new(f) as Box, - Err(e) => { - crash!(2, "cannot read: {0:?}: {1}", path, e); + let path = Path::new(path); + + match File::open(path) { + Ok(f) => Ok(Box::new(f) as Box), + Err(error) => Err(SortError::ReadFailed { + path: path.to_string_lossy().to_string(), + error, } + .into()), } } diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 70c06bdf6..c56971f6b 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -436,6 +436,7 @@ impl Stater { 'f' => tokens.push(Token::Char('\x0C')), 'n' => tokens.push(Token::Char('\n')), 'r' => tokens.push(Token::Char('\r')), + 't' => tokens.push(Token::Char('\t')), 'v' => tokens.push(Token::Char('\x0B')), c => { show_warning!("unrecognized escape '\\{}'", c); diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index ae1fd9bc5..01cf09215 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -35,15 +35,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let before = matches.is_present(options::BEFORE); let regex = matches.is_present(options::REGEX); - let separator = match matches.value_of(options::SEPARATOR) { - Some(m) => { - if m.is_empty() { - crash!(1, "separator cannot be empty") - } else { - m.to_owned() - } - } - None => "\n".to_owned(), + let raw_separator = matches.value_of(options::SEPARATOR).unwrap_or("\n"); + let separator = if raw_separator.is_empty() { + "\0" + } else { + raw_separator }; let files: Vec = match matches.values_of(options::FILE) { @@ -51,7 +47,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { None => vec!["-".to_owned()], }; - tac(files, before, regex, &separator[..]) + tac(files, before, regex, separator) } pub fn uu_app() -> App<'static, 'static> { @@ -97,7 +93,7 @@ fn tac(filenames: Vec, before: bool, _: bool, separator: &str) -> i32 { let path = Path::new(filename); if path.is_dir() || path.metadata().is_err() { if path.is_dir() { - show_error!("dir: read error: Invalid argument"); + show_error!("{}: read error: Invalid argument", filename); } else { show_error!( "failed to open '{}' for reading: No such file or directory", @@ -139,9 +135,16 @@ fn tac(filenames: Vec, before: bool, _: bool, separator: &str) -> i32 { i += 1; } } + // If the file contains no line separators, then simply write + // the contents of the file directly to stdout. + if offsets.is_empty() { + out.write_all(&data) + .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); + return exit_code; + } // if there isn't a separator at the end of the file, fake it - if offsets.is_empty() || *offsets.last().unwrap() < data.len() - slen { + if *offsets.last().unwrap() < data.len() - slen { offsets.push(data.len()); } diff --git a/src/uu/tail/Cargo.toml b/src/uu/tail/Cargo.toml index bab6c125d..710a1421f 100644 --- a/src/uu/tail/Cargo.toml +++ b/src/uu/tail/Cargo.toml @@ -24,6 +24,10 @@ winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi", [target.'cfg(target_os = "redox")'.dependencies] redox_syscall = "0.1" +[target.'cfg(unix)'.dependencies] +nix = "0.20" +libc = "0.2" + [[bin]] name = "tail" path = "src/main.rs" diff --git a/src/uu/tail/README.md b/src/uu/tail/README.md index b7f92f8e4..94b6816af 100644 --- a/src/uu/tail/README.md +++ b/src/uu/tail/README.md @@ -11,7 +11,7 @@ ### Others -- [ ] The current implementation does not handle `-` as an alias for stdin. +- [ ] The current implementation doesn't follow stdin in non-unix platforms ## Possible optimizations diff --git a/src/uu/tail/src/platform/mod.rs b/src/uu/tail/src/platform/mod.rs index 010c5c4ac..4a8982713 100644 --- a/src/uu/tail/src/platform/mod.rs +++ b/src/uu/tail/src/platform/mod.rs @@ -2,13 +2,14 @@ * This file is part of the uutils coreutils package. * * (c) Alexander Batischev + * (c) Thomas Queiroz * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ #[cfg(unix)] -pub use self::unix::{supports_pid_checks, Pid, ProcessChecker}; +pub use self::unix::{stdin_is_pipe_or_fifo, supports_pid_checks, Pid, ProcessChecker}; #[cfg(windows)] pub use self::windows::{supports_pid_checks, Pid, ProcessChecker}; diff --git a/src/uu/tail/src/platform/unix.rs b/src/uu/tail/src/platform/unix.rs index 167f693e6..580a40135 100644 --- a/src/uu/tail/src/platform/unix.rs +++ b/src/uu/tail/src/platform/unix.rs @@ -2,6 +2,7 @@ * This file is part of the uutils coreutils package. * * (c) Alexander Batischev + * (c) Thomas Queiroz * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. @@ -9,7 +10,13 @@ // spell-checker:ignore (ToDO) errno EPERM ENOSYS -use std::io::Error; +use std::io::{stdin, Error}; + +use std::os::unix::prelude::AsRawFd; + +use nix::sys::stat::fstat; + +use libc::{S_IFIFO, S_IFSOCK}; pub type Pid = libc::pid_t; @@ -40,3 +47,16 @@ pub fn supports_pid_checks(pid: self::Pid) -> bool { fn get_errno() -> i32 { Error::last_os_error().raw_os_error().unwrap() } + +pub fn stdin_is_pipe_or_fifo() -> bool { + let fd = stdin().lock().as_raw_fd(); + fd >= 0 // GNU tail checks fd >= 0 + && match fstat(fd) { + Ok(stat) => { + let mode = stat.st_mode; + // NOTE: This is probably not the most correct way to check this + (mode & S_IFIFO != 0) || (mode & S_IFSOCK != 0) + } + Err(err) => panic!("{}", err), + } +} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 4970cdcc2..471c1a404 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -2,6 +2,7 @@ // * // * (c) Morten Olsen Lysgaard // * (c) Alexander Batischev +// * (c) Thomas Queiroz // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. @@ -29,6 +30,9 @@ use std::time::Duration; use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::ringbuffer::RingBuffer; +#[cfg(unix)] +use crate::platform::stdin_is_pipe_or_fifo; + pub mod options { pub mod verbosity { pub static QUIET: &str = "quiet"; @@ -130,25 +134,56 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let files: Vec = matches .values_of(options::ARG_FILES) .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); + .unwrap_or_else(|| vec![String::from("-")]); - if files.is_empty() { - let mut buffer = BufReader::new(stdin()); - unbounded_tail(&mut buffer, &settings); - } else { - let multiple = files.len() > 1; - let mut first_header = true; - let mut readers = Vec::new(); + let multiple = files.len() > 1; + let mut first_header = true; + let mut readers: Vec<(Box, &String)> = Vec::new(); - for filename in &files { - if (multiple || verbose) && !quiet { - if !first_header { - println!(); - } + #[cfg(unix)] + let stdin_string = String::from("standard input"); + + for filename in &files { + let use_stdin = filename.as_str() == "-"; + if (multiple || verbose) && !quiet { + if !first_header { + println!(); + } + if use_stdin { + println!("==> standard input <=="); + } else { println!("==> {} <==", filename); } - first_header = false; + } + first_header = false; + if use_stdin { + let mut reader = BufReader::new(stdin()); + unbounded_tail(&mut reader, &settings); + + // Don't follow stdin since there are no checks for pipes/FIFOs + // + // FIXME windows has GetFileType which can determine if the file is a pipe/FIFO + // so this check can also be performed + + #[cfg(unix)] + { + /* + POSIX specification regarding tail -f + + If the input file is a regular file or if the file operand specifies a FIFO, do not + terminate after the last line of the input file has been copied, but read and copy + further bytes from the input file when they become available. If no file operand is + specified and standard input is a pipe or FIFO, the -f option shall be ignored. If + the input file is not a FIFO, pipe, or regular file, it is unspecified whether or + not the -f option shall be ignored. + */ + + if settings.follow && !stdin_is_pipe_or_fifo() { + readers.push((Box::new(reader), &stdin_string)); + } + } + } else { let path = Path::new(filename); if path.is_dir() { continue; @@ -158,20 +193,20 @@ pub fn uumain(args: impl uucore::Args) -> i32 { bounded_tail(&mut file, &settings); if settings.follow { let reader = BufReader::new(file); - readers.push(reader); + readers.push((Box::new(reader), filename)); } } else { let mut reader = BufReader::new(file); unbounded_tail(&mut reader, &settings); if settings.follow { - readers.push(reader); + readers.push((Box::new(reader), filename)); } } } + } - if settings.follow { - follow(&mut readers[..], &files[..], &settings); - } + if settings.follow { + follow(&mut readers[..], &settings); } 0 @@ -248,8 +283,12 @@ pub fn uu_app() -> App<'static, 'static> { ) } -fn follow(readers: &mut [BufReader], filenames: &[String], settings: &Settings) { +fn follow(readers: &mut [(T, &String)], settings: &Settings) { assert!(settings.follow); + if readers.is_empty() { + return; + } + let mut last = readers.len() - 1; let mut read_some = false; let mut process = platform::ProcessChecker::new(settings.pid); @@ -260,7 +299,7 @@ fn follow(readers: &mut [BufReader], filenames: &[String], settings: let pid_is_dead = !read_some && settings.pid != 0 && process.is_dead(); read_some = false; - for (i, reader) in readers.iter_mut().enumerate() { + for (i, (reader, filename)) in readers.iter_mut().enumerate() { // Print all new content since the last pass loop { let mut datum = String::new(); @@ -269,7 +308,7 @@ fn follow(readers: &mut [BufReader], filenames: &[String], settings: Ok(_) => { read_some = true; if i != last { - println!("\n==> {} <==", filenames[i]); + println!("\n==> {} <==", filename); last = i; } print!("{}", datum); diff --git a/src/uucore/src/lib/mods/error.rs b/src/uucore/src/lib/mods/error.rs index c64e7df66..d82da9feb 100644 --- a/src/uucore/src/lib/mods/error.rs +++ b/src/uucore/src/lib/mods/error.rs @@ -251,7 +251,7 @@ impl Display for UError { /// /// A crate like [`quick_error`](https://crates.io/crates/quick-error) might /// also be used, but will still require an `impl` for the `code` method. -pub trait UCustomError: Error { +pub trait UCustomError: Error + Send { /// Error code of a custom error. /// /// Set a return value for each variant of an enum-type to associate an diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index 834a09736..9c06fe904 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -1,4 +1,5 @@ use crate::common::util::*; +// spell-checker:ignore (ToDO) taaaa tbbbb tcccc #[test] fn test_with_tab() { @@ -53,3 +54,140 @@ fn test_with_multiple_files() { .stdout_contains(" return") .stdout_contains(" "); } + +#[test] +fn test_tabs_space_separated_list() { + new_ucmd!() + .args(&["--tabs", "3 6 9"]) + .pipe_in("a\tb\tc\td\te") + .succeeds() + .stdout_is("a b c d e"); +} + +#[test] +fn test_tabs_mixed_style_list() { + new_ucmd!() + .args(&["--tabs", ", 3,6 9"]) + .pipe_in("a\tb\tc\td\te") + .succeeds() + .stdout_is("a b c d e"); +} + +#[test] +fn test_tabs_empty_string() { + new_ucmd!() + .args(&["--tabs", ""]) + .pipe_in("a\tb\tc") + .succeeds() + .stdout_is("a b c"); +} + +#[test] +fn test_tabs_comma_only() { + new_ucmd!() + .args(&["--tabs", ","]) + .pipe_in("a\tb\tc") + .succeeds() + .stdout_is("a b c"); +} + +#[test] +fn test_tabs_space_only() { + new_ucmd!() + .args(&["--tabs", " "]) + .pipe_in("a\tb\tc") + .succeeds() + .stdout_is("a b c"); +} + +#[test] +fn test_tabs_slash() { + new_ucmd!() + .args(&["--tabs", "/"]) + .pipe_in("a\tb\tc") + .succeeds() + .stdout_is("a b c"); +} + +#[test] +fn test_tabs_plus() { + new_ucmd!() + .args(&["--tabs", "+"]) + .pipe_in("a\tb\tc") + .succeeds() + .stdout_is("a b c"); +} + +#[test] +fn test_tabs_trailing_slash() { + new_ucmd!() + .arg("--tabs=1,/5") + .pipe_in("\ta\tb\tc") + .succeeds() + // 0 1 + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_tabs_trailing_slash_long_columns() { + new_ucmd!() + .arg("--tabs=1,/3") + .pipe_in("\taaaa\tbbbb\tcccc") + .succeeds() + // 0 1 + // 01234567890123456 + .stdout_is(" aaaa bbbb cccc"); +} + +#[test] +fn test_tabs_trailing_plus() { + new_ucmd!() + .arg("--tabs=1,+5") + .pipe_in("\ta\tb\tc") + .succeeds() + // 0 1 + // 012345678901 + .stdout_is(" a b c"); +} + +#[test] +fn test_tabs_trailing_plus_long_columns() { + new_ucmd!() + .arg("--tabs=1,+3") + .pipe_in("\taaaa\tbbbb\tcccc") + .succeeds() + // 0 1 + // 012345678901234567 + .stdout_is(" aaaa bbbb cccc"); +} + +#[test] +fn test_tabs_must_be_ascending() { + new_ucmd!() + .arg("--tabs=1,1") + .fails() + .stderr_contains("tab sizes must be ascending"); +} + +#[test] +fn test_tabs_keep_last_trailing_specifier() { + // If there are multiple trailing specifiers, use only the last one + // before the number. + new_ucmd!() + .arg("--tabs=1,+/+/5") + .pipe_in("\ta\tb\tc") + .succeeds() + // 0 1 + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_tabs_comma_separated_no_numbers() { + new_ucmd!() + .arg("--tabs=+,/,+,/") + .pipe_in("\ta\tb\tc") + .succeeds() + .stdout_is(" a b c"); +} diff --git a/tests/by-util/test_id.rs b/tests/by-util/test_id.rs index e62f105de..db918aa33 100644 --- a/tests/by-util/test_id.rs +++ b/tests/by-util/test_id.rs @@ -17,9 +17,9 @@ fn test_id_no_specified_user() { let exp_result = unwrap_or_return!(expected_result(&ts, &[])); let mut _exp_stdout = exp_result.stdout_str().to_string(); - #[cfg(target_os = "linux")] + #[cfg(not(feature = "feat_selinux"))] { - // NOTE: (SELinux NotImplemented) strip 'context' part from exp_stdout: + // NOTE: strip 'context' part from exp_stdout if selinux not enabled: // example: // uid=1001(runner) gid=121(docker) groups=121(docker),4(adm),101(systemd-journal) \ // context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 @@ -363,3 +363,88 @@ fn test_id_zero() { } } } + +#[test] +#[cfg(feature = "feat_selinux")] +fn test_id_context() { + use selinux::{self, KernelSupport}; + if selinux::kernel_support() == KernelSupport::Unsupported { + println!("test skipped: Kernel has no support for SElinux context",); + return; + } + let ts = TestScenario::new(util_name!()); + for c_flag in &["-Z", "--context"] { + ts.ucmd() + .args(&[c_flag]) + .succeeds() + .stdout_only(unwrap_or_return!(expected_result(&ts, &[c_flag])).stdout_str()); + for &z_flag in &["-z", "--zero"] { + let args = [c_flag, z_flag]; + ts.ucmd() + .args(&args) + .succeeds() + .stdout_only(unwrap_or_return!(expected_result(&ts, &args)).stdout_str()); + for &opt1 in &["--name", "--real"] { + // id: cannot print only names or real IDs in default format + let args = [opt1, c_flag]; + ts.ucmd() + .args(&args) + .succeeds() + .stdout_only(unwrap_or_return!(expected_result(&ts, &args)).stdout_str()); + let args = [opt1, c_flag, z_flag]; + ts.ucmd() + .args(&args) + .succeeds() + .stdout_only(unwrap_or_return!(expected_result(&ts, &args)).stdout_str()); + for &opt2 in &["--user", "--group", "--groups"] { + // u/g/G n/r z Z + // for now, we print clap's standard response for "conflicts_with" instead of: + // id: cannot print "only" of more than one choice + let args = [opt2, c_flag, opt1]; + let _result = ts.ucmd().args(&args).fails(); + // let exp_result = unwrap_or_return!(expected_result(&args)); + // result + // .stdout_is(exp_result.stdout_str()) + // .stderr_is(exp_result.stderr_str()) + // .code_is(exp_result.code()); + } + } + for &opt2 in &["--user", "--group", "--groups"] { + // u/g/G z Z + // for now, we print clap's standard response for "conflicts_with" instead of: + // id: cannot print "only" of more than one choice + let args = [opt2, c_flag]; + let _result = ts.ucmd().args(&args).fails(); + // let exp_result = unwrap_or_return!(expected_result(&args)); + // result + // .stdout_is(exp_result.stdout_str()) + // .stderr_is(exp_result.stderr_str()) + // .code_is(exp_result.code()); + } + } + } +} + +#[test] +#[cfg(unix)] +fn test_id_no_specified_user_posixly() { + // gnu/tests/id/no-context.sh + + let ts = TestScenario::new(util_name!()); + let result = ts.ucmd().env("POSIXLY_CORRECT", "1").run(); + assert!(!result.stdout_str().contains("context=")); + if !is_ci() { + result.success(); + } + + #[cfg(all(target_os = "linux", feature = "feat_selinux"))] + { + use selinux::{self, KernelSupport}; + if selinux::kernel_support() == KernelSupport::Unsupported { + println!("test skipped: Kernel has no support for SElinux context",); + } else { + let result = ts.ucmd().succeeds(); + assert!(result.stdout_str().contains("context=")); + } + } +} diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 4a1cc3fa9..1f21722ed 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -181,7 +181,7 @@ fn test_check_zero_terminated_failure() { .arg("-c") .arg("zero-terminated.txt") .fails() - .stdout_is("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n"); + .stderr_only("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n"); } #[test] @@ -220,32 +220,29 @@ fn test_random_shuffle_contains_all_lines() { #[test] fn test_random_shuffle_two_runs_not_the_same() { - // check to verify that two random shuffles are not equal; this has the - // potential to fail in the very unlikely event that the random order is the same - // as the starting order, or if both random sorts end up having the same order. - const FILE: &str = "default_unsorted_ints.expected"; - let (at, _ucmd) = at_and_ucmd!(); - let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); - let expected = at.read(FILE); - let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); + for arg in &["-R", "-k1,1R"] { + // check to verify that two random shuffles are not equal; this has the + // potential to fail in the very unlikely event that the random order is the same + // as the starting order, or if both random sorts end up having the same order. + const FILE: &str = "default_unsorted_ints.expected"; + let (at, _ucmd) = at_and_ucmd!(); + let result = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str(); + let expected = at.read(FILE); + let unexpected = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str(); - assert_ne!(result, expected); - assert_ne!(result, unexpected); + assert_ne!(result, expected); + assert_ne!(result, unexpected); + } } #[test] -fn test_random_shuffle_contains_two_runs_not_the_same() { - // check to verify that two random shuffles are not equal; this has the - // potential to fail in the unlikely event that random order is the same - // as the starting order, or if both random sorts end up having the same order. - const FILE: &str = "default_unsorted_ints.expected"; - let (at, _ucmd) = at_and_ucmd!(); - let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); - let expected = at.read(FILE); - let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); - - assert_ne!(result, expected); - assert_ne!(result, unexpected); +fn test_random_ignore_case() { + let input = "ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n"; + new_ucmd!() + .args(&["-fR"]) + .pipe_in(input) + .succeeds() + .stdout_is(input); } #[test] @@ -774,14 +771,15 @@ fn test_check() { new_ucmd!() .arg(diagnose_arg) .arg("check_fail.txt") + .arg("--buffer-size=10b") .fails() - .stdout_is("sort: check_fail.txt:6: disorder: 5\n"); + .stderr_only("sort: check_fail.txt:6: disorder: 5\n"); new_ucmd!() .arg(diagnose_arg) .arg("multiple_files.expected") .succeeds() - .stdout_is(""); + .stderr_is(""); } } @@ -796,6 +794,18 @@ fn test_check_silent() { } } +#[test] +fn test_check_unique() { + // Due to a clap bug the combination "-cu" does not work. "-c -u" works. + // See https://github.com/clap-rs/clap/issues/2624 + new_ucmd!() + .args(&["-c", "-u"]) + .pipe_in("A\nA\n") + .fails() + .code_is(1) + .stderr_only("sort: -:2: disorder: A"); +} + #[test] fn test_dictionary_and_nonprinting_conflicts() { let conflicting_args = ["n", "h", "g", "M"]; @@ -839,9 +849,9 @@ fn test_nonexistent_file() { .status_code(2) .stderr_only( #[cfg(not(windows))] - "sort: cannot read: \"nonexistent.txt\": No such file or directory (os error 2)", + "sort: cannot read: nonexistent.txt: No such file or directory", #[cfg(windows)] - "sort: cannot read: \"nonexistent.txt\": The system cannot find the file specified. (os error 2)", + "sort: cannot read: nonexistent.txt: The system cannot find the file specified.", ); } @@ -883,6 +893,29 @@ fn test_compress() { .stdout_only_fixture("ext_sort.expected"); } +#[test] +#[cfg(target_os = "linux")] +fn test_compress_merge() { + new_ucmd!() + .args(&[ + "--compress-program", + "gzip", + "-S", + "10", + "--batch-size=2", + "-m", + "--unique", + "merge_ints_interleaved_1.txt", + "merge_ints_interleaved_2.txt", + "merge_ints_interleaved_3.txt", + "merge_ints_interleaved_3.txt", + "merge_ints_interleaved_2.txt", + "merge_ints_interleaved_1.txt", + ]) + .succeeds() + .stdout_only_fixture("merge_ints_interleaved.expected"); +} + #[test] fn test_compress_fail() { TestScenario::new(util_name!()) @@ -959,3 +992,102 @@ fn test_key_takes_one_arg() { .succeeds() .stdout_is_fixture("keys_open_ended.expected"); } + +#[test] +fn test_verifies_out_file() { + let inputs = ["" /* no input */, "some input"]; + for &input in &inputs { + new_ucmd!() + .args(&["-o", "nonexistent_dir/nonexistent_file"]) + .pipe_in(input) + .ignore_stdin_write_error() + .fails() + .status_code(2) + .stderr_only( + #[cfg(not(windows))] + "sort: open failed: nonexistent_dir/nonexistent_file: No such file or directory", + #[cfg(windows)] + "sort: open failed: nonexistent_dir/nonexistent_file: The system cannot find the path specified.", + ); + } +} + +#[test] +fn test_verifies_files_after_keys() { + new_ucmd!() + .args(&[ + "-o", + "nonexistent_dir/nonexistent_file", + "-k", + "0", + "nonexistent_dir/input_file", + ]) + .fails() + .status_code(2) + .stderr_contains("failed to parse key"); +} + +#[test] +#[cfg(unix)] +fn test_verifies_input_files() { + new_ucmd!() + .args(&["/dev/random", "nonexistent_file"]) + .fails() + .status_code(2) + .stderr_is("sort: cannot read: nonexistent_file: No such file or directory"); +} + +#[test] +fn test_separator_null() { + new_ucmd!() + .args(&["-k1,1", "-k3,3", "-t", "\\0"]) + .pipe_in("z\0a\0b\nz\0b\0a\na\0z\0z\n") + .succeeds() + .stdout_only("a\0z\0z\nz\0b\0a\nz\0a\0b\n"); +} + +#[test] +fn test_output_is_input() { + let input = "a\nb\nc\n"; + let (at, mut cmd) = at_and_ucmd!(); + at.touch("file"); + at.append("file", input); + cmd.args(&["-m", "-u", "-o", "file", "file", "file", "file"]) + .succeeds(); + assert_eq!(at.read("file"), input); +} + +#[test] +#[cfg(unix)] +fn test_output_device() { + new_ucmd!() + .args(&["-o", "/dev/null"]) + .pipe_in("input") + .succeeds(); +} + +#[test] +fn test_merge_empty_input() { + new_ucmd!() + .args(&["-m", "empty.txt"]) + .succeeds() + .no_stderr() + .no_stdout(); +} + +#[test] +fn test_no_error_for_version() { + new_ucmd!() + .arg("--version") + .succeeds() + .stdout_contains("sort"); +} + +#[test] +fn test_wrong_args_exit_code() { + new_ucmd!() + .arg("--misspelled") + .fails() + .status_code(2) + .stderr_contains("--misspelled"); +} diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 7cff0d89c..af9e3de45 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -64,7 +64,7 @@ mod test_generate_tokens { #[test] fn printf_format() { - let s = "%-# 15a\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.-23w\\x12\\167\\132\\112\\n"; + let s = "%-# 15a\\t\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.-23w\\x12\\167\\132\\112\\n"; let expected = vec![ Token::Directive { flag: F_LEFT | F_ALTER | F_SPACE, @@ -72,6 +72,7 @@ mod test_generate_tokens { precision: -1, format: 'a', }, + Token::Char('\t'), Token::Char('\r'), Token::Char('"'), Token::Char('\\'), diff --git a/tests/by-util/test_tac.rs b/tests/by-util/test_tac.rs index a8adbb28e..599bc19c7 100644 --- a/tests/by-util/test_tac.rs +++ b/tests/by-util/test_tac.rs @@ -66,5 +66,19 @@ fn test_invalid_input() { .ucmd() .arg("a") .fails() - .stderr_contains("dir: read error: Invalid argument"); + .stderr_contains("a: read error: Invalid argument"); +} + +#[test] +fn test_no_line_separators() { + new_ucmd!().pipe_in("a").succeeds().stdout_is("a"); +} + +#[test] +fn test_null_separator() { + new_ucmd!() + .args(&["-s", ""]) + .pipe_in("a\0b\0") + .succeeds() + .stdout_is("b\0a\0"); } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index e8dd63317..28c3580bb 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -23,6 +23,15 @@ fn test_stdin_default() { .stdout_is_fixture("foobar_stdin_default.expected"); } +#[test] +fn test_stdin_explicit() { + new_ucmd!() + .pipe_in_fixture(FOOBAR_TXT) + .arg("-") + .run() + .stdout_is_fixture("foobar_stdin_default.expected"); +} + #[test] fn test_single_default() { new_ucmd!() diff --git a/tests/common/util.rs b/tests/common/util.rs index 68b7caa9b..41389d567 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -591,28 +591,40 @@ impl AtPath { } } - pub fn hard_link(&self, src: &str, dst: &str) { + pub fn hard_link(&self, original: &str, link: &str) { log_info( "hard_link", - &format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)), + &format!( + "{},{}", + self.plus_as_string(original), + self.plus_as_string(link) + ), ); - hard_link(&self.plus(src), &self.plus(dst)).unwrap(); + hard_link(&self.plus(original), &self.plus(link)).unwrap(); } - pub fn symlink_file(&self, src: &str, dst: &str) { + pub fn symlink_file(&self, original: &str, link: &str) { log_info( "symlink", - &format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)), + &format!( + "{},{}", + self.plus_as_string(original), + self.plus_as_string(link) + ), ); - symlink_file(&self.plus(src), &self.plus(dst)).unwrap(); + symlink_file(&self.plus(original), &self.plus(link)).unwrap(); } - pub fn symlink_dir(&self, src: &str, dst: &str) { + pub fn symlink_dir(&self, original: &str, link: &str) { log_info( "symlink", - &format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)), + &format!( + "{},{}", + self.plus_as_string(original), + self.plus_as_string(link) + ), ); - symlink_dir(&self.plus(src), &self.plus(dst)).unwrap(); + symlink_dir(&self.plus(original), &self.plus(link)).unwrap(); } pub fn is_symlink(&self, path: &str) -> bool { diff --git a/tests/fixtures/sort/empty.txt b/tests/fixtures/sort/empty.txt new file mode 100644 index 000000000..e69de29bb