diff --git a/.cirrus.yml b/.cirrus.yml index 5d16dce92..50f8a25b1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,7 +1,14 @@ +env: + # Temporary workaround for error `error: sysinfo not supported on + # this platform` seen on FreeBSD platforms, affecting Rustup + # + # References: https://github.com/rust-lang/rustup/issues/2774 + RUSTUP_IO_THREADS: 1 + task: name: stable x86_64-unknown-freebsd-12 freebsd_instance: - image: freebsd-12-1-release-amd64 + image: freebsd-12-2-release-amd64 setup_script: - pkg install -y curl gmake - curl https://sh.rustup.rs -sSf --output rustup.sh diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index cc0972bf9..804720bea 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -11,7 +11,7 @@ env: PROJECT_NAME: coreutils PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_AUTH: "uutils" - RUST_MIN_SRV: "1.40.0" ## v1.40.0 + RUST_MIN_SRV: "1.43.1" ## v1.43.1 RUST_COV_SRV: "2020-08-01" ## (~v1.47.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel on: [push, pull_request] @@ -235,6 +235,9 @@ jobs: arm-unknown-linux-gnueabihf) sudo apt-get -y update ; sudo apt-get -y install gcc-arm-linux-gnueabihf ;; aarch64-unknown-linux-gnu) sudo apt-get -y update ; sudo apt-get -y install gcc-aarch64-linux-gnu ;; esac + case '${{ matrix.job.os }}' in + macos-latest) brew install coreutils ;; # needed for testing + esac - name: Initialize workflow variables id: vars shell: bash @@ -360,6 +363,10 @@ jobs: mkdir -p '${{ steps.vars.outputs.STAGING }}/dpkg' - name: rust toolchain ~ install uses: actions-rs/toolchain@v1 + env: + # Override auto-detection of RAM for Rustc install. 
+ # https://github.com/rust-lang/rustup/issues/2229#issuecomment-585855925 + RUSTUP_UNPACK_RAM: "21474836480" with: toolchain: ${{ steps.vars.outputs.TOOLCHAIN }} target: ${{ matrix.job.target }} @@ -486,6 +493,13 @@ jobs: - { os: windows-latest , features: windows } steps: - uses: actions/checkout@v1 + - name: Install/setup prerequisites + shell: bash + run: | + ## install/setup prerequisites + case '${{ matrix.job.os }}' in + macos-latest) brew install coreutils ;; # needed for testing + esac # - name: Reattach HEAD ## may be needed for accurate code coverage info # run: git checkout ${{ github.head_ref }} - name: Initialize workflow variables diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index a68f0a083..1f9250900 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -12,16 +12,18 @@ jobs: uses: actions/checkout@v2 with: path: 'uutils' - - name: Chechout GNU coreutils + - name: Checkout GNU coreutils uses: actions/checkout@v2 with: repository: 'coreutils/coreutils' path: 'gnu' - - name: Chechout GNU corelib + ref: v8.32 + - name: Checkout GNU corelib uses: actions/checkout@v2 with: repository: 'coreutils/gnulib' path: 'gnulib' + ref: 8e99f24c0931a38880c6ee9b8287c7da80b0036b fetch-depth: 0 # gnu gets upset if gnulib is a shallow checkout - name: Install `rust` toolchain uses: actions-rs/toolchain@v1 @@ -30,103 +32,43 @@ jobs: default: true profile: minimal # minimal component installation (ie, no documentation) components: rustfmt - - name: Build binaries + - name: Install deps shell: bash run: | sudo apt-get update - sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify python3-sphinx - pushd uutils - make PROFILE=release - BUILDDIR="$PWD/target/release/" - cp "${BUILDDIR}/install" "${BUILDDIR}/ginstall" # The GNU tests rename this script before running, to avoid confusion with the make target - # Create *sum binaries - for sum in b2sum md5sum sha1sum sha224sum sha256sum sha384sum 
sha512sum - do - sum_path="${BUILDDIR}/${sum}" - test -f "${sum_path}" || cp "${BUILDDIR}/hashsum" "${sum_path}" - done - test -f "${BUILDDIR}/[" || cp "${BUILDDIR}/test" "${BUILDDIR}/[" - popd - GNULIB_SRCDIR="$PWD/gnulib" - pushd gnu/ - - # Any binaries that aren't built become `false` so their tests fail - for binary in $(./build-aux/gen-lists-of-programs.sh --list-progs) - do - bin_path="${BUILDDIR}/${binary}" - test -f "${bin_path}" || { echo "'${binary}' was not built with uutils, using the 'false' program"; cp "${BUILDDIR}/false" "${bin_path}"; } - done - - ./bootstrap --gnulib-srcdir="$GNULIB_SRCDIR" - ./configure --quiet --disable-gcc-warnings - #Add timeout to to protect against hangs - sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver - # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils - sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile - sed -i 's| tr | /usr/bin/tr |' tests/init.sh - make - # Generate the factor tests, so they can be fixed - for i in {00..36} - do - make tests/factor/t${i}.sh - done - grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||' - sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh - - # Remove tests checking for --version & --help - # Not really interesting for us and logs are too big - sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \ - -e '/tests\/misc\/help-version.sh/ D' \ - -e '/tests\/misc\/help-version-getopt.sh/ D' \ - Makefile - - # printf doesn't limit the values used in its arg, so this produced ~2GB of output - sed -i '/INT_OFLOW/ D' tests/misc/printf.sh - - # Use the system coreutils where the test fails due to error in a util that is not the one being tested - sed -i 's|stat|/usr/bin/stat|' tests/chgrp/basic.sh tests/cp/existing-perm-dir.sh tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh - sed -i 's|ls -|/usr/bin/ls -|' tests/chgrp/posix-H.sh 
tests/chown/deref.sh tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh tests/du/8gb.sh - sed -i 's|mkdir |/usr/bin/mkdir |' tests/cp/existing-perm-dir.sh tests/rm/empty-inacc.sh - sed -i 's|timeout \([[:digit:]]\)| /usr/bin/timeout \1|' tests/tail-2/inotify-rotate.sh tests/tail-2/inotify-dir-recreate.sh tests/tail-2/inotify-rotate-resources.sh tests/cp/parent-perm-race.sh tests/ls/infloop.sh tests/misc/sort-exit-early.sh tests/misc/sort-NaN-infloop.sh tests/misc/uniq-perf.sh tests/tail-2/inotify-only-regular.sh tests/tail-2/pipe-f2.sh tests/tail-2/retry.sh tests/tail-2/symlink.sh tests/tail-2/wait.sh tests/tail-2/pid.sh tests/dd/stats.sh tests/tail-2/follow-name.sh tests/misc/shuf.sh # Don't break the function called 'grep_timeout' - sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/mkdir/p-3.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/du/inaccessible-cwd.sh tests/mv/i-2.sh tests/chgrp/basic.sh tests/misc/shuf.sh - sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh - sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh - sed -i 's|truncate |/usr/bin/truncate |' tests/split/fail.sh - sed -i 's|dd |/usr/bin/dd |' tests/du/8gb.sh tests/tail-2/big-4gb.sh tests/cp/fiemap-2.sh init.cfg - sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh - sed -i 's|touch |/usr/bin/touch |' tests/cp/preserve-link.sh tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/ls/abmon-align.sh tests/ls/rt-1.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh - sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh - sed -i 's|printf |/usr/bin/printf |' tests/dd/ascii.sh - sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh - sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh - sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh - - #Add specific timeout to tests that currently hang to limit time spent waiting - sed -i 's|seq 
\$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh - sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh - - - test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" + sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify python3-sphinx jq + - name: Build binaries + shell: bash + run: | + cd uutils + bash util/build-gnu.sh - name: Run GNU tests shell: bash run: | - BUILDDIR="${PWD}/uutils/target/release" - GNULIB_DIR="${PWD}/gnulib" - pushd gnu - - timeout -sKILL 2h make -j "$(nproc)" check SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no || : # Kill after 4 hours in case something gets stuck in make + bash uutils/util/run-gnu-test.sh - name: Extract tests info shell: bash run: | - if test -f gnu/tests/test-suite.log + LOG_FILE=gnu/tests/test-suite.log + if test -f "$LOG_FILE" then - TOTAL=$( grep "# TOTAL:" gnu/tests/test-suite.log|cut -d' ' -f2-) - PASS=$( grep "# PASS:" gnu/tests/test-suite.log|cut -d' ' -f2-) - SKIP=$( grep "# SKIP:" gnu/tests/test-suite.log|cut -d' ' -f2-) - FAIL=$( grep "# FAIL:" gnu/tests/test-suite.log|cut -d' ' -f2-) - XPASS=$( grep "# XPASS:" gnu/tests/test-suite.log|cut -d' ' -f2-) - ERROR=$( grep "# ERROR:" gnu/tests/test-suite.log|cut -d' ' -f2-) - echo "::warning ::GNU testsuite = $TOTAL / $PASS / $FAIL / $ERROR" + TOTAL=$(sed -n "s/.*# TOTAL: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + PASS=$(sed -n "s/.*# PASS: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + SKIP=$(sed -n "s/.*# SKIP: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + FAIL=$(sed -n "s/.*# FAIL: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + XPASS=$(sed -n "s/.*# XPASS: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + ERROR=$(sed -n "s/.*# ERROR: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + echo "::warning ::GNU testsuite = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR" + jq -n \ + --arg date "$(date --rfc-email)" \ + --arg sha 
"$GITHUB_SHA" \ + --arg total "$TOTAL" \ + --arg pass "$PASS" \ + --arg skip "$SKIP" \ + --arg fail "$FAIL" \ + --arg xpass "$XPASS" \ + --arg error "$ERROR" \ + '{($date): { sha: $sha, total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > gnu-result.json else echo "::error ::Failed to get summary of test results" fi @@ -135,3 +77,8 @@ jobs: with: name: test-report path: gnu/tests/**/*.log + + - uses: actions/upload-artifact@v2 + with: + name: gnu-result + path: gnu-result.json diff --git a/.gitignore b/.gitignore index b1ac52506..77e8f717e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,6 @@ target/ Cargo.lock lib*.a /docs/_build +*.iml +### macOS ### +.DS_Store diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 389ba44b0..000000000 --- a/.travis.yml +++ /dev/null @@ -1,72 +0,0 @@ -language: rust - -rust: - - stable - - beta - -os: - - linux - # - osx - -env: - # sphinx v1.8.0 is bugged & fails for linux builds; so, force specific `sphinx` version - global: FEATURES='' TEST_INSTALL='' SPHINX_VERSIONED='sphinx==1.7.8' - -matrix: - allow_failures: - - rust: beta - - rust: nightly - fast_finish: true - include: - - rust: 1.40.0 - env: FEATURES=unix - # - rust: stable - # os: linux - # env: FEATURES=unix TEST_INSTALL=true - # - rust: stable - # os: osx - # env: FEATURES=macos TEST_INSTALL=true - - rust: nightly - os: linux - env: FEATURES=nightly,unix TEST_INSTALL=true - - rust: nightly - os: osx - env: FEATURES=nightly,macos TEST_INSTALL=true - - rust: nightly - os: linux - env: FEATURES=nightly,feat_os_unix_redox CC=x86_64-unknown-redox-gcc CARGO_ARGS='--no-default-features --target=x86_64-unknown-redox' REDOX=1 - -cache: - directories: - - $HOME/.cargo - -sudo: true - -before_install: - - if [ $REDOX ]; then ./.travis/redox-toolchain.sh; fi - -install: - - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install python-pip && sudo pip install $SPHINX_VERSIONED; fi - - | - if [ $TRAVIS_OS_NAME = osx ]; 
then - brew update - brew upgrade python - pip3 install $SPHINX_VERSIONED - fi - -script: - - cargo build $CARGO_ARGS --features "$FEATURES" - - if [ ! $REDOX ]; then cargo test $CARGO_ARGS -p uucore -p coreutils --features "$FEATURES" --no-fail-fast; fi - - if [ -n "$TEST_INSTALL" ]; then mkdir installdir_test; DESTDIR=installdir_test make install; [ `ls installdir_test/usr/local/bin | wc -l` -gt 0 ]; fi - -addons: - apt: - packages: - - libssl-dev - -after_success: | - if [ "$TRAVIS_OS_NAME" = linux -a "$TRAVIS_RUST_VERSION" = stable ]; then - bash <(curl https://raw.githubusercontent.com/xd009642/tarpaulin/master/travis-install.sh) - cargo tarpaulin --out Xml - bash <(curl -s https://codecov.io/bash) - fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bcb1f8fff..3793a0968 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,10 +70,6 @@ lines for non-utility modules include: README: add help ``` -``` -travis: fix build -``` - ``` uucore: add new modules ``` diff --git a/Cargo.lock b/Cargo.lock index 6deae1a49..fd6f127c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,11 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" + [[package]] name = "advapi32-sys" version = "0.2.0" @@ -37,6 +43,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "array-init" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6945cc5422176fc5e602e590c2878d2c2acd9a4fe20a4baa7c28022521698ec6" + [[package]] name = "arrayvec" version = "0.4.12" @@ -63,6 +75,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "binary-heap-plus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f068638f8ff9e118a9361e66a411eff410e7fb3ecaa23bf9272324f8fc606d7" +dependencies = [ + "compare", +] + [[package]] name = "bit-set" version = "0.5.2" @@ -136,9 +157,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cast" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75" +checksum = "57cdfa5d50aad6cb4d44dcab6101a7f79925bd59d82ca42f38a9856a28865374" dependencies = [ "rustc_version", ] @@ -198,6 +219,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "compare" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3" + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -228,7 +255,6 @@ dependencies = [ "rand 0.7.3", "regex", "sha1", - "tempdir", "tempfile", "textwrap", "time", @@ -259,6 +285,7 @@ dependencies = [ "uu_expand", "uu_expr", "uu_factor", + "uu_factor_benches", "uu_false", "uu_fmt", "uu_fold", @@ 
-602,6 +629,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "dns-lookup" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093d88961fd18c4ecacb8c80cd0b356463ba941ba11e0e01f9cf5271380b79dc" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "socket2", + "winapi 0.3.9", +] + [[package]] name = "dunce" version = "1.0.1" @@ -644,7 +683,7 @@ checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" dependencies = [ "cfg-if 1.0.0", "libc", - "redox_syscall 0.2.7", + "redox_syscall 0.2.8", "winapi 0.3.9", ] @@ -693,7 +732,18 @@ checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ "cfg-if 1.0.0", "libc", - "wasi", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.10.2+wasi-snapshot-preview1", ] [[package]] @@ -809,9 +859,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "js-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d99f9e3e84b8f67f846ef5b4cbbc3b1c29f6c759fcbce6f01aa0e73d932a24c" +checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062" dependencies = [ "wasm-bindgen", ] @@ -1053,6 +1103,29 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "ouroboros" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc1f52300b81ac4eeeb6c00c20f7e86556c427d9fb2d92b68fc73c22f331cd15" +dependencies = [ + "ouroboros_macro", + "stable_deref_trait", +] + +[[package]] +name = "ouroboros_macro" +version 
= "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41db02c8f8731cdd7a72b433c7900cce4bf245465b452c364bfd21f4566ab055" +dependencies = [ + "Inflector", + "proc-macro-error", + "proc-macro2", + "quote 1.0.9", + "syn", +] + [[package]] name = "output_vt100" version = "0.1.2" @@ -1105,6 +1178,15 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + [[package]] name = "pkg-config" version = "0.3.19" @@ -1123,9 +1205,9 @@ dependencies = [ [[package]] name = "plotters" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" dependencies = [ "num-traits", "plotters-backend", @@ -1167,6 +1249,30 @@ dependencies = [ "output_vt100", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote 1.0.9", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote 1.0.9", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -1175,9 +1281,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" +checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" dependencies = [ "unicode-xid 0.2.2", ] @@ -1215,19 +1321,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi 0.3.9", -] - [[package]] name = "rand" version = "0.5.6" @@ -1247,14 +1340,26 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ - "getrandom", + "getrandom 0.1.16", "libc", - "rand_chacha", + "rand_chacha 0.2.2", "rand_core 0.5.1", - "rand_hc", + "rand_hc 0.2.0", "rand_pcg", ] +[[package]] +name = "rand" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +dependencies = [ + "libc", + "rand_chacha 0.3.0", + "rand_core 0.6.2", + "rand_hc 0.3.0", +] + [[package]] name = "rand_chacha" version = "0.2.2" @@ -1265,6 +1370,16 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_chacha" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.2", +] + [[package]] name = "rand_core" version = "0.3.1" @@ -1286,7 +1401,16 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "getrandom", + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +dependencies = [ + "getrandom 0.2.3", ] [[package]] @@ -1298,6 +1422,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_hc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +dependencies = [ + "rand_core 0.6.2", +] + [[package]] name = "rand_pcg" version = "0.2.1" @@ -1309,9 +1442,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" dependencies = [ "autocfg", "crossbeam-deque", @@ -1321,9 +1454,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -1332,15 +1465,6 @@ dependencies = [ "num_cpus", ] -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - [[package]] name = "redox_syscall" version = "0.1.57" @@ -1349,9 +1473,9 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_syscall" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2" +checksum = 
"742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc" dependencies = [ "bitflags", ] @@ -1362,14 +1486,14 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f" dependencies = [ - "redox_syscall 0.2.7", + "redox_syscall 0.2.8", ] [[package]] name = "regex" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce5f1ceb7f74abbce32601642fcf8e8508a8a8991e0621c7d750295b9095702b" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr 2.4.0", @@ -1402,9 +1526,9 @@ dependencies = [ [[package]] name = "retain_mut" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53552c6c49e1e13f1a203ef0080ab3bbef0beb570a528993e83df057a9d9bba1" +checksum = "e9c17925a9027d298a4603d286befe3f9dc0e8ed02523141914eb628798d6e5b" [[package]] name = "rust-ini" @@ -1414,11 +1538,11 @@ checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" [[package]] name = "rustc_version" -version = "0.2.3" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" dependencies = [ - "semver", + "semver 0.11.0", ] [[package]] @@ -1448,7 +1572,16 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser", + "semver-parser 0.7.0", +] + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser 0.10.2", 
] [[package]] @@ -1458,14 +1591,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] -name = "serde" -version = "1.0.125" +name = "semver-parser" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" dependencies = [ - "serde_derive", + "pest", ] +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" + [[package]] name = "serde_cbor" version = "0.11.1" @@ -1478,9 +1617,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.125" +version = "1.0.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" dependencies = [ "proc-macro2", "quote 1.0.9", @@ -1559,14 +1698,22 @@ dependencies = [ ] [[package]] -name = "smallvec" -version = "1.6.1" +name = "socket2" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e" dependencies = [ - "serde", + "cfg-if 1.0.0", + "libc", + "winapi 0.3.9", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.8.0" @@ -1602,26 +1749,16 @@ dependencies = [ "unicode-xid 0.2.2", ] -[[package]] -name = "tempdir" -version = "0.3.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" -dependencies = [ - "rand 0.4.6", - "remove_dir_all", -] - [[package]] name = "tempfile" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", - "rand 0.7.3", - "redox_syscall 0.1.57", + "rand 0.8.3", + "redox_syscall 0.2.8", "remove_dir_all", "winapi 0.3.9", ] @@ -1653,7 +1790,7 @@ checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e" dependencies = [ "libc", "numtoa", - "redox_syscall 0.2.7", + "redox_syscall 0.2.8", "redox_termios", ] @@ -1726,6 +1863,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + [[package]] name = "unicode-segmentation" version = "1.7.1" @@ -1780,6 +1923,7 @@ dependencies = [ name = "uu_arch" version = "0.0.6" dependencies = [ + "clap", "platform-info", "uucore", "uucore_procs", @@ -1941,11 +2085,9 @@ name = "uu_df" version = "0.0.6" dependencies = [ "clap", - "libc", "number_prefix", "uucore", "uucore_procs", - "winapi 0.3.9", ] [[package]] @@ -1972,6 +2114,7 @@ name = "uu_du" version = "0.0.6" dependencies = [ "chrono", + "clap", "uucore", "uucore_procs", "winapi 0.3.9", @@ -2022,17 +2165,26 @@ name = "uu_factor" version = "0.0.6" dependencies = [ "coz", - "criterion", "num-traits", "paste", "quickcheck", "rand 0.7.3", - "rand_chacha", - "smallvec 0.6.14", + "smallvec", "uucore", "uucore_procs", ] 
+[[package]] +name = "uu_factor_benches" +version = "0.0.0" +dependencies = [ + "array-init", + "criterion", + "rand 0.7.3", + "rand_chacha 0.2.2", + "uu_factor", +] + [[package]] name = "uu_false" version = "0.0.6" @@ -2184,6 +2336,7 @@ dependencies = [ name = "uu_logname" version = "0.0.6" dependencies = [ + "clap", "libc", "uucore", "uucore_procs", @@ -2233,7 +2386,7 @@ dependencies = [ name = "uu_mknod" version = "0.0.6" dependencies = [ - "getopts", + "clap", "libc", "uucore", "uucore_procs", @@ -2503,16 +2656,17 @@ dependencies = [ name = "uu_sort" version = "0.0.6" dependencies = [ + "binary-heap-plus", "clap", + "compare", "fnv", "itertools 0.10.0", + "memchr 2.4.0", + "ouroboros", "rand 0.7.3", "rayon", - "semver", - "serde", - "serde_json", - "smallvec 1.6.1", - "tempdir", + "semver 0.9.0", + "tempfile", "unicode-width", "uucore", "uucore_procs", @@ -2532,7 +2686,6 @@ name = "uu_stat" version = "0.0.6" dependencies = [ "clap", - "time", "uucore", "uucore_procs", ] @@ -2796,6 +2949,7 @@ name = "uucore" version = "0.0.8" dependencies = [ "data-encoding", + "dns-lookup", "dunce", "getopts", "lazy_static", @@ -2806,6 +2960,7 @@ dependencies = [ "thiserror", "time", "wild", + "winapi 0.3.9", ] [[package]] @@ -2823,6 +2978,12 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + [[package]] name = "void" version = "1.0.2" @@ -2847,10 +3008,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] -name = "wasm-bindgen" -version = "0.2.73" +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "83240549659d187488f91f33c0f8547cbfef0b2088bc470c116d1d260ef623d9" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "wasm-bindgen" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" dependencies = [ "cfg-if 1.0.0", "wasm-bindgen-macro", @@ -2858,9 +3025,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae70622411ca953215ca6d06d3ebeb1e915f0f6613e3b495122878d7ebec7dae" +checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" dependencies = [ "bumpalo", "lazy_static", @@ -2873,9 +3040,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e734d91443f177bfdb41969de821e15c516931c3c3db3d318fa1b68975d0f6f" +checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" dependencies = [ "quote 1.0.9", "wasm-bindgen-macro-support", @@ -2883,9 +3050,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53739ff08c8a68b0fdbcd54c372b8ab800b1449ab3c9d706503bc7dd1621b2c" +checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" dependencies = [ "proc-macro2", "quote 1.0.9", @@ -2896,15 +3063,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9a543ae66aa233d14bb765ed9af4a33e81b8b58d1584cf1b47ff8cd0b9e4489" +checksum = 
"d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" [[package]] name = "web-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a905d57e488fec8861446d3393670fb50d27a262344013181c2cdf9fff5481be" +checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 322d34f3a..cc36199cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -324,6 +324,9 @@ wc = { optional=true, version="0.0.6", package="uu_wc", path="src/uu/wc" } who = { optional=true, version="0.0.6", package="uu_who", path="src/uu/who" } whoami = { optional=true, version="0.0.6", package="uu_whoami", path="src/uu/whoami" } yes = { optional=true, version="0.0.6", package="uu_yes", path="src/uu/yes" } + +factor_benches = { optional = true, version = "0.0.0", package = "uu_factor_benches", path = "tests/benches/factor" } + # # * pinned transitive dependencies # Not needed for now. 
Keep as examples: @@ -339,15 +342,11 @@ pretty_assertions = "0.7.2" rand = "0.7" regex = "1.0" sha1 = { version="0.6", features=["std"] } -## tempfile 3.2 depends on recent version of rand which depends on getrandom v0.2 which has compiler errors for MinRustV v1.32.0 -## min dep for tempfile = Rustc 1.40 -tempfile = "= 3.1.0" +tempfile = "3.2.0" time = "0.1" unindent = "0.1" uucore = { version=">=0.0.8", package="uucore", path="src/uucore", features=["entries"] } walkdir = "2.2" -tempdir = "0.3" -atty = "0.2.14" [target.'cfg(unix)'.dev-dependencies] rust-users = { version="0.10", package="users" } diff --git a/README.md b/README.md index 95dc036fd..1365bf7ce 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ [![LOC](https://tokei.rs/b1/github/uutils/coreutils?category=code)](https://github.com/Aaronepower/tokei) [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils) -[![Build Status](https://api.travis-ci.org/uutils/coreutils.svg?branch=master)](https://travis-ci.org/uutils/coreutils) [![Build Status (FreeBSD)](https://api.cirrus-ci.com/github/uutils/coreutils.svg)](https://cirrus-ci.com/github/uutils/coreutils/master) [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils) @@ -40,7 +39,7 @@ to compile anywhere, and this is as good a way as any to try and learn it. ### Rust Version uutils follows Rust's release channels and is tested against stable, beta and nightly. -The current oldest supported version of the Rust compiler is `1.40.0`. +The current oldest supported version of the Rust compiler is `1.43.1`. On both Windows and Redox, only the nightly version is tested currently. 
@@ -319,6 +318,16 @@ To pass an argument like "-v" to the busybox test runtime $ make UTILS='UTILITY_1 UTILITY_2' RUNTEST_ARGS='-v' busytest ``` +## Comparing with GNU + +![Evolution over time](https://github.com/uutils/coreutils-tracking/blob/main/gnu-results.png?raw=true) + +To run locally: +```bash +$ bash util/build-gnu.sh +$ bash util/run-gnu-test.sh +``` + ## Contribute To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md). diff --git a/src/uu/arch/Cargo.toml b/src/uu/arch/Cargo.toml index 0b4359620..b3fe1f8cb 100644 --- a/src/uu/arch/Cargo.toml +++ b/src/uu/arch/Cargo.toml @@ -16,6 +16,7 @@ path = "src/arch.rs" [dependencies] platform-info = "0.1" +clap = "2.33" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/arch/src/arch.rs b/src/uu/arch/src/arch.rs index a4c57e282..31278f000 100644 --- a/src/uu/arch/src/arch.rs +++ b/src/uu/arch/src/arch.rs @@ -10,17 +10,20 @@ extern crate uucore; use platform_info::*; -use uucore::InvalidEncodingHandling; -static SYNTAX: &str = "Display machine architecture"; +use clap::App; + +static VERSION: &str = env!("CARGO_PKG_VERSION"); +static ABOUT: &str = "Display machine architecture"; static SUMMARY: &str = "Determine architecture name for current machine."; -static LONG_HELP: &str = ""; pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse( - args.collect_str(InvalidEncodingHandling::ConvertLossy) - .accept_any(), - ); + App::new(executable!()) + .version(VERSION) + .about(ABOUT) + .after_help(SUMMARY) + .get_matches_from(args); + let uts = return_if_err!(1, PlatformInfo::new()); println!("{}", uts.machine().trim()); 0 diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index 8dea096be..69ea902e6 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -347,7 +347,7 @@ fn cat_files(files: Vec, options: &OutputOptions) -> Result<(), 
u32> { for path in &files { if let Err(err) = cat_path(path, &options, &mut state) { - show_info!("{}: {}", path, err); + show_error!("{}: {}", path, err); error_count += 1; } } diff --git a/src/uu/chgrp/src/chgrp.rs b/src/uu/chgrp/src/chgrp.rs index 2afef7de0..f6afc2805 100644 --- a/src/uu/chgrp/src/chgrp.rs +++ b/src/uu/chgrp/src/chgrp.rs @@ -97,7 +97,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if recursive { if bit_flag == FTS_PHYSICAL { if derefer == 1 { - show_info!("-R --dereference requires -H or -L"); + show_error!("-R --dereference requires -H or -L"); return 1; } derefer = 0; @@ -132,7 +132,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_gid = meta.gid(); } Err(e) => { - show_info!("failed to get attributes of '{}': {}", file, e); + show_error!("failed to get attributes of '{}': {}", file, e); return 1; } } @@ -143,7 +143,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_gid = g; } _ => { - show_info!("invalid group: {}", matches.free[0].as_str()); + show_error!("invalid group: {}", matches.free[0].as_str()); return 1; } } @@ -235,8 +235,8 @@ impl Chgrper { if let Some(p) = may_exist { if p.parent().is_none() || self.is_bind_root(p) { - show_info!("it is dangerous to operate recursively on '/'"); - show_info!("use --no-preserve-root to override this failsafe"); + show_error!("it is dangerous to operate recursively on '/'"); + show_error!("use --no-preserve-root to override this failsafe"); return 1; } } @@ -250,12 +250,12 @@ impl Chgrper { self.verbosity.clone(), ) { Ok(n) => { - show_info!("{}", n); + show_error!("{}", n); 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -275,7 +275,7 @@ impl Chgrper { for entry in WalkDir::new(root).follow_links(follow).min_depth(1) { let entry = unwrap!(entry, e, { ret = 1; - show_info!("{}", e); + show_error!("{}", e); continue; }); let path = entry.path(); @@ -290,13 +290,13 @@ impl Chgrper { ret = match wrap_chgrp(path, &meta, 
self.dest_gid, follow, self.verbosity.clone()) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -313,7 +313,7 @@ impl Chgrper { unwrap!(path.metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot access '{}': {}", path.display(), e), + _ => show_error!("cannot access '{}': {}", path.display(), e), } return None; }) @@ -321,7 +321,7 @@ impl Chgrper { unwrap!(path.symlink_metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot dereference '{}': {}", path.display(), e), + _ => show_error!("cannot dereference '{}': {}", path.display(), e), } return None; }) diff --git a/src/uu/chmod/src/chmod.rs b/src/uu/chmod/src/chmod.rs index d01f0316e..c4bf309d6 100644 --- a/src/uu/chmod/src/chmod.rs +++ b/src/uu/chmod/src/chmod.rs @@ -15,6 +15,7 @@ use std::fs; use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::Path; use uucore::fs::display_permissions_unix; +use uucore::libc::mode_t; #[cfg(not(windows))] use uucore::mode; use uucore::InvalidEncodingHandling; @@ -261,8 +262,10 @@ impl Chmoder { ); } return Ok(()); + } else if err.kind() == std::io::ErrorKind::PermissionDenied { + show_error!("'{}': Permission denied", file.display()); } else { - show_error!("{}: '{}'", err, file.display()); + show_error!("'{}': {}", file.display(), err); } return Err(1); } @@ -306,7 +309,7 @@ impl Chmoder { "mode of '{}' retained as {:04o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), ); } Ok(()) @@ -315,25 +318,25 @@ impl Chmoder { show_error!("{}", err); } if self.verbose { - show_info!( + show_error!( "failed to change mode of file '{}' from {:o} ({}) to {:o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), mode, - display_permissions_unix(mode) + 
display_permissions_unix(mode as mode_t, false) ); } Err(1) } else { if self.verbose || self.changes { - show_info!( + show_error!( "mode of '{}' changed from {:o} ({}) to {:o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), mode, - display_permissions_unix(mode) + display_permissions_unix(mode as mode_t, false) ); } Ok(()) diff --git a/src/uu/chown/src/chown.rs b/src/uu/chown/src/chown.rs index ff9c42dd0..3d0b25814 100644 --- a/src/uu/chown/src/chown.rs +++ b/src/uu/chown/src/chown.rs @@ -199,7 +199,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if recursive { if bit_flag == FTS_PHYSICAL { if derefer == 1 { - show_info!("-R --dereference requires -H or -L"); + show_error!("-R --dereference requires -H or -L"); return 1; } derefer = 0; @@ -227,7 +227,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { Ok((Some(uid), Some(gid))) => IfFrom::UserGroup(uid, gid), Ok((None, None)) => IfFrom::All, Err(e) => { - show_info!("{}", e); + show_error!("{}", e); return 1; } } @@ -244,7 +244,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_uid = Some(meta.uid()); } Err(e) => { - show_info!("failed to get attributes of '{}': {}", file, e); + show_error!("failed to get attributes of '{}': {}", file, e); return 1; } } @@ -255,7 +255,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_gid = g; } Err(e) => { - show_info!("{}", e); + show_error!("{}", e); return 1; } } @@ -377,8 +377,8 @@ impl Chowner { if let Some(p) = may_exist { if p.parent().is_none() { - show_info!("it is dangerous to operate recursively on '/'"); - show_info!("use --no-preserve-root to override this failsafe"); + show_error!("it is dangerous to operate recursively on '/'"); + show_error!("use --no-preserve-root to override this failsafe"); return 1; } } @@ -395,13 +395,13 @@ impl Chowner { ) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } 0 } Err(e) => { if self.verbosity != 
Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -424,7 +424,7 @@ impl Chowner { for entry in WalkDir::new(root).follow_links(follow).min_depth(1) { let entry = unwrap!(entry, e, { ret = 1; - show_info!("{}", e); + show_error!("{}", e); continue; }); let path = entry.path(); @@ -450,13 +450,13 @@ impl Chowner { ) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -472,7 +472,7 @@ impl Chowner { unwrap!(path.metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot access '{}': {}", path.display(), e), + _ => show_error!("cannot access '{}': {}", path.display(), e), } return None; }) @@ -480,7 +480,7 @@ impl Chowner { unwrap!(path.symlink_metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot dereference '{}': {}", path.display(), e), + _ => show_error!("cannot dereference '{}': {}", path.display(), e), } return None; }) diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 3d6faf66a..fab1dfec1 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -47,6 +47,7 @@ use std::os::windows::ffi::OsStrExt; use std::path::{Path, PathBuf, StripPrefixError}; use std::str::FromStr; use std::string::ToString; +use uucore::backup_control::{self, BackupMode}; use uucore::fs::resolve_relative_path; use uucore::fs::{canonicalize, CanonicalizeMode}; use walkdir::WalkDir; @@ -169,14 +170,6 @@ pub enum TargetType { File, } -#[derive(Clone, Eq, PartialEq)] -pub enum BackupMode { - ExistingBackup, - NoBackup, - NumberedBackup, - SimpleBackup, -} - pub enum CopyMode { Link, SymLink, @@ -201,7 +194,7 @@ pub enum Attribute { #[allow(dead_code)] pub struct Options { attributes_only: bool, - backup: bool, + backup: BackupMode, copy_contents: bool, copy_mode: CopyMode, dereference: bool, @@ -222,6 +215,7 @@ pub struct Options { static VERSION: &str = 
env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY."; +static LONG_HELP: &str = ""; static EXIT_OK: i32 = 0; static EXIT_ERR: i32 = 1; @@ -238,6 +232,7 @@ fn get_usage() -> String { static OPT_ARCHIVE: &str = "archive"; static OPT_ATTRIBUTES_ONLY: &str = "attributes-only"; static OPT_BACKUP: &str = "backup"; +static OPT_BACKUP_NO_ARG: &str = "b"; static OPT_CLI_SYMBOLIC_LINKS: &str = "cli-symbolic-links"; static OPT_CONTEXT: &str = "context"; static OPT_COPY_CONTENTS: &str = "copy-contents"; @@ -301,6 +296,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let matches = App::new(executable!()) .version(VERSION) .about(ABOUT) + .after_help(&*format!("{}\n{}", LONG_HELP, backup_control::BACKUP_CONTROL_LONG_HELP)) .usage(&usage[..]) .arg(Arg::with_name(OPT_TARGET_DIRECTORY) .short("t") @@ -362,14 +358,22 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("remove each existing destination file before attempting to open it \ (contrast with --force). 
On Windows, current only works for writeable files.")) .arg(Arg::with_name(OPT_BACKUP) - .short("b") .long(OPT_BACKUP) - .help("make a backup of each existing destination file")) + .help("make a backup of each existing destination file") + .takes_value(true) + .require_equals(true) + .min_values(0) + .possible_values(backup_control::BACKUP_CONTROL_VALUES) + .value_name("CONTROL") + ) + .arg(Arg::with_name(OPT_BACKUP_NO_ARG) + .short(OPT_BACKUP_NO_ARG) + .help("like --backup but does not accept an argument") + ) .arg(Arg::with_name(OPT_SUFFIX) .short("S") .long(OPT_SUFFIX) .takes_value(true) - .default_value("~") .value_name("SUFFIX") .help("override the usual backup suffix")) .arg(Arg::with_name(OPT_UPDATE) @@ -463,6 +467,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .get_matches_from(args); let options = crash_if_err!(EXIT_ERR, Options::from_matches(&matches)); + + if options.overwrite == OverwriteMode::NoClobber && options.backup != BackupMode::NoBackup { + show_usage_error!("options --backup and --no-clobber are mutually exclusive"); + return 1; + } + let paths: Vec = matches .values_of(OPT_PATHS) .map(|v| v.map(ToString::to_string).collect()) @@ -585,7 +595,13 @@ impl Options { || matches.is_present(OPT_RECURSIVE_ALIAS) || matches.is_present(OPT_ARCHIVE); - let backup = matches.is_present(OPT_BACKUP) || (matches.occurrences_of(OPT_SUFFIX) > 0); + let backup_mode = backup_control::determine_backup_mode( + matches.is_present(OPT_BACKUP_NO_ARG) || matches.is_present(OPT_BACKUP), + matches.value_of(OPT_BACKUP), + ); + let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)); + + let overwrite = OverwriteMode::from_matches(matches); // Parse target directory options let no_target_dir = matches.is_present(OPT_NO_TARGET_DIRECTORY); @@ -631,9 +647,7 @@ impl Options { || matches.is_present(OPT_NO_DEREFERENCE_PRESERVE_LINKS) || matches.is_present(OPT_ARCHIVE), one_file_system: matches.is_present(OPT_ONE_FILE_SYSTEM), - overwrite: 
OverwriteMode::from_matches(matches), parents: matches.is_present(OPT_PARENTS), - backup_suffix: matches.value_of(OPT_SUFFIX).unwrap().to_string(), update: matches.is_present(OPT_UPDATE), verbose: matches.is_present(OPT_VERBOSE), strip_trailing_slashes: matches.is_present(OPT_STRIP_TRAILING_SLASHES), @@ -654,7 +668,9 @@ impl Options { ReflinkMode::Never } }, - backup, + backup: backup_mode, + backup_suffix: backup_suffix, + overwrite: overwrite, no_target_dir, preserve_attributes, recursive, @@ -1090,14 +1106,10 @@ fn context_for(src: &Path, dest: &Path) -> String { format!("'{}' -> '{}'", src.display(), dest.display()) } -/// Implements a relatively naive backup that is not as full featured -/// as GNU cp. No CONTROL version control method argument is taken -/// for backups. -/// TODO: Add version control methods -fn backup_file(path: &Path, suffix: &str) -> CopyResult { - let mut backup_path = path.to_path_buf().into_os_string(); - backup_path.push(suffix); - fs::copy(path, &backup_path)?; +/// Implements a simple backup copy for the destination file. +/// TODO: for the backup, should this function be replaced by `copy_file(...)`? 
+fn backup_dest(dest: &Path, backup_path: &PathBuf) -> CopyResult { + fs::copy(dest, &backup_path)?; Ok(backup_path.into()) } @@ -1108,8 +1120,9 @@ fn handle_existing_dest(source: &Path, dest: &Path, options: &Options) -> CopyRe options.overwrite.verify(dest)?; - if options.backup { - backup_file(dest, &options.backup_suffix)?; + let backup_path = backup_control::get_backup_path(options.backup, dest, &options.backup_suffix); + if let Some(backup_path) = backup_path { + backup_dest(dest, &backup_path)?; } match options.overwrite { diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 317fd72d4..1fe80c03f 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -207,11 +207,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .alias(OPT_UNIVERSAL_2) .help("print or set Coordinated Universal Time (UTC)"), ) - .arg(Arg::with_name(OPT_FORMAT).multiple(true)) + .arg(Arg::with_name(OPT_FORMAT).multiple(false)) .get_matches_from(args); let format = if let Some(form) = matches.value_of(OPT_FORMAT) { - let form = form[1..].into(); + if !form.starts_with('+') { + eprintln!("date: invalid date ‘{}’", form); + return 1; + } + let form = form[1..].to_string(); Format::Custom(form) } else if let Some(fmt) = matches .values_of(OPT_ISO_8601) @@ -237,7 +241,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let set_to = match matches.value_of(OPT_SET).map(parse_date) { None => None, Some(Err((input, _err))) => { - eprintln!("date: invalid date '{}'", input); + eprintln!("date: invalid date ‘{}’", input); return 1; } Some(Ok(date)) => Some(date), @@ -297,11 +301,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { for date in dates { match date { Ok(date) => { - let formatted = date.format(format_string); + // GNU `date` uses `%N` for nano seconds, however crate::chrono uses `%f` + let format_string = &format_string.replace("%N", "%f"); + let formatted = date.format(format_string).to_string().replace("%f", "%N"); println!("{}", formatted); } 
Err((input, _err)) => { - println!("date: invalid date '{}'", input); + println!("date: invalid date ‘{}’", input); } } } diff --git a/src/uu/df/Cargo.toml b/src/uu/df/Cargo.toml index 4770cb557..0e65fdb32 100644 --- a/src/uu/df/Cargo.toml +++ b/src/uu/df/Cargo.toml @@ -16,14 +16,10 @@ path = "src/df.rs" [dependencies] clap = "2.33" -libc = "0.2" number_prefix = "0.4" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["libc", "fsext"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } -[target.'cfg(target_os = "windows")'.dependencies] -winapi = { version = "0.3", features = ["errhandlingapi", "fileapi", "handleapi", "winerror"] } - [[bin]] name = "df" path = "src/main.rs" diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index c917eb2e8..8219b0a27 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -6,22 +6,17 @@ // For the full copyright and license information, please view the LICENSE file // that was distributed with this source code. 
-// spell-checker:ignore (ToDO) mountinfo mtab BLOCKSIZE getmntinfo fobj mptr noatime Iused overmounted -// spell-checker:ignore (libc/fs) asyncreads asyncwrites autofs bavail bfree bsize charspare cifs debugfs devfs devpts ffree frsize fsid fstypename fusectl inode inodes iosize kernfs mntbufp mntfromname mntonname mqueue namemax pipefs smbfs statfs statvfs subfs syncreads syncwrites sysfs wcslen +// spell-checker:ignore (ToDO) mountinfo BLOCKSIZE fobj mptr noatime Iused overmounted +// spell-checker:ignore (libc/fs) asyncreads asyncwrites autofs bavail bfree bsize charspare cifs debugfs devfs devpts ffree frsize fsid fstypename fusectl inode inodes iosize kernfs mntbufp mntfromname mntonname mqueue namemax pipefs smbfs statvfs subfs syncreads syncwrites sysfs wcslen #[macro_use] extern crate uucore; +#[cfg(unix)] +use uucore::fsext::statfs_fn; +use uucore::fsext::{read_fs_list, FsUsage, MountInfo}; use clap::{App, Arg}; -#[cfg(windows)] -use winapi::um::errhandlingapi::GetLastError; -#[cfg(windows)] -use winapi::um::fileapi::{ - FindFirstVolumeW, FindNextVolumeW, FindVolumeClose, GetDriveTypeW, GetVolumeInformationW, - GetVolumePathNamesForVolumeNameW, QueryDosDeviceW, -}; - use number_prefix::NumberPrefix; use std::cell::Cell; use std::collections::HashMap; @@ -32,41 +27,11 @@ use std::ffi::CString; #[cfg(unix)] use std::mem; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use libc::c_int; -#[cfg(target_vendor = "apple")] -use libc::statfs; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::ffi::CStr; -#[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "windows"))] -use std::ptr; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::slice; - #[cfg(target_os = "freebsd")] -use libc::{c_char, fsid_t, uid_t}; +use uucore::libc::{c_char, fsid_t, uid_t}; -#[cfg(target_os = "linux")] -use std::fs::File; -#[cfg(target_os = "linux")] -use std::io::{BufRead, BufReader}; - -#[cfg(windows)] -use 
std::ffi::OsString; -#[cfg(windows)] -use std::os::windows::ffi::OsStrExt; -#[cfg(windows)] -use std::os::windows::ffi::OsStringExt; #[cfg(windows)] use std::path::Path; -#[cfg(windows)] -use winapi::shared::minwindef::DWORD; -#[cfg(windows)] -use winapi::um::fileapi::GetDiskFreeSpaceW; -#[cfg(windows)] -use winapi::um::handleapi::INVALID_HANDLE_VALUE; -#[cfg(windows)] -use winapi::um::winbase::DRIVE_REMOTE; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Show information about the file system on which each FILE resides,\n\ @@ -75,14 +40,6 @@ static ABOUT: &str = "Show information about the file system on which each FILE static EXIT_OK: i32 = 0; static EXIT_ERR: i32 = 1; -#[cfg(windows)] -const MAX_PATH: usize = 266; - -#[cfg(target_os = "linux")] -static LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; -#[cfg(target_os = "linux")] -static LINUX_MTAB: &str = "/etc/mtab"; - static OPT_ALL: &str = "all"; static OPT_BLOCKSIZE: &str = "blocksize"; static OPT_DIRECT: &str = "direct"; @@ -101,8 +58,6 @@ static OPT_TYPE: &str = "type"; static OPT_PRINT_TYPE: &str = "print-type"; static OPT_EXCLUDE_TYPE: &str = "exclude-type"; -static MOUNT_OPT_BIND: &str = "bind"; - /// Store names of file systems as a selector. /// Note: `exclude` takes priority over `include`. 
struct FsSelector { @@ -121,136 +76,16 @@ struct Options { fs_selector: FsSelector, } -#[derive(Debug, Clone)] -struct MountInfo { - // it stores `volume_name` in windows platform and `dev_id` in unix platform - dev_id: String, - dev_name: String, - fs_type: String, - mount_dir: String, - mount_option: String, // we only care "bind" option - mount_root: String, - remote: bool, - dummy: bool, -} - -#[cfg(all( - target_os = "freebsd", - not(all(target_vendor = "apple", target_arch = "x86_64")) -))] -#[repr(C)] -#[derive(Copy, Clone)] -#[allow(non_camel_case_types)] -struct statfs { - f_version: u32, - f_type: u32, - f_flags: u64, - f_bsize: u64, - f_iosize: u64, - f_blocks: u64, - f_bfree: u64, - f_bavail: i64, - f_files: u64, - f_ffree: i64, - f_syncwrites: u64, - f_asyncwrites: u64, - f_syncreads: u64, - f_asyncreads: u64, - f_spare: [u64; 10usize], - f_namemax: u32, - f_owner: uid_t, - f_fsid: fsid_t, - f_charspare: [c_char; 80usize], - f_fstypename: [c_char; 16usize], - f_mntfromname: [c_char; 88usize], - f_mntonname: [c_char; 88usize], -} - -#[derive(Debug, Clone)] -struct FsUsage { - blocksize: u64, - blocks: u64, - bfree: u64, - bavail: u64, - bavail_top_bit_set: bool, - files: u64, - ffree: u64, -} - #[derive(Debug, Clone)] struct Filesystem { mountinfo: MountInfo, usage: FsUsage, } -#[cfg(windows)] -macro_rules! String2LPWSTR { - ($str: expr) => { - OsString::from($str.clone()) - .as_os_str() - .encode_wide() - .chain(Some(0)) - .collect::>() - .as_ptr() - }; -} - -#[cfg(windows)] -#[allow(non_snake_case)] -fn LPWSTR2String(buf: &[u16]) -> String { - let len = unsafe { libc::wcslen(buf.as_ptr()) }; - OsString::from_wide(&buf[..len as usize]) - .into_string() - .unwrap() -} - fn get_usage() -> String { format!("{0} [OPTION]... 
[FILE]...", executable!()) } -#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] -extern "C" { - #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] - #[link_name = "getmntinfo$INODE64"] - fn getmntinfo(mntbufp: *mut *mut statfs, flags: c_int) -> c_int; - - #[cfg(any( - all(target_os = "freebsd"), - all(target_vendor = "apple", target_arch = "aarch64") - ))] - fn getmntinfo(mntbufp: *mut *mut statfs, flags: c_int) -> c_int; -} - -#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] -impl From for MountInfo { - fn from(statfs: statfs) -> Self { - let mut info = MountInfo { - dev_id: "".to_string(), - dev_name: unsafe { - CStr::from_ptr(&statfs.f_mntfromname[0]) - .to_string_lossy() - .into_owned() - }, - fs_type: unsafe { - CStr::from_ptr(&statfs.f_fstypename[0]) - .to_string_lossy() - .into_owned() - }, - mount_dir: unsafe { - CStr::from_ptr(&statfs.f_mntonname[0]) - .to_string_lossy() - .into_owned() - }, - mount_root: "".to_string(), - mount_option: "".to_string(), - remote: false, - dummy: false, - }; - info.set_missing_fields(); - info - } -} - impl FsSelector { fn new() -> FsSelector { FsSelector { @@ -295,239 +130,6 @@ impl Options { } } -impl MountInfo { - fn set_missing_fields(&mut self) { - #[cfg(unix)] - { - // We want to keep the dev_id on Windows - // but set dev_id - let path = CString::new(self.mount_dir.clone()).unwrap(); - unsafe { - let mut stat = mem::zeroed(); - if libc::stat(path.as_ptr(), &mut stat) == 0 { - self.dev_id = (stat.st_dev as i32).to_string(); - } else { - self.dev_id = "".to_string(); - } - } - } - // set MountInfo::dummy - match self.fs_type.as_ref() { - "autofs" | "proc" | "subfs" - /* for Linux 2.6/3.x */ - | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" - /* FreeBSD, Linux 2.4 */ - | "devfs" - /* for NetBSD 3.0 */ - | "kernfs" - /* for Irix 6.5 */ - | "ignore" => self.dummy = true, - _ => self.dummy = self.fs_type == "none" - && self.mount_option.find(MOUNT_OPT_BIND).is_none(), - 
} - // set MountInfo::remote - #[cfg(windows)] - { - self.remote = DRIVE_REMOTE == unsafe { GetDriveTypeW(String2LPWSTR!(self.mount_root)) }; - } - #[cfg(unix)] - { - if self.dev_name.find(':').is_some() - || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" - || self.fs_type == "cifs") - || self.dev_name == "-hosts" - { - self.remote = true; - } else { - self.remote = false; - } - } - } - - #[cfg(target_os = "linux")] - fn new(file_name: &str, raw: Vec<&str>) -> Option { - match file_name { - // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue - // "man proc" for more details - "/proc/self/mountinfo" => { - let mut m = MountInfo { - dev_id: "".to_string(), - dev_name: raw[9].to_string(), - fs_type: raw[8].to_string(), - mount_root: raw[3].to_string(), - mount_dir: raw[4].to_string(), - mount_option: raw[5].to_string(), - remote: false, - dummy: false, - }; - m.set_missing_fields(); - Some(m) - } - "/etc/mtab" => { - let mut m = MountInfo { - dev_id: "".to_string(), - dev_name: raw[0].to_string(), - fs_type: raw[2].to_string(), - mount_root: "".to_string(), - mount_dir: raw[1].to_string(), - mount_option: raw[3].to_string(), - remote: false, - dummy: false, - }; - m.set_missing_fields(); - Some(m) - } - _ => None, - } - } - #[cfg(windows)] - fn new(mut volume_name: String) -> Option { - let mut dev_name_buf = [0u16; MAX_PATH]; - volume_name.pop(); - unsafe { - QueryDosDeviceW( - OsString::from(volume_name.clone()) - .as_os_str() - .encode_wide() - .chain(Some(0)) - .skip(4) - .collect::>() - .as_ptr(), - dev_name_buf.as_mut_ptr(), - dev_name_buf.len() as DWORD, - ) - }; - volume_name.push('\\'); - let dev_name = LPWSTR2String(&dev_name_buf); - - let mut mount_root_buf = [0u16; MAX_PATH]; - let success = unsafe { - GetVolumePathNamesForVolumeNameW( - String2LPWSTR!(volume_name), - mount_root_buf.as_mut_ptr(), - mount_root_buf.len() as DWORD, - ptr::null_mut(), - ) - }; - if 0 == success { - // TODO: support the case 
when `GetLastError()` returns `ERROR_MORE_DATA` - return None; - } - let mount_root = LPWSTR2String(&mount_root_buf); - - let mut fs_type_buf = [0u16; MAX_PATH]; - let success = unsafe { - GetVolumeInformationW( - String2LPWSTR!(mount_root), - ptr::null_mut(), - 0 as DWORD, - ptr::null_mut(), - ptr::null_mut(), - ptr::null_mut(), - fs_type_buf.as_mut_ptr(), - fs_type_buf.len() as DWORD, - ) - }; - let fs_type = if 0 != success { - Some(LPWSTR2String(&fs_type_buf)) - } else { - None - }; - let mut mn_info = MountInfo { - dev_id: volume_name, - dev_name, - fs_type: fs_type.unwrap_or_else(|| "".to_string()), - mount_root, - mount_dir: "".to_string(), - mount_option: "".to_string(), - remote: false, - dummy: false, - }; - mn_info.set_missing_fields(); - Some(mn_info) - } -} - -impl FsUsage { - #[cfg(unix)] - fn new(statvfs: libc::statvfs) -> FsUsage { - { - FsUsage { - blocksize: if statvfs.f_frsize != 0 { - statvfs.f_frsize as u64 - } else { - statvfs.f_bsize as u64 - }, - blocks: statvfs.f_blocks as u64, - bfree: statvfs.f_bfree as u64, - bavail: statvfs.f_bavail as u64, - bavail_top_bit_set: ((statvfs.f_bavail as u64) & (1u64.rotate_right(1))) != 0, - files: statvfs.f_files as u64, - ffree: statvfs.f_ffree as u64, - } - } - } - #[cfg(not(unix))] - fn new(path: &Path) -> FsUsage { - let mut root_path = [0u16; MAX_PATH]; - let success = unsafe { - GetVolumePathNamesForVolumeNameW( - //path_utf8.as_ptr(), - String2LPWSTR!(path.as_os_str()), - root_path.as_mut_ptr(), - root_path.len() as DWORD, - ptr::null_mut(), - ) - }; - if 0 == success { - crash!( - EXIT_ERR, - "GetVolumePathNamesForVolumeNameW failed: {}", - unsafe { GetLastError() } - ); - } - - let mut sectors_per_cluster = 0; - let mut bytes_per_sector = 0; - let mut number_of_free_clusters = 0; - let mut total_number_of_clusters = 0; - - let success = unsafe { - GetDiskFreeSpaceW( - String2LPWSTR!(path.as_os_str()), - &mut sectors_per_cluster, - &mut bytes_per_sector, - &mut number_of_free_clusters, - &mut 
total_number_of_clusters, - ) - }; - if 0 == success { - // Fails in case of CD for example - //crash!(EXIT_ERR, "GetDiskFreeSpaceW failed: {}", unsafe { - //GetLastError() - //}); - } - - let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; - FsUsage { - // f_bsize File system block size. - blocksize: bytes_per_cluster as u64, - // f_blocks - Total number of blocks on the file system, in units of f_frsize. - // frsize = Fundamental file system block size (fragment size). - blocks: total_number_of_clusters as u64, - // Total number of free blocks. - bfree: number_of_free_clusters as u64, - // Total number of free blocks available to non-privileged processes. - bavail: 0 as u64, - bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, - // Total number of file nodes (inodes) on the file system. - files: 0 as u64, // Not available on windows - // Total number of free file nodes (inodes). - ffree: 4096 as u64, // Meaningless on Windows - } - } -} - impl Filesystem { // TODO: resolve uuid in `mountinfo.dev_name` if exists fn new(mountinfo: MountInfo) -> Option { @@ -548,7 +150,7 @@ impl Filesystem { unsafe { let path = CString::new(_stat_path).unwrap(); let mut statvfs = mem::zeroed(); - if libc::statvfs(path.as_ptr(), &mut statvfs) < 0 { + if statfs_fn(path.as_ptr(), &mut statvfs) < 0 { None } else { Some(Filesystem { @@ -565,80 +167,6 @@ impl Filesystem { } } -/// Read file system list. 
-fn read_fs_list() -> Vec { - #[cfg(target_os = "linux")] - { - let (file_name, fobj) = File::open(LINUX_MOUNTINFO) - .map(|f| (LINUX_MOUNTINFO, f)) - .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f))) - .expect("failed to find mount list files"); - let reader = BufReader::new(fobj); - reader - .lines() - .filter_map(|line| line.ok()) - .filter_map(|line| { - let raw_data = line.split_whitespace().collect::>(); - MountInfo::new(file_name, raw_data) - }) - .collect::>() - } - #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] - { - let mut mptr: *mut statfs = ptr::null_mut(); - let len = unsafe { getmntinfo(&mut mptr, 1 as c_int) }; - if len < 0 { - crash!(EXIT_ERR, "getmntinfo failed"); - } - let mounts = unsafe { slice::from_raw_parts(mptr, len as usize) }; - mounts - .iter() - .map(|m| MountInfo::from(*m)) - .collect::>() - } - #[cfg(windows)] - { - let mut volume_name_buf = [0u16; MAX_PATH]; - // As recommended in the MS documentation, retrieve the first volume before the others - let find_handle = unsafe { - FindFirstVolumeW(volume_name_buf.as_mut_ptr(), volume_name_buf.len() as DWORD) - }; - if INVALID_HANDLE_VALUE == find_handle { - crash!(EXIT_ERR, "FindFirstVolumeW failed: {}", unsafe { - GetLastError() - }); - } - let mut mounts = Vec::::new(); - loop { - let volume_name = LPWSTR2String(&volume_name_buf); - if !volume_name.starts_with("\\\\?\\") || !volume_name.ends_with('\\') { - show_warning!("A bad path was skipped: {}", volume_name); - continue; - } - if let Some(m) = MountInfo::new(volume_name) { - mounts.push(m); - } - if 0 == unsafe { - FindNextVolumeW( - find_handle, - volume_name_buf.as_mut_ptr(), - volume_name_buf.len() as DWORD, - ) - } { - let err = unsafe { GetLastError() }; - if err != winapi::shared::winerror::ERROR_NO_MORE_FILES { - crash!(EXIT_ERR, "FindNextVolumeW failed: {}", err); - } - break; - } - } - unsafe { - FindVolumeClose(find_handle); - } - mounts - } -} - fn filter_mount_list(vmi: Vec, paths: &[String], 
opt: &Options) -> Vec { vmi.into_iter() .filter_map(|mi| { diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index a2d819620..b6942c2d2 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -105,7 +105,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if out_format == OutputFmt::Unknown { match guess_syntax() { OutputFmt::Unknown => { - show_info!("no SHELL environment variable, and no shell type option given"); + show_error!("no SHELL environment variable, and no shell type option given"); return 1; } fmt => out_format = fmt, @@ -130,7 +130,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { ) } Err(e) => { - show_info!("{}: {}", matches.free[0], e); + show_error!("{}: {}", matches.free[0], e); return 1; } } @@ -141,7 +141,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { 0 } Err(s) => { - show_info!("{}", s); + show_error!("{}", s); 1 } } diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index 3ce9d8361..023c0a021 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -15,6 +15,7 @@ edition = "2018" path = "src/du.rs" [dependencies] +clap = "2.33" chrono = "0.4" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 89dd3f739..6bd4f23e4 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -12,6 +12,7 @@ extern crate uucore; use chrono::prelude::DateTime; use chrono::Local; +use clap::{App, Arg}; use std::collections::HashSet; use std::env; use std::fs; @@ -37,6 +38,27 @@ use winapi::um::winbase::GetFileInformationByHandleEx; #[cfg(windows)] use winapi::um::winnt::{FILE_ID_128, ULONGLONG}; +mod options { + pub const NULL: &str = "0"; + pub const ALL: &str = "all"; + pub const APPARENT_SIZE: &str = "apparent-size"; + pub const BLOCK_SIZE: &str = "B"; + pub const BYTES: &str = "b"; + pub const TOTAL: &str 
= "c"; + pub const MAX_DEPTH: &str = "d"; + pub const HUMAN_READABLE: &str = "h"; + pub const BLOCK_SIZE_1K: &str = "k"; + pub const COUNT_LINKS: &str = "l"; + pub const BLOCK_SIZE_1M: &str = "m"; + pub const SEPARATE_DIRS: &str = "S"; + pub const SUMMARIZE: &str = "s"; + pub const SI: &str = "si"; + pub const TIME: &str = "time"; + pub const TIME_STYLE: &str = "time-style"; + pub const FILE: &str = "FILE"; +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); const NAME: &str = "du"; const SUMMARY: &str = "estimate file space usage"; const LONG_HELP: &str = " @@ -220,14 +242,14 @@ fn unit_string_to_number(s: &str) -> Option { Some(number * multiple.pow(unit)) } -fn translate_to_pure_number(s: &Option) -> Option { +fn translate_to_pure_number(s: &Option<&str>) -> Option { match *s { Some(ref s) => unit_string_to_number(s), None => None, } } -fn read_block_size(s: Option) -> u64 { +fn read_block_size(s: Option<&str>) -> u64 { match translate_to_pure_number(&s) { Some(v) => v, None => { @@ -236,7 +258,8 @@ fn read_block_size(s: Option) -> u64 { }; for env_var in &["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { - if let Some(quantity) = translate_to_pure_number(&env::var(env_var).ok()) { + let env_size = env::var(env_var).ok(); + if let Some(quantity) = translate_to_pure_number(&env_size.as_deref()) { return quantity; } } @@ -361,126 +384,189 @@ fn convert_size_other(size: u64, _multiplier: u64, block_size: u64) -> String { format!("{}", ((size as f64) / (block_size as f64)).ceil()) } +fn get_usage() -> String { + format!( + "{0} [OPTION]... [FILE]... + {0} [OPTION]... --files0-from=F", + executable!() + ) +} + #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); - let syntax = format!( - "[OPTION]... [FILE]... - {0} [OPTION]... 
--files0-from=F", - NAME - ); - let matches = app!(&syntax, SUMMARY, LONG_HELP) - // In task - .optflag( - "a", - "all", - " write counts for all files, not just directories", - ) - // In main - .optflag( - "", - "apparent-size", - "print apparent sizes, rather than disk usage - although the apparent size is usually smaller, it may be larger due to holes - in ('sparse') files, internal fragmentation, indirect blocks, and the like", - ) - // In main - .optopt( - "B", - "block-size", - "scale sizes by SIZE before printing them. - E.g., '-BM' prints sizes in units of 1,048,576 bytes. See SIZE format below.", - "SIZE", - ) - // In main - .optflag( - "b", - "bytes", - "equivalent to '--apparent-size --block-size=1'", - ) - // In main - .optflag("c", "total", "produce a grand total") - // In task - // opts.optflag("D", "dereference-args", "dereference only symlinks that are listed - // on the command line"), - // In main - // opts.optopt("", "files0-from", "summarize disk usage of the NUL-terminated file - // names specified in file F; - // If F is - then read names from standard input", "F"), - // // In task - // opts.optflag("H", "", "equivalent to --dereference-args (-D)"), - // In main - .optflag( - "h", - "human-readable", - "print sizes in human readable format (e.g., 1K 234M 2G)", - ) - // In main - .optflag("", "si", "like -h, but use powers of 1000 not 1024") - // In main - .optflag("k", "", "like --block-size=1K") - // In task - .optflag("l", "count-links", "count sizes many times if hard linked") - // // In main - .optflag("m", "", "like --block-size=1M") - // // In task - // opts.optflag("L", "dereference", "dereference all symbolic links"), - // // In task - // opts.optflag("P", "no-dereference", "don't follow any symbolic links (this is the default)"), - // // In main - .optflag( - "0", - "null", - "end each output line with 0 byte rather than newline", - ) - // In main - .optflag( - "S", - "separate-dirs", - "do not include size of subdirectories", - ) - 
// In main - .optflag("s", "summarize", "display only a total for each argument") - // // In task - // opts.optflag("x", "one-file-system", "skip directories on different file systems"), - // // In task - // opts.optopt("X", "exclude-from", "exclude files that match any pattern in FILE", "FILE"), - // // In task - // opts.optopt("", "exclude", "exclude files that match PATTERN", "PATTERN"), - // In main - .optopt( - "d", - "max-depth", - "print the total for a directory (or file, with --all) - only if it is N or fewer levels below the command - line argument; --max-depth=0 is the same as --summarize", - "N", - ) - // In main - .optflagopt( - "", - "time", - "show time of the last modification of any file in the - directory, or any of its subdirectories. If WORD is given, show time as WORD instead - of modification time: atime, access, use, ctime or status", - "WORD", - ) - // In main - .optopt( - "", - "time-style", - "show times using style STYLE: - full-iso, long-iso, iso, +FORMAT FORMAT is interpreted like 'date'", - "STYLE", - ) - .parse(args); + let usage = get_usage(); - let summarize = matches.opt_present("summarize"); + let matches = App::new(executable!()) + .version(VERSION) + .about(SUMMARY) + .usage(&usage[..]) + .after_help(LONG_HELP) + .arg( + Arg::with_name(options::ALL) + .short("a") + .long(options::ALL) + .help("write counts for all files, not just directories"), + ) + .arg( + Arg::with_name(options::APPARENT_SIZE) + .long(options::APPARENT_SIZE) + .help( + "print apparent sizes, rather than disk usage \ + although the apparent size is usually smaller, it may be larger due to holes \ + in ('sparse') files, internal fragmentation, indirect blocks, and the like" + ) + ) + .arg( + Arg::with_name(options::BLOCK_SIZE) + .short("B") + .long("block-size") + .value_name("SIZE") + .help( + "scale sizes by SIZE before printing them. \ + E.g., '-BM' prints sizes in units of 1,048,576 bytes. See SIZE format below." 
+ ) + ) + .arg( + Arg::with_name(options::BYTES) + .short("b") + .long("bytes") + .help("equivalent to '--apparent-size --block-size=1'") + ) + .arg( + Arg::with_name(options::TOTAL) + .long("total") + .short("c") + .help("produce a grand total") + ) + .arg( + Arg::with_name(options::MAX_DEPTH) + .short("d") + .long("max-depth") + .value_name("N") + .help( + "print the total for a directory (or file, with --all) \ + only if it is N or fewer levels below the command \ + line argument; --max-depth=0 is the same as --summarize" + ) + ) + .arg( + Arg::with_name(options::HUMAN_READABLE) + .long("human-readable") + .short("h") + .help("print sizes in human readable format (e.g., 1K 234M 2G)") + ) + .arg( + Arg::with_name("inodes") + .long("inodes") + .help( + "list inode usage information instead of block usage like --block-size=1K" + ) + ) + .arg( + Arg::with_name(options::BLOCK_SIZE_1K) + .short("k") + .help("like --block-size=1K") + ) + .arg( + Arg::with_name(options::COUNT_LINKS) + .short("l") + .long("count-links") + .help("count sizes many times if hard linked") + ) + // .arg( + // Arg::with_name("dereference") + // .short("L") + // .long("dereference") + // .help("dereference all symbolic links") + // ) + // .arg( + // Arg::with_name("no-dereference") + // .short("P") + // .long("no-dereference") + // .help("don't follow any symbolic links (this is the default)") + // ) + .arg( + Arg::with_name(options::BLOCK_SIZE_1M) + .short("m") + .help("like --block-size=1M") + ) + .arg( + Arg::with_name(options::NULL) + .short("0") + .long("null") + .help("end each output line with 0 byte rather than newline") + ) + .arg( + Arg::with_name(options::SEPARATE_DIRS) + .short("S") + .long("separate-dirs") + .help("do not include size of subdirectories") + ) + .arg( + Arg::with_name(options::SUMMARIZE) + .short("s") + .long("summarize") + .help("display only a total for each argument") + ) + .arg( + Arg::with_name(options::SI) + .long(options::SI) + .help("like -h, but use powers 
of 1000 not 1024") + ) + // .arg( + // Arg::with_name("one-file-system") + // .short("x") + // .long("one-file-system") + // .help("skip directories on different file systems") + // ) + // .arg( + // Arg::with_name("") + // .short("x") + // .long("exclude-from") + // .value_name("FILE") + // .help("exclude files that match any pattern in FILE") + // ) + // .arg( + // Arg::with_name("exclude") + // .long("exclude") + // .value_name("PATTERN") + // .help("exclude files that match PATTERN") + // ) + .arg( + Arg::with_name(options::TIME) + .long(options::TIME) + .value_name("WORD") + .require_equals(true) + .min_values(0) + .help( + "show time of the last modification of any file in the \ + directory, or any of its subdirectories. If WORD is given, show time as WORD instead \ + of modification time: atime, access, use, ctime or status" + ) + ) + .arg( + Arg::with_name(options::TIME_STYLE) + .long(options::TIME_STYLE) + .value_name("STYLE") + .help( + "show times using style STYLE: \ + full-iso, long-iso, iso, +FORMAT FORMAT is interpreted like 'date'" + ) + ) + .arg( + Arg::with_name(options::FILE) + .hidden(true) + .multiple(true) + ) + .get_matches_from(args); - let max_depth_str = matches.opt_str("max-depth"); + let summarize = matches.is_present(options::SUMMARIZE); + + let max_depth_str = matches.value_of(options::MAX_DEPTH); let max_depth = max_depth_str.as_ref().and_then(|s| s.parse::().ok()); match (max_depth_str, max_depth) { (Some(ref s), _) if summarize => { @@ -495,34 +581,35 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let options = Options { - all: matches.opt_present("all"), + all: matches.is_present(options::ALL), program_name: NAME.to_owned(), max_depth, - total: matches.opt_present("total"), - separate_dirs: matches.opt_present("S"), + total: matches.is_present(options::TOTAL), + separate_dirs: matches.is_present(options::SEPARATE_DIRS), }; - let strs = if matches.free.is_empty() { - vec!["./".to_owned()] // TODO: gnu `du` doesn't use trailing 
"/" here - } else { - matches.free.clone() + let strs = match matches.value_of(options::FILE) { + Some(_) => matches.values_of(options::FILE).unwrap().collect(), + None => { + vec!["./"] // TODO: gnu `du` doesn't use trailing "/" here + } }; - let block_size = read_block_size(matches.opt_str("block-size")); + let block_size = read_block_size(matches.value_of(options::BLOCK_SIZE)); - let multiplier: u64 = if matches.opt_present("si") { + let multiplier: u64 = if matches.is_present(options::SI) { 1000 } else { 1024 }; let convert_size_fn = { - if matches.opt_present("human-readable") || matches.opt_present("si") { + if matches.is_present(options::HUMAN_READABLE) || matches.is_present(options::SI) { convert_size_human - } else if matches.opt_present("b") { + } else if matches.is_present(options::BYTES) { convert_size_b - } else if matches.opt_present("k") { + } else if matches.is_present(options::BLOCK_SIZE_1K) { convert_size_k - } else if matches.opt_present("m") { + } else if matches.is_present(options::BLOCK_SIZE_1M) { convert_size_m } else { convert_size_other @@ -530,8 +617,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { }; let convert_size = |size| convert_size_fn(size, multiplier, block_size); - let time_format_str = match matches.opt_str("time-style") { - Some(s) => match &s[..] 
{ + let time_format_str = match matches.value_of("time-style") { + Some(s) => match s { "full-iso" => "%Y-%m-%d %H:%M:%S.%f %z", "long-iso" => "%Y-%m-%d %H:%M", "iso" => "%Y-%m-%d", @@ -552,7 +639,11 @@ Try '{} --help' for more information.", None => "%Y-%m-%d %H:%M", }; - let line_separator = if matches.opt_present("0") { "\0" } else { "\n" }; + let line_separator = if matches.is_present(options::NULL) { + "\0" + } else { + "\n" + }; let mut grand_total = 0; for path_str in strs { @@ -565,18 +656,20 @@ Try '{} --help' for more information.", let (_, len) = iter.size_hint(); let len = len.unwrap(); for (index, stat) in iter.enumerate() { - let size = if matches.opt_present("apparent-size") || matches.opt_present("b") { + let size = if matches.is_present(options::APPARENT_SIZE) + || matches.is_present(options::BYTES) + { stat.size } else { // C's stat is such that each block is assume to be 512 bytes // See: http://linux.die.net/man/2/stat stat.blocks * 512 }; - if matches.opt_present("time") { + if matches.is_present(options::TIME) { let tm = { let secs = { - match matches.opt_str("time") { - Some(s) => match &s[..] 
{ + match matches.value_of(options::TIME) { + Some(s) => match s { "accessed" => stat.accessed, "created" => stat.created, "modified" => stat.modified, @@ -649,8 +742,8 @@ mod test_du { (Some("900KB".to_string()), Some(900 * 1000)), (Some("BAD_STRING".to_string()), None), ]; - for it in test_data.into_iter() { - assert_eq!(translate_to_pure_number(&it.0), it.1); + for it in test_data.iter() { + assert_eq!(translate_to_pure_number(&it.0.as_deref()), it.1); } } @@ -661,8 +754,8 @@ mod test_du { (None, 1024), (Some("BAD_STRING".to_string()), 1024), ]; - for it in test_data.into_iter() { - assert_eq!(read_block_size(it.0.clone()), it.1); + for it in test_data.iter() { + assert_eq!(read_block_size(it.0.as_deref()), it.1); } } } diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index c81adf0c8..a75f4c742 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -153,7 +153,7 @@ impl AstNode { ":" | "match" => operator_match(&operand_values), "length" => Ok(prefix_operator_length(&operand_values)), "index" => Ok(prefix_operator_index(&operand_values)), - "substr" => prefix_operator_substr(&operand_values), + "substr" => Ok(prefix_operator_substr(&operand_values)), _ => Err(format!("operation not implemented: {}", op_type)), }, @@ -522,35 +522,23 @@ fn prefix_operator_index(values: &[String]) -> String { "0".to_string() } -fn prefix_operator_substr(values: &[String]) -> Result { +fn prefix_operator_substr(values: &[String]) -> String { assert!(values.len() == 3); let subj = &values[0]; - let mut idx = match values[1].parse::() { - Ok(i) => i, - Err(_) => return Err("expected integer as POS arg to 'substr'".to_string()), + let idx = match values[1] + .parse::() + .ok() + .and_then(|v| v.checked_sub(1)) + { + Some(i) => i, + None => return String::new(), }; - let mut len = match values[2].parse::() { + let len = match values[2].parse::() { Ok(i) => i, - Err(_) => return Err("expected integer as LENGTH arg to 
'substr'".to_string()), + Err(_) => return String::new(), }; - if idx <= 0 || len <= 0 { - return Ok("".to_string()); - } - - let mut out_str = String::new(); - for ch in subj.chars() { - idx -= 1; - if idx <= 0 { - if len <= 0 { - break; - } - len -= 1; - - out_str.push(ch); - } - } - Ok(out_str) + subj.chars().skip(idx).take(len).collect() } fn bool_as_int(b: bool) -> i64 { diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md new file mode 100644 index 000000000..e174d62b7 --- /dev/null +++ b/src/uu/factor/BENCHMARKING.md @@ -0,0 +1,116 @@ +# Benchmarking `factor` + +The benchmarks for `factor` are located under `tests/benches/factor` +and can be invoked with `cargo bench` in that directory. + +They are located outside the `uu_factor` crate, as they do not comply +with the project's minimum supported Rust version, *i.e.* may require +a newer version of `rustc`. + + +## Microbenchmarking deterministic functions + +We currently use [`criterion`] to benchmark deterministic functions, +such as `gcd` and `table::factor`. + +However, µbenchmarks are by nature unstable: not only are they specific to +the hardware, operating system version, etc., but they are noisy and affected +by other tasks on the system (browser, compile jobs, etc.), which can cause +`criterion` to report spurious performance improvements and regressions. + +This can be mitigated by getting as close to [idealised conditions][lemire] +as possible: +- minimize the amount of computation and I/O running concurrently to the + benchmark, *i.e.* close your browser and IM clients, don't compile at the + same time, etc. ; +- ensure the CPU's [frequency stays constant] during the benchmark ; +- [isolate a **physical** core], set it to `nohz_full`, and pin the benchmark + to it, so it won't be preempted in the middle of a measurement ; +- disable ASLR by running `setarch -R cargo bench`, so we can compare results + across multiple executions. 
+ + +[`criterion`]: https://bheisler.github.io/criterion.rs/book/index.html +[lemire]: https://lemire.me/blog/2018/01/16/microbenchmarking-calls-for-idealized-conditions/ +[isolate a **physical** core]: https://pyperf.readthedocs.io/en/latest/system.html#isolate-cpus-on-linux +[frequency stays constant]: XXXTODO + + +### Guidance for designing µbenchmarks + +*Note:* this guidance is specific to `factor` and takes its application domain +into account; do not expect it to generalise to other projects. It is based +on Daniel Lemire's [*Microbenchmarking calls for idealized conditions*][lemire], +which I recommend reading if you want to add benchmarks to `factor`. + +1. Select a small, self-contained, deterministic component + `gcd` and `table::factor` are good examples of such: + - no I/O or access to external data structures ; + - no call into other components ; + - behaviour is deterministic: no RNG, no concurrency, ... ; + - the test's body is *fast* (~100ns for `gcd`, ~10µs for `table::factor`), + so each sample takes a very short time, minimizing variability and + maximizing the number of samples we can take in a given time. + +2. Benchmarks are immutable (once merged in `uutils`) + Modifying a benchmark means previously-collected values cannot meaningfully + be compared, silently giving nonsensical results. If you must modify an + existing benchmark, rename it. + +3. Test common cases + We are interested in overall performance, rather than specific edge-cases; + use **reproducibly-randomised inputs**, sampling from either all possible + input values or some subset of interest. + +4. Use [`criterion`], `criterion::black_box`, ... + `criterion` isn't perfect, but it is also much better than ad-hoc + solutions in each benchmark. 
+ + +## Wishlist + +### Configurable statistical estimators + +`criterion` always uses the arithmetic average as its estimator; in µbenchmarks, +where the code under test is fully deterministic and the measurements are +subject to additive, positive noise, [the minimum is more appropriate][lemire]. + + +### CI & reproducible performance testing + +Measuring performance on real hardware is important, as it relates directly +to what users of `factor` experience; however, such measurements are subject +to the constraints of the real world, and aren't perfectly reproducible. +Moreover, the mitigations for this (described above) aren't achievable in +virtualized, multi-tenant environments such as CI. + +Instead, we could run the µbenchmarks in a simulated CPU with [`cachegrind`], +measure execution “time” in that model (in CI), and use it to detect and report +performance improvements and regressions. + +[`iai`] is an implementation of this idea for Rust. + +[`cachegrind`]: https://www.valgrind.org/docs/manual/cg-manual.html +[`iai`]: https://bheisler.github.io/criterion.rs/book/iai/iai.html + + +### Comparing randomised implementations across multiple inputs + +`factor` is a challenging target for system benchmarks as it combines two +characteristics: + +1. integer factoring algorithms are randomised, with large variance in + execution time ; + +2. various inputs also have large differences in factoring time, which + correspond to no natural, linear ordering of the inputs. + + +If (1) were untrue (i.e. if execution time wasn't random), we could faithfully +compare 2 implementations (2 successive versions, or `uutils` and GNU) using +a scatter plot, where each axis corresponds to the performance of one implementation. + +Similarly, without (2) we could plot numbers on the X axis and their factoring +time on the Y axis, using multiple lines for various quantiles. The large +differences in factoring times for successive numbers mean that such a plot +would be unreadable. 
diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index c4e7e8469..eb34519f1 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -17,20 +17,15 @@ num-traits = "0.2.13" # used in src/numerics.rs, which is included by build.rs [dependencies] coz = { version = "0.1.3", optional = true } num-traits = "0.2.13" # Needs at least version 0.2.13 for "OverflowingAdd" -rand = { version="0.7", features=["small_rng"] } -smallvec = { version="0.6.14, < 1.0" } -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } -uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } +rand = { version = "0.7", features = ["small_rng"] } +smallvec = { version = "0.6.14, < 1.0" } +uucore = { version = ">=0.0.8", package = "uucore", path = "../../uucore" } +uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" } [dev-dependencies] -criterion = "0.3" paste = "0.1.18" quickcheck = "0.9.2" -rand_chacha = "0.2.2" -[[bench]] -name = "gcd" -harness = false [[bin]] name = "factor" diff --git a/src/uu/factor/src/cli.rs b/src/uu/factor/src/cli.rs index fb7b3f192..ee4c8a4c4 100644 --- a/src/uu/factor/src/cli.rs +++ b/src/uu/factor/src/cli.rs @@ -13,13 +13,13 @@ use std::error::Error; use std::io::{self, stdin, stdout, BufRead, Write}; mod factor; -pub(crate) use factor::*; +pub use factor::*; use uucore::InvalidEncodingHandling; mod miller_rabin; pub mod numeric; mod rho; -mod table; +pub mod table; static SYNTAX: &str = "[OPTION] [NUMBER]..."; static SUMMARY: &str = "Print the prime factors of the given number(s). 
diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 138254b51..b279de7fc 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -161,8 +161,9 @@ pub fn factor(mut n: u64) -> Factors { return factors; } - let (factors, n) = table::factor(n, factors); + table::factor(&mut n, &mut factors); + #[allow(clippy::let_and_return)] let r = if n < (1 << 32) { _factor::>(n, factors) } else { @@ -238,9 +239,13 @@ mod tests { } #[cfg(test)] -impl quickcheck::Arbitrary for Factors { - fn arbitrary(gen: &mut G) -> Self { - use rand::Rng; +use rand::{ + distributions::{Distribution, Standard}, + Rng, +}; +#[cfg(test)] +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Factors { let mut f = Factors::one(); let mut g = 1u64; let mut n = u64::MAX; @@ -251,7 +256,7 @@ impl quickcheck::Arbitrary for Factors { // See Generating Random Factored Numbers, Easily, J. Cryptology (2003) 'attempt: loop { while n > 1 { - n = gen.gen_range(1, n); + n = rng.gen_range(1, n); if miller_rabin::is_prime(n) { if let Some(h) = g.checked_mul(n) { f.push(n); @@ -268,6 +273,13 @@ impl quickcheck::Arbitrary for Factors { } } +#[cfg(test)] +impl quickcheck::Arbitrary for Factors { + fn arbitrary(g: &mut G) -> Self { + g.gen() + } +} + #[cfg(test)] impl std::ops::BitXor for Factors { type Output = Self; @@ -280,6 +292,6 @@ impl std::ops::BitXor for Factors { } debug_assert_eq!(r.product(), self.product().pow(rhs.into())); - return r; + r } } diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index 94ad6df4c..518d4f241 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -8,15 +8,13 @@ // spell-checker: ignore (ToDO) INVS -use std::num::Wrapping; - use crate::Factors; include!(concat!(env!("OUT_DIR"), "/prime_table.rs")); -pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { +pub fn factor(num: &mut u64, factors: &mut Factors) { for &(prime, inv, ceil) in P_INVS_U64 { - if num == 1 
{ + if *num == 1 { break; } @@ -27,11 +25,11 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { // for a nice explanation. let mut k = 0; loop { - let Wrapping(x) = Wrapping(num) * Wrapping(inv); + let x = num.wrapping_mul(inv); // While prime divides num if x <= ceil { - num = x; + *num = x; k += 1; #[cfg(feature = "coz")] coz::progress!("factor found"); @@ -43,6 +41,61 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { } } } - - (factors, num) +} + +pub const CHUNK_SIZE: usize = 8; +pub fn factor_chunk(n_s: &mut [u64; CHUNK_SIZE], f_s: &mut [Factors; CHUNK_SIZE]) { + for &(prime, inv, ceil) in P_INVS_U64 { + if n_s[0] == 1 && n_s[1] == 1 && n_s[2] == 1 && n_s[3] == 1 { + break; + } + + for (num, factors) in n_s.iter_mut().zip(f_s.iter_mut()) { + if *num == 1 { + continue; + } + let mut k = 0; + loop { + let x = num.wrapping_mul(inv); + + // While prime divides num + if x <= ceil { + *num = x; + k += 1; + } else { + if k > 0 { + factors.add(prime, k); + } + break; + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Factors; + use quickcheck::quickcheck; + use rand::{rngs::SmallRng, Rng, SeedableRng}; + + quickcheck! 
{ + fn chunk_vs_iter(seed: u64) -> () { + let mut rng = SmallRng::seed_from_u64(seed); + let mut n_c: [u64; CHUNK_SIZE] = rng.gen(); + let mut f_c: [Factors; CHUNK_SIZE] = rng.gen(); + + let mut n_i = n_c.clone(); + let mut f_i = f_c.clone(); + for (n, f) in n_i.iter_mut().zip(f_i.iter_mut()) { + factor(n, f); + } + + factor_chunk(&mut n_c, &mut f_c); + + assert_eq!(n_i, n_c); + assert_eq!(f_i, f_c); + } + } } diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index 50cb6f77f..fe9f8568e 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -296,7 +296,7 @@ fn find_kp_breakpoints<'a, T: Iterator>>( (0, 0.0) } else { compute_demerits( - (args.opts.goal - tlen) as isize, + args.opts.goal as isize - tlen as isize, stretch, w.word_nchars as isize, active.prev_rat, diff --git a/src/uu/head/Cargo.toml b/src/uu/head/Cargo.toml index 3c383cb6f..661052f58 100644 --- a/src/uu/head/Cargo.toml +++ b/src/uu/head/Cargo.toml @@ -16,7 +16,7 @@ path = "src/head.rs" [dependencies] clap = "2.33" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["ringbuffer"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 807d04314..3602b4a73 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -1,8 +1,8 @@ use clap::{App, Arg}; use std::convert::TryFrom; use std::ffi::OsString; -use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; -use uucore::{crash, executable, show_error}; +use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write}; +use uucore::{crash, executable, show_error, show_error_custom_description}; const EXIT_FAILURE: i32 = 1; const EXIT_SUCCESS: i32 = 0; @@ -27,8 +27,12 @@ mod options { pub const ZERO_NAME: &str = "ZERO"; pub const FILES_NAME: &str = "FILE"; } +mod lines; mod parse; mod split; +mod take; 
+use lines::zlines; +use take::take_all_but; fn app<'a>() -> App<'a, 'a> { App::new(executable!()) @@ -206,38 +210,20 @@ impl Default for HeadOptions { } } -fn rbuf_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> { - if n == 0 { - return Ok(()); - } - let mut readbuf = [0u8; BUF_SIZE]; - let mut i = 0usize; +fn rbuf_n_bytes(input: R, n: usize) -> std::io::Result<()> +where + R: Read, +{ + // Read the first `n` bytes from the `input` reader. + let mut reader = input.take(n as u64); + // Write those bytes to `stdout`. let stdout = std::io::stdout(); let mut stdout = stdout.lock(); - loop { - let read = loop { - match input.read(&mut readbuf) { - Ok(n) => break n, - Err(e) => match e.kind() { - ErrorKind::Interrupted => {} - _ => return Err(e), - }, - } - }; - if read == 0 { - // might be unexpected if - // we haven't read `n` bytes - // but this mirrors GNU's behavior - return Ok(()); - } - stdout.write_all(&readbuf[..read.min(n - i)])?; - i += read.min(n - i); - if i == n { - return Ok(()); - } - } + io::copy(&mut reader, &mut stdout)?; + + Ok(()) } fn rbuf_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std::io::Result<()> { @@ -311,36 +297,22 @@ fn rbuf_but_last_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io } fn rbuf_but_last_n_lines( - input: &mut impl std::io::BufRead, + input: impl std::io::BufRead, n: usize, zero: bool, ) -> std::io::Result<()> { - if n == 0 { - //prints everything - return rbuf_n_bytes(input, std::usize::MAX); + if zero { + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + for bytes in take_all_but(zlines(input), n) { + stdout.write_all(&bytes?)?; + } + } else { + for line in take_all_but(input.lines(), n) { + println!("{}", line?); + } } - let mut ringbuf = vec![Vec::new(); n]; - let stdout = std::io::stdout(); - let mut stdout = stdout.lock(); - let mut line = Vec::new(); - let mut lines = 0usize; - split::walk_lines(input, zero, |e| match e { - 
split::Event::Data(dat) => { - line.extend_from_slice(dat); - Ok(true) - } - split::Event::Line => { - if lines < n { - ringbuf[lines] = std::mem::replace(&mut line, Vec::new()); - lines += 1; - } else { - stdout.write_all(&ringbuf[0])?; - ringbuf.rotate_left(1); - ringbuf[n - 1] = std::mem::replace(&mut line, Vec::new()); - } - Ok(true) - } - }) + Ok(()) } fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { @@ -418,12 +390,13 @@ fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Resul } } -fn uu_head(options: &HeadOptions) { +fn uu_head(options: &HeadOptions) -> Result<(), u32> { + let mut error_count = 0; let mut first = true; for fname in &options.files { let res = match fname.as_str() { "-" => { - if options.verbose { + if (options.files.len() > 1 && !options.quiet) || options.verbose { if !first { println!(); } @@ -451,53 +424,49 @@ fn uu_head(options: &HeadOptions) { name => { let mut file = match std::fs::File::open(name) { Ok(f) => f, - Err(err) => match err.kind() { - ErrorKind::NotFound => { - crash!( - EXIT_FAILURE, - "head: cannot open '{}' for reading: No such file or directory", - name - ); + Err(err) => { + let prefix = format!("cannot open '{}' for reading", name); + match err.kind() { + ErrorKind::NotFound => { + show_error_custom_description!(prefix, "No such file or directory"); + } + ErrorKind::PermissionDenied => { + show_error_custom_description!(prefix, "Permission denied"); + } + _ => { + show_error_custom_description!(prefix, "{}", err); + } } - ErrorKind::PermissionDenied => { - crash!( - EXIT_FAILURE, - "head: cannot open '{}' for reading: Permission denied", - name - ); - } - _ => { - crash!( - EXIT_FAILURE, - "head: cannot open '{}' for reading: {}", - name, - err - ); - } - }, + error_count += 1; + continue; + } }; if (options.files.len() > 1 && !options.quiet) || options.verbose { + if !first { + println!(); + } println!("==> {} <==", name) } head_file(&mut file, 
options) } }; if res.is_err() { - if fname.as_str() == "-" { - crash!( - EXIT_FAILURE, - "head: error reading standard input: Input/output error" - ); + let name = if fname.as_str() == "-" { + "standard input" } else { - crash!( - EXIT_FAILURE, - "head: error reading {}: Input/output error", - fname - ); - } + fname + }; + let prefix = format!("error reading {}", name); + show_error_custom_description!(prefix, "Input/output error"); + error_count += 1; } first = false; } + if error_count > 0 { + Err(error_count) + } else { + Ok(()) + } } pub fn uumain(args: impl uucore::Args) -> i32 { @@ -507,9 +476,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { crash!(EXIT_FAILURE, "head: {}", s); } }; - uu_head(&args); - - EXIT_SUCCESS + match uu_head(&args) { + Ok(_) => EXIT_SUCCESS, + Err(_) => EXIT_FAILURE, + } } #[cfg(test)] diff --git a/src/uu/head/src/lines.rs b/src/uu/head/src/lines.rs new file mode 100644 index 000000000..dcae27bc8 --- /dev/null +++ b/src/uu/head/src/lines.rs @@ -0,0 +1,73 @@ +//! Iterate over zero-terminated lines. +use std::io::BufRead; + +/// The zero byte, representing the null character. +const ZERO: u8 = 0; + +/// Returns an iterator over the lines of the given reader. +/// +/// The iterator returned from this function will yield instances of +/// [`io::Result`]<[`Vec`]<[`u8`]>>, representing the bytes of the line +/// *including* the null character (with the possible exception of the +/// last line, which may not have one). +/// +/// # Examples +/// +/// ```rust,ignore +/// use std::io::Cursor; +/// +/// let cursor = Cursor::new(b"x\0y\0z\0"); +/// let mut iter = zlines(cursor).map(|l| l.unwrap()); +/// assert_eq!(iter.next(), Some(b"x\0".to_vec())); +/// assert_eq!(iter.next(), Some(b"y\0".to_vec())); +/// assert_eq!(iter.next(), Some(b"z\0".to_vec())); +/// assert_eq!(iter.next(), None); +/// ``` +pub fn zlines(buf: B) -> ZLines { + ZLines { buf } +} + +/// An iterator over the zero-terminated lines of an instance of `BufRead`. 
+pub struct ZLines { + buf: B, +} + +impl Iterator for ZLines { + type Item = std::io::Result>; + + fn next(&mut self) -> Option>> { + let mut buf = Vec::new(); + match self.buf.read_until(ZERO, &mut buf) { + Ok(0) => None, + Ok(_) => Some(Ok(buf)), + Err(e) => Some(Err(e)), + } + } +} + +#[cfg(test)] +mod tests { + + use crate::lines::zlines; + use std::io::Cursor; + + #[test] + fn test_null_terminated() { + let cursor = Cursor::new(b"x\0y\0z\0"); + let mut iter = zlines(cursor).map(|l| l.unwrap()); + assert_eq!(iter.next(), Some(b"x\0".to_vec())); + assert_eq!(iter.next(), Some(b"y\0".to_vec())); + assert_eq!(iter.next(), Some(b"z\0".to_vec())); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_not_null_terminated() { + let cursor = Cursor::new(b"x\0y\0z"); + let mut iter = zlines(cursor).map(|l| l.unwrap()); + assert_eq!(iter.next(), Some(b"x\0".to_vec())); + assert_eq!(iter.next(), Some(b"y\0".to_vec())); + assert_eq!(iter.next(), Some(b"z".to_vec())); + assert_eq!(iter.next(), None); + } +} diff --git a/src/uu/head/src/take.rs b/src/uu/head/src/take.rs new file mode 100644 index 000000000..94fa012be --- /dev/null +++ b/src/uu/head/src/take.rs @@ -0,0 +1,93 @@ +//! Take all but the last elements of an iterator. +use uucore::ringbuffer::RingBuffer; + +/// Create an iterator over all but the last `n` elements of `iter`. +/// +/// # Examples +/// +/// ```rust,ignore +/// let data = [1, 2, 3, 4, 5]; +/// let n = 2; +/// let mut iter = take_all_but(data.iter(), n); +/// assert_eq!(Some(4), iter.next()); +/// assert_eq!(Some(5), iter.next()); +/// assert_eq!(None, iter.next()); +/// ``` +pub fn take_all_but(iter: I, n: usize) -> TakeAllBut { + TakeAllBut::new(iter, n) +} + +/// An iterator that only iterates over the last elements of another iterator. +pub struct TakeAllBut { + iter: I, + buf: RingBuffer<::Item>, +} + +impl TakeAllBut { + pub fn new(mut iter: I, n: usize) -> TakeAllBut { + // Create a new ring buffer and fill it up. 
+ // + // If there are fewer than `n` elements in `iter`, then we + // exhaust the iterator so that whenever `TakeAllBut::next()` is + // called, it will return `None`, as expected. + let mut buf = RingBuffer::new(n); + for _ in 0..n { + let value = match iter.next() { + None => { + break; + } + Some(x) => x, + }; + buf.push_back(value); + } + TakeAllBut { iter, buf } + } +} + +impl Iterator for TakeAllBut +where + I: Iterator, +{ + type Item = ::Item; + + fn next(&mut self) -> Option<::Item> { + match self.iter.next() { + Some(value) => self.buf.push_back(value), + None => None, + } + } +} + +#[cfg(test)] +mod tests { + + use crate::take::take_all_but; + + #[test] + fn test_fewer_elements() { + let mut iter = take_all_but([0, 1, 2].iter(), 2); + assert_eq!(Some(&0), iter.next()); + assert_eq!(None, iter.next()); + } + + #[test] + fn test_same_number_of_elements() { + let mut iter = take_all_but([0, 1].iter(), 2); + assert_eq!(None, iter.next()); + } + + #[test] + fn test_more_elements() { + let mut iter = take_all_but([0].iter(), 2); + assert_eq!(None, iter.next()); + } + + #[test] + fn test_zero_elements() { + let mut iter = take_all_but([0, 1, 2].iter(), 0); + assert_eq!(Some(&0), iter.next()); + assert_eq!(Some(&1), iter.next()); + assert_eq!(Some(&2), iter.next()); + assert_eq!(None, iter.next()); + } +} diff --git a/src/uu/install/src/install.rs b/src/uu/install/src/install.rs index 4ce665b80..bb51a7606 100644 --- a/src/uu/install/src/install.rs +++ b/src/uu/install/src/install.rs @@ -370,13 +370,13 @@ fn directory(paths: Vec, b: Behavior) -> i32 { // created ancestor directories will have the default mode. Hence it is safe to use // fs::create_dir_all and then only modify the target's dir mode. 
if let Err(e) = fs::create_dir_all(path) { - show_info!("{}: {}", path.display(), e); + show_error!("{}: {}", path.display(), e); all_successful = false; continue; } if b.verbose { - show_info!("creating directory '{}'", path.display()); + show_error!("creating directory '{}'", path.display()); } } @@ -461,7 +461,7 @@ fn copy_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 let mut all_successful = true; for sourcepath in files.iter() { if !sourcepath.exists() { - show_info!( + show_error!( "cannot stat '{}': No such file or directory", sourcepath.display() ); @@ -471,7 +471,7 @@ fn copy_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 } if sourcepath.is_dir() { - show_info!("omitting directory '{}'", sourcepath.display()); + show_error!("omitting directory '{}'", sourcepath.display()); all_successful = false; continue; } @@ -588,10 +588,10 @@ fn copy(from: &Path, to: &Path, b: &Behavior) -> Result<(), ()> { ) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } } - Err(e) => show_info!("{}", e), + Err(e) => show_error!("{}", e), } } @@ -608,10 +608,10 @@ fn copy(from: &Path, to: &Path, b: &Behavior) -> Result<(), ()> { match wrap_chgrp(to, &meta, group_id, false, Verbosity::Normal) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } } - Err(e) => show_info!("{}", e), + Err(e) => show_error!("{}", e), } } @@ -626,12 +626,12 @@ fn copy(from: &Path, to: &Path, b: &Behavior) -> Result<(), ()> { match set_file_times(to, accessed_time, modified_time) { Ok(_) => {} - Err(e) => show_info!("{}", e), + Err(e) => show_error!("{}", e), } } if b.verbose { - show_info!("'{}' -> '{}'", from.display(), to.display()); + show_error!("'{}' -> '{}'", from.display(), to.display()); } Ok(()) diff --git a/src/uu/install/src/mode.rs b/src/uu/install/src/mode.rs index a3de40c68..b8d5cd839 100644 --- a/src/uu/install/src/mode.rs +++ b/src/uu/install/src/mode.rs @@ -23,7 +23,7 @@ pub fn 
parse(mode_string: &str, considering_dir: bool) -> Result { pub fn chmod(path: &Path, mode: u32) -> Result<(), ()> { use std::os::unix::fs::PermissionsExt; fs::set_permissions(path, fs::Permissions::from_mode(mode)).map_err(|err| { - show_info!("{}: chmod failed with error {}", path.display(), err); + show_error!("{}: chmod failed with error {}", path.display(), err); }) } diff --git a/src/uu/logname/Cargo.toml b/src/uu/logname/Cargo.toml index 416f817d7..4aa4d68f4 100644 --- a/src/uu/logname/Cargo.toml +++ b/src/uu/logname/Cargo.toml @@ -16,6 +16,7 @@ path = "src/logname.rs" [dependencies] libc = "0.2.42" +clap = "2.33" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/logname/src/logname.rs b/src/uu/logname/src/logname.rs index 8c6a946f5..14bf7ef3b 100644 --- a/src/uu/logname/src/logname.rs +++ b/src/uu/logname/src/logname.rs @@ -15,6 +15,8 @@ extern crate uucore; use std::ffi::CStr; use uucore::InvalidEncodingHandling; +use clap::App; + extern "C" { // POSIX requires using getlogin (or equivalent code) pub fn getlogin() -> *const libc::c_char; @@ -31,15 +33,24 @@ fn get_userlogin() -> Option { } } -static SYNTAX: &str = ""; static SUMMARY: &str = "Print user's login name"; -static LONG_HELP: &str = ""; +static VERSION: &str = env!("CARGO_PKG_VERSION"); + +fn get_usage() -> String { + String::from(executable!()) +} pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse( - args.collect_str(InvalidEncodingHandling::ConvertLossy) - .accept_any(), - ); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); + + let usage = get_usage(); + let _ = App::new(executable!()) + .version(VERSION) + .about(SUMMARY) + .usage(&usage[..]) + .get_matches_from(args); match get_userlogin() { Some(userlogin) => println!("{}", userlogin), diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 
0e2754f07..d467d431a 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1110,7 +1110,7 @@ struct PathData { md: OnceCell>, ft: OnceCell>, // Name of the file - will be empty for . or .. - file_name: String, + display_name: String, // PathBuf that all above data corresponds to p_buf: PathBuf, must_dereference: bool, @@ -1126,14 +1126,18 @@ impl PathData { ) -> Self { // We cannot use `Path::ends_with` or `Path::Components`, because they remove occurrences of '.' // For '..', the filename is None - let name = if let Some(name) = file_name { + let display_name = if let Some(name) = file_name { name } else { - p_buf - .file_name() - .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) - .to_string_lossy() - .into_owned() + let display_osstr = if command_line { + p_buf.as_os_str() + } else { + p_buf + .file_name() + .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) + }; + + display_osstr.to_string_lossy().into_owned() }; let must_dereference = match &config.dereference { Dereference::All => true, @@ -1159,7 +1163,7 @@ impl PathData { Self { md: OnceCell::new(), ft, - file_name: name, + display_name, p_buf, must_dereference, } @@ -1179,31 +1183,32 @@ impl PathData { } fn list(locs: Vec, config: Config) -> i32 { - let number_of_locs = locs.len(); - let mut files = Vec::::new(); let mut dirs = Vec::::new(); let mut has_failed = false; let mut out = BufWriter::new(stdout()); - for loc in locs { + for loc in &locs { let p = PathBuf::from(&loc); if !p.exists() { show_error!("'{}': {}", &loc, "No such file or directory"); - // We found an error, the return code of ls should not be 0 - // And no need to continue the execution + /* + We found an error, the return code of ls should not be 0 + And no need to continue the execution + */ has_failed = true; continue; } let path_data = PathData::new(p, None, None, &config, true); - let show_dir_contents = if let Some(ft) = path_data.file_type() { - !config.directory && ft.is_dir() - } else { - has_failed = true; - 
false + let show_dir_contents = match path_data.file_type() { + Some(ft) => !config.directory && ft.is_dir(), + None => { + has_failed = true; + false + } }; if show_dir_contents { @@ -1217,7 +1222,7 @@ fn list(locs: Vec, config: Config) -> i32 { sort_entries(&mut dirs, &config); for dir in dirs { - if number_of_locs > 1 { + if locs.len() > 1 { let _ = writeln!(out, "\n{}:", dir.p_buf.display()); } enter_directory(&dir, &config, &mut out); @@ -1242,7 +1247,7 @@ fn sort_entries(entries: &mut Vec, config: &Config) { entries.sort_by_key(|k| Reverse(k.md().as_ref().map(|md| md.len()).unwrap_or(0))) } // The default sort in GNU ls is case insensitive - Sort::Name => entries.sort_by(|a, b| a.file_name.cmp(&b.file_name)), + Sort::Name => entries.sort_by(|a, b| a.display_name.cmp(&b.display_name)), Sort::Version => entries.sort_by(|a, b| version_cmp::version_cmp(&a.p_buf, &b.p_buf)), Sort::Extension => entries.sort_by(|a, b| { a.p_buf @@ -1331,7 +1336,7 @@ fn display_dir_entry_size(entry: &PathData, config: &Config) -> (usize, usize) { if let Some(md) = entry.md() { ( display_symlink_count(&md).len(), - display_file_size(&md, config).len(), + display_size_or_rdev(&md, config).len(), ) } else { (0, 0) @@ -1344,14 +1349,22 @@ fn pad_left(string: String, count: usize) -> String { fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter) { if config.format == Format::Long { - let (mut max_links, mut max_size) = (1, 1); + let (mut max_links, mut max_width) = (1, 1); + let mut total_size = 0; + for item in items { - let (links, size) = display_dir_entry_size(item, config); + let (links, width) = display_dir_entry_size(item, config); max_links = links.max(max_links); - max_size = size.max(max_size); + max_width = width.max(max_width); + total_size += item.md().map_or(0, |md| get_block_size(md, config)); } + + if total_size > 0 { + let _ = writeln!(out, "total {}", display_size(total_size, config)); + } + for item in items { - display_item_long(item, max_links, 
max_size, config, out); + display_item_long(item, max_links, max_width, config, out); } } else { let names = items.iter().filter_map(|i| display_file_name(&i, config)); @@ -1396,6 +1409,29 @@ fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter u64 { + /* GNU ls will display sizes in terms of block size + md.len() will differ from this value when the file has some holes + */ + #[cfg(unix)] + { + // hard-coded for now - enabling setting this remains a TODO + let ls_block_size = 1024; + match config.size_format { + SizeFormat::Binary => md.blocks() * 512, + SizeFormat::Decimal => md.blocks() * 512, + SizeFormat::Bytes => md.blocks() * 512 / ls_block_size, + } + } + + #[cfg(not(unix))] + { + let _ = config; + // no way to get block size for windows, fall-back to file size + md.len() + } +} + fn display_grid( names: impl Iterator, width: u16, @@ -1448,9 +1484,8 @@ fn display_item_long( let _ = write!( out, - "{}{} {}", - display_file_type(md.file_type()), - display_permissions(&md), + "{} {}", + display_permissions(&md, true), pad_left(display_symlink_count(&md), max_links), ); @@ -1471,7 +1506,7 @@ fn display_item_long( let _ = writeln!( out, " {} {} {}", - pad_left(display_file_size(&md, config), max_size), + pad_left(display_size_or_rdev(md, config), max_size), display_date(&md, config), // unwrap is fine because it fails when metadata is not available // but we already know that it is because it's checked at the @@ -1626,23 +1661,28 @@ fn format_prefixed(prefixed: NumberPrefix) -> String { } } -fn display_file_size(metadata: &Metadata, config: &Config) -> String { +fn display_size_or_rdev(metadata: &Metadata, config: &Config) -> String { + #[cfg(unix)] + { + let ft = metadata.file_type(); + if ft.is_char_device() || ft.is_block_device() { + let dev: u64 = metadata.rdev(); + let major = (dev >> 8) as u8; + let minor = dev as u8; + return format!("{}, {}", major, minor); + } + } + + display_size(metadata.len(), config) +} + +fn display_size(size: 
u64, config: &Config) -> String { // NOTE: The human-readable behaviour deviates from the GNU ls. // The GNU ls uses binary prefixes by default. match config.size_format { - SizeFormat::Binary => format_prefixed(NumberPrefix::binary(metadata.len() as f64)), - SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(metadata.len() as f64)), - SizeFormat::Bytes => metadata.len().to_string(), - } -} - -fn display_file_type(file_type: FileType) -> char { - if file_type.is_dir() { - 'd' - } else if file_type.is_symlink() { - 'l' - } else { - '-' + SizeFormat::Binary => format_prefixed(NumberPrefix::binary(size as f64)), + SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(size as f64)), + SizeFormat::Bytes => size.to_string(), } } @@ -1683,7 +1723,7 @@ fn classify_file(path: &PathData) -> Option { } fn display_file_name(path: &PathData, config: &Config) -> Option { - let mut name = escape_name(&path.file_name, &config.quoting_style); + let mut name = escape_name(&path.display_name, &config.quoting_style); #[cfg(unix)] { diff --git a/src/uu/mkdir/src/mkdir.rs b/src/uu/mkdir/src/mkdir.rs index 6b9fd68ea..861ef5075 100644 --- a/src/uu/mkdir/src/mkdir.rs +++ b/src/uu/mkdir/src/mkdir.rs @@ -101,7 +101,7 @@ fn exec(dirs: Vec, recursive: bool, mode: u16, verbose: bool) -> i32 { if !recursive { if let Some(parent) = path.parent() { if parent != empty && !parent.exists() { - show_info!( + show_error!( "cannot create directory '{}': No such file or directory", path.display() ); @@ -125,7 +125,7 @@ fn mkdir(path: &Path, recursive: bool, mode: u16, verbose: bool) -> i32 { fs::create_dir }; if let Err(e) = create_dir(path) { - show_info!("{}: {}", path.display(), e.to_string()); + show_error!("{}: {}", path.display(), e.to_string()); return 1; } diff --git a/src/uu/mknod/Cargo.toml b/src/uu/mknod/Cargo.toml index 2c3ac8fb9..1320e3546 100644 --- a/src/uu/mknod/Cargo.toml +++ b/src/uu/mknod/Cargo.toml @@ -16,7 +16,7 @@ name = "uu_mknod" path = "src/mknod.rs" 
[dependencies] -getopts = "0.2.18" +clap = "2.33" libc = "^0.2.42" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["mode"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/mknod/src/mknod.rs b/src/uu/mknod/src/mknod.rs index fc6fb0870..e0cf62024 100644 --- a/src/uu/mknod/src/mknod.rs +++ b/src/uu/mknod/src/mknod.rs @@ -5,21 +5,41 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) parsemode makedev sysmacros makenod newmode perror IFBLK IFCHR IFIFO +// spell-checker:ignore (ToDO) parsemode makedev sysmacros perror IFBLK IFCHR IFIFO #[macro_use] extern crate uucore; +use std::ffi::CString; + +use clap::{App, Arg, ArgMatches}; use libc::{dev_t, mode_t}; use libc::{S_IFBLK, S_IFCHR, S_IFIFO, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR}; -use getopts::Options; - -use std::ffi::CString; use uucore::InvalidEncodingHandling; static NAME: &str = "mknod"; static VERSION: &str = env!("CARGO_PKG_VERSION"); +static ABOUT: &str = "Create the special file NAME of the given TYPE."; +static USAGE: &str = "mknod [OPTION]... NAME TYPE [MAJOR MINOR]"; +static LONG_HELP: &str = "Mandatory arguments to long options are mandatory for short options too. +-m, --mode=MODE set file permission bits to MODE, not a=rw - umask +--help display this help and exit +--version output version information and exit + +Both MAJOR and MINOR must be specified when TYPE is b, c, or u, and they +must be omitted when TYPE is p. If MAJOR or MINOR begins with 0x or 0X, +it is interpreted as hexadecimal; otherwise, if it begins with 0, as octal; +otherwise, as decimal. TYPE may be: + +b create a block (buffered) special file +c, u create a character (unbuffered) special file +p create a FIFO + +NOTE: your shell may have its own version of mknod, which usually supersedes +the version described here. 
Please refer to your shell's documentation +for details about the options it supports. +"; const MODE_RW_UGO: mode_t = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; @@ -30,13 +50,35 @@ fn makedev(maj: u64, min: u64) -> dev_t { } #[cfg(windows)] -fn _makenod(path: CString, mode: mode_t, dev: dev_t) -> i32 { +fn _makenod(file_name: &str, mode: mode_t, dev: dev_t) -> i32 { panic!("Unsupported for windows platform") } #[cfg(unix)] -fn _makenod(path: CString, mode: mode_t, dev: dev_t) -> i32 { - unsafe { libc::mknod(path.as_ptr(), mode, dev) } +fn _makenod(file_name: &str, mode: mode_t, dev: dev_t) -> i32 { + let c_str = CString::new(file_name).expect("Failed to convert to CString"); + + // the user supplied a mode + let set_umask = mode & MODE_RW_UGO != MODE_RW_UGO; + + unsafe { + // store prev umask + let last_umask = if set_umask { libc::umask(0) } else { 0 }; + + let errno = libc::mknod(c_str.as_ptr(), mode, dev); + + // set umask back to original value + if set_umask { + libc::umask(last_umask); + } + + if errno == -1 { + let c_str = CString::new(NAME).expect("Failed to convert to CString"); + // shows the error from the mknod syscall + libc::perror(c_str.as_ptr()); + } + errno + } } #[allow(clippy::cognitive_complexity)] @@ -44,156 +86,136 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); - - let mut opts = Options::new(); - // Linux-specific options, not implemented // opts.optflag("Z", "", "set the SELinux security context to default type"); // opts.optopt("", "context", "like -Z, or if CTX is specified then set the SELinux or SMACK security context to CTX"); - opts.optopt( - "m", - "mode", - "set file permission bits to MODE, not a=rw - umask", - "MODE", - ); - opts.optflag("", "help", "display this help and exit"); - opts.optflag("", "version", "output version information and exit"); + let matches = App::new(executable!()) + .version(VERSION) + .usage(USAGE) + 
.after_help(LONG_HELP) + .about(ABOUT) + .arg( + Arg::with_name("mode") + .short("m") + .long("mode") + .value_name("MODE") + .help("set file permission bits to MODE, not a=rw - umask"), + ) + .arg( + Arg::with_name("name") + .value_name("NAME") + .help("name of the new file") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("type") + .value_name("TYPE") + .help("type of the new file (b, c, u or p)") + .required(true) + .validator(valid_type) + .index(2), + ) + .arg( + Arg::with_name("major") + .value_name("MAJOR") + .help("major file type") + .validator(valid_u64) + .index(3), + ) + .arg( + Arg::with_name("minor") + .value_name("MINOR") + .help("minor file type") + .validator(valid_u64) + .index(4), + ) + .get_matches_from(args); - let matches = match opts.parse(&args[1..]) { - Ok(m) => m, - Err(f) => crash!(1, "{}\nTry '{} --help' for more information.", f, NAME), + let mode = match get_mode(&matches) { + Ok(mode) => mode, + Err(err) => { + show_error!("{}", err); + return 1; + } }; - if matches.opt_present("help") { - println!( - "Usage: {0} [OPTION]... NAME TYPE [MAJOR MINOR] + let file_name = matches.value_of("name").expect("Missing argument 'NAME'"); -Mandatory arguments to long options are mandatory for short options too. - -m, --mode=MODE set file permission bits to MODE, not a=rw - umask - --help display this help and exit - --version output version information and exit + // Only check the first character, to allow mnemonic usage like + // 'mknod /dev/rst0 character 18 0'. + let ch = matches + .value_of("type") + .expect("Missing argument 'TYPE'") + .chars() + .next() + .expect("Failed to get the first char"); -Both MAJOR and MINOR must be specified when TYPE is b, c, or u, and they -must be omitted when TYPE is p. If MAJOR or MINOR begins with 0x or 0X, -it is interpreted as hexadecimal; otherwise, if it begins with 0, as octal; -otherwise, as decimal. 
TYPE may be: - - b create a block (buffered) special file - c, u create a character (unbuffered) special file - p create a FIFO - -NOTE: your shell may have its own version of mknod, which usually supersedes -the version described here. Please refer to your shell's documentation -for details about the options it supports.", - NAME - ); - return 0; - } - - if matches.opt_present("version") { - println!("{} {}", NAME, VERSION); - return 0; - } - - let mut last_umask: mode_t = 0; - let mut newmode: mode_t = MODE_RW_UGO; - if matches.opt_present("mode") { - match uucore::mode::parse_mode(matches.opt_str("mode")) { - Ok(parsed) => { - if parsed > 0o777 { - show_info!("mode must specify only file permission bits"); - return 1; - } - newmode = parsed; - } - Err(e) => { - show_info!("{}", e); - return 1; - } + if ch == 'p' { + if matches.is_present("major") || matches.is_present("minor") { + eprintln!("Fifos do not have major and minor device numbers."); + eprintln!("Try '{} --help' for more information.", NAME); + 1 + } else { + _makenod(file_name, S_IFIFO | mode, 0) } - unsafe { - last_umask = libc::umask(0); - } - } + } else { + match (matches.value_of("major"), matches.value_of("minor")) { + (None, None) | (_, None) | (None, _) => { + eprintln!("Special files require major and minor device numbers."); + eprintln!("Try '{} --help' for more information.", NAME); + 1 + } + (Some(major), Some(minor)) => { + let major = major.parse::().expect("validated by clap"); + let minor = minor.parse::().expect("validated by clap"); - let mut ret = 0i32; - match matches.free.len() { - 0 => show_usage_error!("missing operand"), - 1 => show_usage_error!("missing operand after ‘{}’", matches.free[0]), - _ => { - let args = &matches.free; - let c_str = CString::new(args[0].as_str()).expect("Failed to convert to CString"); - - // Only check the first character, to allow mnemonic usage like - // 'mknod /dev/rst0 character 18 0'. 
- let ch = args[1] - .chars() - .next() - .expect("Failed to get the first char"); - - if ch == 'p' { - if args.len() > 2 { - show_info!("{}: extra operand ‘{}’", NAME, args[2]); - if args.len() == 4 { - eprintln!("Fifos do not have major and minor device numbers."); - } - eprintln!("Try '{} --help' for more information.", NAME); - return 1; - } - - ret = _makenod(c_str, S_IFIFO | newmode, 0); - } else { - if args.len() < 4 { - show_info!("missing operand after ‘{}’", args[args.len() - 1]); - if args.len() == 2 { - eprintln!("Special files require major and minor device numbers."); - } - eprintln!("Try '{} --help' for more information.", NAME); - return 1; - } else if args.len() > 4 { - show_usage_error!("extra operand ‘{}’", args[4]); - return 1; - } else if !"bcu".contains(ch) { - show_usage_error!("invalid device type ‘{}’", args[1]); - return 1; - } - - let maj = args[2].parse::(); - let min = args[3].parse::(); - if maj.is_err() { - show_info!("invalid major device number ‘{}’", args[2]); - return 1; - } else if min.is_err() { - show_info!("invalid minor device number ‘{}’", args[3]); - return 1; - } - - let (maj, min) = (maj.unwrap(), min.unwrap()); - let dev = makedev(maj, min); + let dev = makedev(major, minor); if ch == 'b' { // block special file - ret = _makenod(c_str, S_IFBLK | newmode, dev); - } else { + _makenod(file_name, S_IFBLK | mode, dev) + } else if ch == 'c' || ch == 'u' { // char special file - ret = _makenod(c_str, S_IFCHR | newmode, dev); + _makenod(file_name, S_IFCHR | mode, dev) + } else { + unreachable!("{} was validated to be only b, c or u", ch); } } } } - - if last_umask != 0 { - unsafe { - libc::umask(last_umask); - } - } - if ret == -1 { - let c_str = CString::new(format!("{}: {}", NAME, matches.free[0]).as_str()) - .expect("Failed to convert to CString"); - unsafe { - libc::perror(c_str.as_ptr()); - } - } - - ret +} + +fn get_mode(matches: &ArgMatches) -> Result { + match matches.value_of("mode") { + None => Ok(MODE_RW_UGO), + 
Some(str_mode) => uucore::mode::parse_mode(str_mode) + .map_err(|e| format!("invalid mode ({})", e)) + .and_then(|mode| { + if mode > 0o777 { + Err("mode must specify only file permission bits".to_string()) + } else { + Ok(mode) + } + }), + } +} + +fn valid_type(tpe: String) -> Result<(), String> { + // Only check the first character, to allow mnemonic usage like + // 'mknod /dev/rst0 character 18 0'. + tpe.chars() + .next() + .ok_or_else(|| "missing device type".to_string()) + .and_then(|first_char| { + if vec!['b', 'c', 'u', 'p'].contains(&first_char) { + Ok(()) + } else { + Err(format!("invalid device type ‘{}’", tpe)) + } + }) +} + +fn valid_u64(num: String) -> Result<(), String> { + num.parse::().map(|_| ()).map_err(|_| num) } diff --git a/src/uu/mknod/src/parsemode.rs b/src/uu/mknod/src/parsemode.rs new file mode 100644 index 000000000..026fc4a56 --- /dev/null +++ b/src/uu/mknod/src/parsemode.rs @@ -0,0 +1,54 @@ +// spell-checker:ignore (ToDO) fperm + +use libc::{mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR}; + +use uucore::mode; + +pub const MODE_RW_UGO: mode_t = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + +pub fn parse_mode(mode: &str) -> Result { + let arr: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; + let result = if mode.contains(arr) { + mode::parse_numeric(MODE_RW_UGO as u32, mode) + } else { + mode::parse_symbolic(MODE_RW_UGO as u32, mode, true) + }; + result.map(|mode| mode as mode_t) +} + +#[cfg(test)] +mod test { + /// Test if the program is running under WSL + // ref: @@ + // ToDO: test on WSL2 which likely doesn't need special handling; plan change to `is_wsl_1()` if WSL2 is less needy + pub fn is_wsl() -> bool { + #[cfg(target_os = "linux")] + { + if let Ok(b) = std::fs::read("/proc/sys/kernel/osrelease") { + if let Ok(s) = std::str::from_utf8(&b) { + let a = s.to_ascii_lowercase(); + return a.contains("microsoft") || a.contains("wsl"); + } + } + } + false + } + + #[test] + fn symbolic_modes() 
{ + assert_eq!(super::parse_mode("u+x").unwrap(), 0o766); + assert_eq!( + super::parse_mode("+x").unwrap(), + if !is_wsl() { 0o777 } else { 0o776 } + ); + assert_eq!(super::parse_mode("a-w").unwrap(), 0o444); + assert_eq!(super::parse_mode("g-r").unwrap(), 0o626); + } + + #[test] + fn numeric_modes() { + assert_eq!(super::parse_mode("644").unwrap(), 0o644); + assert_eq!(super::parse_mode("+100").unwrap(), 0o766); + assert_eq!(super::parse_mode("-4").unwrap(), 0o662); + } +} diff --git a/src/uu/mktemp/src/mktemp.rs b/src/uu/mktemp/src/mktemp.rs index ed767ffe0..d66dd3d57 100644 --- a/src/uu/mktemp/src/mktemp.rs +++ b/src/uu/mktemp/src/mktemp.rs @@ -15,14 +15,11 @@ use clap::{App, Arg}; use std::env; use std::iter; -use std::mem::forget; use std::path::{is_separator, PathBuf}; use rand::Rng; use tempfile::Builder; -mod tempdir; - static ABOUT: &str = "create a temporary file or directory."; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -157,7 +154,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } if matches.is_present(OPT_TMPDIR) && PathBuf::from(prefix).is_absolute() { - show_info!( + show_error!( "invalid template, ‘{}’; with --tmpdir, it may not be absolute", template ); @@ -214,49 +211,54 @@ pub fn dry_exec(mut tmpdir: PathBuf, prefix: &str, rand: usize, suffix: &str) -> } fn exec( - tmpdir: PathBuf, + dir: PathBuf, prefix: &str, rand: usize, suffix: &str, make_dir: bool, quiet: bool, ) -> i32 { - if make_dir { - match tempdir::new_in(&tmpdir, prefix, rand, suffix) { - Ok(ref f) => { - println!("{}", f); - return 0; - } - Err(e) => { - if !quiet { - show_info!("{}: {}", e, tmpdir.display()); + let res = if make_dir { + let tmpdir = Builder::new() + .prefix(prefix) + .rand_bytes(rand) + .suffix(suffix) + .tempdir_in(&dir); + + // `into_path` consumes the TempDir without removing it + tmpdir.map(|d| d.into_path().to_string_lossy().to_string()) + } else { + let tmpfile = Builder::new() + .prefix(prefix) + .rand_bytes(rand) + .suffix(suffix) + 
.tempfile_in(&dir); + + match tmpfile { + Ok(f) => { + // `keep` ensures that the file is not deleted + match f.keep() { + Ok((_, p)) => Ok(p.to_string_lossy().to_string()), + Err(e) => { + show_error!("'{}': {}", dir.display(), e); + return 1; + } } - return 1; } - } - } - let tmpfile = Builder::new() - .prefix(prefix) - .rand_bytes(rand) - .suffix(suffix) - .tempfile_in(tmpdir); - let tmpfile = match tmpfile { - Ok(f) => f, - Err(e) => { - if !quiet { - show_info!("failed to create tempfile: {}", e); - } - return 1; + Err(x) => Err(x) } }; - let tmpname = tmpfile.path().to_string_lossy().to_string(); - - println!("{}", tmpname); - - // CAUTION: Not to call `drop` of tmpfile, which removes the tempfile, - // I call a dangerous function `forget`. - forget(tmpfile); - - 0 + match res { + Ok(ref f) => { + println!("{}", f); + 0 + } + Err(e) => { + if !quiet { + show_error!("{}: {}", e, dir.display()); + } + 1 + } + } } diff --git a/src/uu/mktemp/src/tempdir.rs b/src/uu/mktemp/src/tempdir.rs deleted file mode 100644 index 1b6c9d7b3..000000000 --- a/src/uu/mktemp/src/tempdir.rs +++ /dev/null @@ -1,51 +0,0 @@ -// spell-checker:ignore (ToDO) tempdir tmpdir - -// Mainly taken from crate `tempdir` - -use rand::distributions::Alphanumeric; -use rand::{thread_rng, Rng}; - -use std::io::Result as IOResult; -use std::io::{Error, ErrorKind}; -use std::path::Path; - -// How many times should we (re)try finding an unused random name? It should be -// enough that an attacker will run out of luck before we run out of patience. 
-const NUM_RETRIES: u32 = 1 << 31; - -#[cfg(any(unix, target_os = "redox"))] -fn create_dir>(path: P) -> IOResult<()> { - use std::fs::DirBuilder; - use std::os::unix::fs::DirBuilderExt; - - DirBuilder::new().mode(0o700).create(path) -} - -#[cfg(windows)] -fn create_dir>(path: P) -> IOResult<()> { - ::std::fs::create_dir(path) -} - -pub fn new_in>( - tmpdir: P, - prefix: &str, - rand: usize, - suffix: &str, -) -> IOResult { - let mut rng = thread_rng(); - for _ in 0..NUM_RETRIES { - let rand_chars: String = rng.sample_iter(&Alphanumeric).take(rand).collect(); - let leaf = format!("{}{}{}", prefix, rand_chars, suffix); - let path = tmpdir.as_ref().join(&leaf); - match create_dir(&path) { - Ok(_) => return Ok(path.to_string_lossy().into_owned()), - Err(ref e) if e.kind() == ErrorKind::AlreadyExists => {} - Err(e) => return Err(e), - } - } - - Err(Error::new( - ErrorKind::AlreadyExists, - "too many temporary directories already exist", - )) -} diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index f57178a09..c61c7caf1 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -20,6 +20,7 @@ use std::os::unix; #[cfg(windows)] use std::os::windows; use std::path::{Path, PathBuf}; +use uucore::backup_control::{self, BackupMode}; use fs_extra::dir::{move_dir, CopyOptions as DirCopyOptions}; @@ -40,16 +41,9 @@ pub enum OverwriteMode { Force, } -#[derive(Clone, Copy, Eq, PartialEq)] -pub enum BackupMode { - NoBackup, - SimpleBackup, - NumberedBackup, - ExistingBackup, -} - static ABOUT: &str = "Move SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY."; static VERSION: &str = env!("CARGO_PKG_VERSION"); +static LONG_HELP: &str = ""; static OPT_BACKUP: &str = "backup"; static OPT_BACKUP_NO_ARG: &str = "b"; @@ -80,20 +74,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let matches = App::new(executable!()) .version(VERSION) .about(ABOUT) + .after_help(&*format!("{}\n{}", LONG_HELP, backup_control::BACKUP_CONTROL_LONG_HELP)) .usage(&usage[..]) .arg( 
Arg::with_name(OPT_BACKUP) .long(OPT_BACKUP) .help("make a backup of each existing destination file") .takes_value(true) - .possible_value("simple") - .possible_value("never") - .possible_value("numbered") - .possible_value("t") - .possible_value("existing") - .possible_value("nil") - .possible_value("none") - .possible_value("off") + .require_equals(true) + .min_values(0) + .possible_values(backup_control::BACKUP_CONTROL_VALUES) .value_name("CONTROL") ) .arg( @@ -172,18 +162,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .unwrap_or_default(); let overwrite_mode = determine_overwrite_mode(&matches); - let backup_mode = determine_backup_mode(&matches); + let backup_mode = backup_control::determine_backup_mode( + matches.is_present(OPT_BACKUP_NO_ARG) || matches.is_present(OPT_BACKUP), + matches.value_of(OPT_BACKUP), + ); if overwrite_mode == OverwriteMode::NoClobber && backup_mode != BackupMode::NoBackup { - show_error!( - "options --backup and --no-clobber are mutually exclusive\n\ - Try '{} --help' for more information.", - executable!() - ); + show_usage_error!("options --backup and --no-clobber are mutually exclusive"); return 1; } - let backup_suffix = determine_backup_suffix(backup_mode, &matches); + let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)); let behavior = Behavior { overwrite: overwrite_mode, @@ -227,37 +216,6 @@ fn determine_overwrite_mode(matches: &ArgMatches) -> OverwriteMode { } } -fn determine_backup_mode(matches: &ArgMatches) -> BackupMode { - if matches.is_present(OPT_BACKUP_NO_ARG) { - BackupMode::SimpleBackup - } else if matches.is_present(OPT_BACKUP) { - match matches.value_of(OPT_BACKUP).map(String::from) { - None => BackupMode::SimpleBackup, - Some(mode) => match &mode[..] 
{ - "simple" | "never" => BackupMode::SimpleBackup, - "numbered" | "t" => BackupMode::NumberedBackup, - "existing" | "nil" => BackupMode::ExistingBackup, - "none" | "off" => BackupMode::NoBackup, - _ => panic!(), // cannot happen as it is managed by clap - }, - } - } else { - BackupMode::NoBackup - } -} - -fn determine_backup_suffix(backup_mode: BackupMode, matches: &ArgMatches) -> String { - if matches.is_present(OPT_SUFFIX) { - matches.value_of(OPT_SUFFIX).map(String::from).unwrap() - } else if let (Ok(s), BackupMode::SimpleBackup) = - (env::var("SIMPLE_BACKUP_SUFFIX"), backup_mode) - { - s - } else { - "~".to_owned() - } -} - fn exec(files: &[PathBuf], b: Behavior) -> i32 { if let Some(ref name) = b.target_dir { return move_files_into_dir(files, &PathBuf::from(name), &b); @@ -295,7 +253,7 @@ fn exec(files: &[PathBuf], b: Behavior) -> i32 { "cannot move ‘{}’ to ‘{}’: {}", source.display(), target.display(), - e + e.to_string() ); 1 } @@ -358,14 +316,15 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 if let Err(e) = rename(sourcepath, &targetpath, b) { show_error!( - "mv: cannot move ‘{}’ to ‘{}’: {}", + "cannot move ‘{}’ to ‘{}’: {}", sourcepath.display(), targetpath.display(), - e + e.to_string() ); all_successful = false; } } + if all_successful { 0 } else { @@ -388,12 +347,7 @@ fn rename(from: &Path, to: &Path, b: &Behavior) -> io::Result<()> { OverwriteMode::Force => {} }; - backup_path = match b.backup { - BackupMode::NoBackup => None, - BackupMode::SimpleBackup => Some(simple_backup_path(to, &b.suffix)), - BackupMode::NumberedBackup => Some(numbered_backup_path(to)), - BackupMode::ExistingBackup => Some(existing_backup_path(to, &b.suffix)), - }; + backup_path = backup_control::get_backup_path(b.backup, to, &b.suffix); if let Some(ref backup_path) = backup_path { rename_with_fallback(to, backup_path)?; } @@ -452,7 +406,13 @@ fn rename_with_fallback(from: &Path, to: &Path) -> io::Result<()> { ..DirCopyOptions::new() }; if 
let Err(err) = move_dir(from, to, &options) { - return Err(io::Error::new(io::ErrorKind::Other, format!("{:?}", err))); + return match err.kind { + fs_extra::error::ErrorKind::PermissionDenied => Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "Permission denied", + )), + _ => Err(io::Error::new(io::ErrorKind::Other, format!("{:?}", err))), + }; } } else { fs::copy(from, to).and_then(|_| fs::remove_file(from))?; @@ -507,28 +467,6 @@ fn read_yes() -> bool { } } -fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { - let mut p = path.to_string_lossy().into_owned(); - p.push_str(suffix); - PathBuf::from(p) -} - -fn numbered_backup_path(path: &Path) -> PathBuf { - (1_u64..) - .map(|i| path.with_extension(format!("~{}~", i))) - .find(|p| !p.exists()) - .expect("cannot create backup") -} - -fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { - let test_path = path.with_extension("~1~"); - if test_path.exists() { - numbered_backup_path(path) - } else { - simple_backup_path(path, suffix) - } -} - fn is_empty_dir(path: &Path) -> bool { match fs::read_dir(path) { Ok(contents) => contents.peekable().peek().is_none(), diff --git a/src/uu/nohup/src/nohup.rs b/src/uu/nohup/src/nohup.rs index 83153ad37..93d9b5e45 100644 --- a/src/uu/nohup/src/nohup.rs +++ b/src/uu/nohup/src/nohup.rs @@ -122,13 +122,13 @@ fn find_stdout() -> File { .open(Path::new(NOHUP_OUT)) { Ok(t) => { - show_info!("ignoring input and appending output to '{}'", NOHUP_OUT); + show_error!("ignoring input and appending output to '{}'", NOHUP_OUT); t } Err(e1) => { let home = match env::var("HOME") { Err(_) => { - show_info!("failed to open '{}': {}", NOHUP_OUT, e1); + show_error!("failed to open '{}': {}", NOHUP_OUT, e1); exit!(internal_failure_code) } Ok(h) => h, @@ -143,12 +143,12 @@ fn find_stdout() -> File { .open(&homeout) { Ok(t) => { - show_info!("ignoring input and appending output to '{}'", homeout_str); + show_error!("ignoring input and appending output to '{}'", 
homeout_str); t } Err(e2) => { - show_info!("failed to open '{}': {}", NOHUP_OUT, e1); - show_info!("failed to open '{}': {}", homeout_str, e2); + show_error!("failed to open '{}': {}", NOHUP_OUT, e1); + show_error!("failed to open '{}': {}", homeout_str, e2); exit!(internal_failure_code) } } diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index e9a476956..6eba699b2 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -216,7 +216,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { match result { Err(e) => { std::io::stdout().flush().expect("error flushing stdout"); - show_info!("{}", e); + show_error!("{}", e); 1 } _ => 0, diff --git a/src/uu/pinky/src/pinky.rs b/src/uu/pinky/src/pinky.rs index e116a2382..d65775c2d 100644 --- a/src/uu/pinky/src/pinky.rs +++ b/src/uu/pinky/src/pinky.rs @@ -48,7 +48,7 @@ fn get_usage() -> String { fn get_long_usage() -> String { format!( "A lightweight 'finger' program; print user information.\n\ - The utmp file will be {}.", + The utmp file will be {}.", utmpx::DEFAULT_FILE ) } @@ -286,17 +286,10 @@ impl Pinky { print!(" {}", time_string(&ut)); - if self.include_where && !ut.host().is_empty() { - let ut_host = ut.host(); - let mut res = ut_host.splitn(2, ':'); - let host = match res.next() { - Some(_) => ut.canon_host().unwrap_or_else(|_| ut_host.clone()), - None => ut_host.clone(), - }; - match res.next() { - Some(d) => print!(" {}:{}", host, d), - None => print!(" {}", host), - } + let mut s = ut.host(); + if self.include_where && !s.is_empty() { + s = safe_unwrap!(ut.canon_host()); + print!(" {}", s); } println!(); diff --git a/src/uu/realpath/src/realpath.rs b/src/uu/realpath/src/realpath.rs index 37ff70fb2..937cee5bd 100644 --- a/src/uu/realpath/src/realpath.rs +++ b/src/uu/realpath/src/realpath.rs @@ -11,7 +11,6 @@ extern crate uucore; use clap::{App, Arg}; -use std::fs; use std::path::{Path, PathBuf}; use uucore::fs::{canonicalize, CanonicalizeMode}; @@ -75,64 +74,35 @@ pub fn 
uumain(args: impl uucore::Args) -> i32 { let quiet = matches.is_present(OPT_QUIET); let mut retcode = 0; for path in &paths { - if !resolve_path(path, strip, zero, quiet) { + if let Err(e) = resolve_path(path, strip, zero) { + if !quiet { + show_error!("{}: {}", e, path.display()); + } retcode = 1 }; } retcode } -fn resolve_path(p: &Path, strip: bool, zero: bool, quiet: bool) -> bool { - let abs = canonicalize(p, CanonicalizeMode::Normal).unwrap(); - - if strip { - if zero { - print!("{}\0", p.display()); - } else { - println!("{}", p.display()) - } - return true; - } - - let mut result = PathBuf::new(); - let mut links_left = 256; - - for part in abs.components() { - result.push(part.as_os_str()); - loop { - if links_left == 0 { - if !quiet { - show_error!("Too many symbolic links: {}", p.display()) - }; - return false; - } - match fs::metadata(result.as_path()) { - Err(_) => break, - Ok(ref m) if !m.file_type().is_symlink() => break, - Ok(_) => { - links_left -= 1; - match fs::read_link(result.as_path()) { - Ok(x) => { - result.pop(); - result.push(x.as_path()); - } - _ => { - if !quiet { - show_error!("Invalid path: {}", p.display()) - }; - return false; - } - } - } - } - } - } - - if zero { - print!("{}\0", result.display()); +/// Resolve a path to an absolute form and print it. +/// +/// If `strip` is `true`, then this function does not attempt to resolve +/// symbolic links in the path. If `zero` is `true`, then this function +/// prints the path followed by the null byte (`'\0'`) instead of a +/// newline character (`'\n'`). +/// +/// # Errors +/// +/// This function returns an error if there is a problem resolving +/// symbolic links. 
+fn resolve_path(p: &Path, strip: bool, zero: bool) -> std::io::Result<()> { + let mode = if strip { + CanonicalizeMode::None } else { - println!("{}", result.display()); - } - - true + CanonicalizeMode::Normal + }; + let abs = canonicalize(p, mode)?; + let line_ending = if zero { '\0' } else { '\n' }; + print!("{}{}", abs.display(), line_ending); + Ok(()) } diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index 1caea0326..fd728c41d 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -69,6 +69,28 @@ Run `cargo build --release` before benchmarking after you make a change! - Benchmark numeric sorting with hyperfine: `hyperfine "target/release/coreutils sort shuffled_numbers_si.txt -h -o output.txt"`. +## External sorting + +Try running commands with the `-S` option set to an amount of memory to be used, such as `1M`. Additionally, you could try sorting +huge files (ideally multiple Gigabytes) with `-S` (or without `-S` to benchmark with our default value). +Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` +multiple times (this will add the contents of `shuffled_wordlist.txt` to itself). +Example: Run `hyperfine './target/release/coreutils sort shuffled_wordlist.txt -S 1M' 'sort shuffled_wordlist.txt -S 1M'` + +## Merging + +"Merge" sort merges already sorted files. It is a sub-step of external sorting, so benchmarking it separately may be helpful. + +- Splitting `shuffled_wordlist.txt` can be achieved by running `split shuffled_wordlist.txt shuffled_wordlist_slice_ --additional-suffix=.txt` +- Sort each part by running `for f in shuffled_wordlist_slice_*; do sort $f -o $f; done` +- Benchmark merging by running `hyperfine "target/release/coreutils sort -m shuffled_wordlist_slice_*"` + +## Check + +When invoked with -c, we simply check if the input is already ordered. The input for benchmarking should be an already sorted file. 
+ +- Benchmark checking by running `hyperfine "target/release/coreutils sort -c sorted_wordlist.txt"` + ## Stdout and stdin performance Try to run the above benchmarks by piping the input through stdin (standard input) and redirect the diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 80ffc92c9..f06610248 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -15,19 +15,20 @@ edition = "2018" path = "src/sort.rs" [dependencies] -serde_json = { version = "1.0.64", default-features = false, features = ["alloc"] } -serde = { version = "1.0", features = ["derive"] } -rayon = "1.5" -rand = "0.7" +binary-heap-plus = "0.4.1" clap = "2.33" +compare = "0.1.0" fnv = "1.0.7" itertools = "0.10.0" +memchr = "2.4.0" +ouroboros = "0.9.3" +rand = "0.7" +rayon = "1.5" semver = "0.9.0" -smallvec = { version="1.6.1", features=["serde"] } +tempfile = "3" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } -tempdir = "0.3.7" [[bin]] name = "sort" diff --git a/src/uu/sort/src/check.rs b/src/uu/sort/src/check.rs new file mode 100644 index 000000000..01b5a25b5 --- /dev/null +++ b/src/uu/sort/src/check.rs @@ -0,0 +1,103 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Debertol +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +//! Check if a file is ordered + +use crate::{ + chunks::{self, Chunk}, + compare_by, open, GlobalSettings, +}; +use itertools::Itertools; +use std::{ + cmp::Ordering, + io::Read, + iter, + sync::mpsc::{sync_channel, Receiver, SyncSender}, + thread, +}; + +/// Check if the file at `path` is ordered. +/// +/// # Returns +/// +/// The code we should exit with. 
+pub fn check(path: &str, settings: &GlobalSettings) -> i32 { + let file = open(path).expect("failed to open input file"); + let (recycled_sender, recycled_receiver) = sync_channel(2); + let (loaded_sender, loaded_receiver) = sync_channel(2); + thread::spawn({ + let settings = settings.clone(); + move || reader(file, recycled_receiver, loaded_sender, &settings) + }); + for _ in 0..2 { + recycled_sender + .send(Chunk::new(vec![0; 100 * 1024], |_| Vec::new())) + .unwrap(); + } + + let mut prev_chunk: Option = None; + let mut line_idx = 0; + for chunk in loaded_receiver.iter() { + line_idx += 1; + if let Some(prev_chunk) = prev_chunk.take() { + // Check if the first element of the new chunk is greater than the last + // element from the previous chunk + let prev_last = prev_chunk.borrow_lines().last().unwrap(); + let new_first = chunk.borrow_lines().first().unwrap(); + + if compare_by(prev_last, new_first, &settings) == Ordering::Greater { + if !settings.check_silent { + println!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line); + } + return 1; + } + recycled_sender.send(prev_chunk).ok(); + } + + for (a, b) in chunk.borrow_lines().iter().tuple_windows() { + line_idx += 1; + if compare_by(a, b, &settings) == Ordering::Greater { + if !settings.check_silent { + println!("sort: {}:{}: disorder: {}", path, line_idx, b.line); + } + return 1; + } + } + + prev_chunk = Some(chunk); + } + 0 +} + +/// The function running on the reader thread. 
+fn reader( + mut file: Box, + receiver: Receiver, + sender: SyncSender, + settings: &GlobalSettings, +) { + let mut sender = Some(sender); + let mut carry_over = vec![]; + for chunk in receiver.iter() { + let (recycled_lines, recycled_buffer) = chunk.recycle(); + chunks::read( + &mut sender, + recycled_buffer, + None, + &mut carry_over, + &mut file, + &mut iter::empty(), + if settings.zero_terminated { + b'\0' + } else { + b'\n' + }, + recycled_lines, + settings, + ) + } +} diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs new file mode 100644 index 000000000..6ec759211 --- /dev/null +++ b/src/uu/sort/src/chunks.rs @@ -0,0 +1,229 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Debertol +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +//! Utilities for reading files as chunks. + +use std::{ + io::{ErrorKind, Read}, + sync::mpsc::SyncSender, +}; + +use memchr::memchr_iter; +use ouroboros::self_referencing; + +use crate::{GlobalSettings, Line}; + +/// The chunk that is passed around between threads. +/// `lines` consists of slices into `buffer`. +#[self_referencing(pub_extras)] +#[derive(Debug)] +pub struct Chunk { + pub buffer: Vec, + #[borrows(buffer)] + #[covariant] + pub lines: Vec>, +} + +impl Chunk { + /// Destroy this chunk and return its components to be reused. + /// + /// # Returns + /// + /// * The `lines` vector, emptied + /// * The `buffer` vector, **not** emptied + pub fn recycle(mut self) -> (Vec>, Vec) { + let recycled_lines = self.with_lines_mut(|lines| { + lines.clear(); + unsafe { + // SAFETY: It is safe to (temporarily) transmute to a vector of lines with a longer lifetime, + // because the vector is empty. + // Transmuting is necessary to make recycling possible. See https://github.com/rust-lang/rfcs/pull/2802 + // for an RFC to make this unnecessary. Its example is similar to the code here.
+ std::mem::transmute::>, Vec>>(std::mem::take(lines)) + } + }); + (recycled_lines, self.into_heads().buffer) + } +} + +/// Read a chunk, parse lines and send them. +/// +/// No empty chunk will be sent. If we reach the end of the input, sender_option +/// is set to None. If this function however does not set sender_option to None, +/// it is not guaranteed that there is still input left: If the input fits _exactly_ +/// into a buffer, we will only notice that there's nothing more to read at the next +/// invocation. +/// +/// # Arguments +/// +/// (see also `read_to_chunk` for a more detailed documentation) +/// +/// * `sender_option`: The sender to send the lines to the sorter. If `None`, this function does nothing. +/// * `buffer`: The recycled buffer. All contents will be overwritten, but it must already be filled. +/// (i.e. `buffer.len()` should be equal to `buffer.capacity()`) +/// * `max_buffer_size`: How big `buffer` can be. +/// * `carry_over`: The bytes that must be carried over in between invocations. +/// * `file`: The current file. +/// * `next_files`: What `file` should be updated to next. +/// * `separator`: The line separator. +/// * `lines`: The recycled vector to fill with lines. Must be empty. +/// * `settings`: The global settings. 
+#[allow(clippy::too_many_arguments)] +pub fn read( + sender_option: &mut Option>, + mut buffer: Vec, + max_buffer_size: Option, + carry_over: &mut Vec, + file: &mut Box, + next_files: &mut impl Iterator>, + separator: u8, + lines: Vec>, + settings: &GlobalSettings, +) { + assert!(lines.is_empty()); + if let Some(sender) = sender_option { + if buffer.len() < carry_over.len() { + buffer.resize(carry_over.len() + 10 * 1024, 0); + } + buffer[..carry_over.len()].copy_from_slice(&carry_over); + let (read, should_continue) = read_to_buffer( + file, + next_files, + &mut buffer, + max_buffer_size, + carry_over.len(), + separator, + ); + carry_over.clear(); + carry_over.extend_from_slice(&buffer[read..]); + + let payload = Chunk::new(buffer, |buf| { + let mut lines = unsafe { + // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime, + // because it was only temporarily transmuted to a Vec> to make recycling possible. + std::mem::transmute::>, Vec>>(lines) + }; + let read = crash_if_err!(1, std::str::from_utf8(&buf[..read])); + parse_lines(read, &mut lines, separator, &settings); + lines + }); + if !payload.borrow_lines().is_empty() { + sender.send(payload).unwrap(); + } + if !should_continue { + *sender_option = None; + } + } +} + +/// Split `read` into `Line`s, and add them to `lines`. +fn parse_lines<'a>( + mut read: &'a str, + lines: &mut Vec>, + separator: u8, + settings: &GlobalSettings, +) { + // Strip a trailing separator. TODO: Once our MinRustV is 1.45 or above, use strip_suffix() instead. + if read.ends_with(separator as char) { + read = &read[..read.len() - 1]; + } + + lines.extend( + read.split(separator as char) + .map(|line| Line::create(line, settings)), + ); +} + +/// Read from `file` into `buffer`. +/// +/// This function makes sure that at least two lines are read (unless we reach EOF and there's no next file), +/// growing the buffer if necessary. +/// The last line is likely to not have been fully read into the buffer. 
Its bytes must be copied to +/// the front of the buffer for the next invocation so that it can be continued to be read +/// (see the return values and `start_offset`). +/// +/// # Arguments +/// +/// * `file`: The file to start reading from. +/// * `next_files`: When `file` reaches EOF, it is updated to `next_files.next()` if that is `Some`, +/// and this function continues reading. +/// * `buffer`: The buffer that is filled with bytes. Its contents will mostly be overwritten (see `start_offset` +/// as well). It will be grown up to `max_buffer_size` if necessary, but it will always grow to read at least two lines. +/// * `max_buffer_size`: Grow the buffer to at most this length. If None, the buffer will not grow, unless needed to read at least two lines. +/// * `start_offset`: The amount of bytes at the start of `buffer` that were carried over +/// from the previous read and should not be overwritten. +/// * `separator`: The byte that separates lines. +/// +/// # Returns +/// +/// * The amount of bytes in `buffer` that can now be interpreted as lines. +/// The remaining bytes must be copied to the start of the buffer for the next invocation, +/// if another invocation is necessary, which is determined by the other return value. +/// * Whether this function should be called again. 
+fn read_to_buffer( + file: &mut Box, + next_files: &mut impl Iterator>, + buffer: &mut Vec, + max_buffer_size: Option, + start_offset: usize, + separator: u8, +) -> (usize, bool) { + let mut read_target = &mut buffer[start_offset..]; + loop { + match file.read(read_target) { + Ok(0) => { + if read_target.is_empty() { + // chunk is full + if let Some(max_buffer_size) = max_buffer_size { + if max_buffer_size > buffer.len() { + // we can grow the buffer + let prev_len = buffer.len(); + if buffer.len() < max_buffer_size / 2 { + buffer.resize(buffer.len() * 2, 0); + } else { + buffer.resize(max_buffer_size, 0); + } + read_target = &mut buffer[prev_len..]; + continue; + } + } + let mut sep_iter = memchr_iter(separator, &buffer).rev(); + let last_line_end = sep_iter.next(); + if sep_iter.next().is_some() { + // We read enough lines. + let end = last_line_end.unwrap(); + // We want to include the separator here, because it shouldn't be carried over. + return (end + 1, true); + } else { + // We need to read more lines + let len = buffer.len(); + // resize the vector to 10 KB more + buffer.resize(len + 1024 * 10, 0); + read_target = &mut buffer[len..]; + } + } else { + // This file is empty. + if let Some(next_file) = next_files.next() { + // There is another file. + *file = next_file; + } else { + // This was the last file. + let leftover_len = read_target.len(); + return (buffer.len() - leftover_len, false); + } + } + } + Ok(n) => { + read_target = &mut read_target[n..]; + } + Err(e) if e.kind() == ErrorKind::Interrupted => { + // retry + } + Err(e) => crash!(1, "{}", e), + } + } +} diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs new file mode 100644 index 000000000..23a55aad0 --- /dev/null +++ b/src/uu/sort/src/ext_sort.rs @@ -0,0 +1,201 @@ +// * This file is part of the uutils coreutils package. 
+// * +// * (c) Michael Debertol +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +//! Sort big files by using auxiliary files for storing intermediate chunks. +//! +//! Files are read into chunks of memory which are then sorted individually and +//! written to temporary files. There are two threads: One sorter, and one reader/writer. +//! The buffers for the individual chunks are recycled. There are two buffers. + +use std::cmp::Ordering; +use std::io::{BufWriter, Write}; +use std::path::Path; +use std::{ + fs::OpenOptions, + io::Read, + sync::mpsc::{Receiver, SyncSender}, + thread, +}; + +use itertools::Itertools; + +use tempfile::TempDir; + +use crate::{ + chunks::{self, Chunk}, + compare_by, merge, output_sorted_lines, sort_by, GlobalSettings, +}; + +const MIN_BUFFER_SIZE: usize = 8_000; + +/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. +pub fn ext_sort(files: &mut impl Iterator>, settings: &GlobalSettings) { + let tmp_dir = crash_if_err!(1, tempfile::Builder::new().prefix("uutils_sort").tempdir_in(&settings.tmp_dir)); + let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1); + let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1); + thread::spawn({ + let settings = settings.clone(); + move || sorter(recycled_receiver, sorted_sender, settings) + }); + let read_result = reader_writer( + files, + &tmp_dir, + if settings.zero_terminated { + b'\0' + } else { + b'\n' + }, + // Heuristically chosen: Dividing by 10 seems to keep our memory usage roughly + // around settings.buffer_size as a whole. 
+ settings.buffer_size / 10, + settings.clone(), + sorted_receiver, + recycled_sender, + ); + match read_result { + ReadResult::WroteChunksToFile { chunks_written } => { + let files = (0..chunks_written) + .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string())) + .collect::>(); + let mut merger = merge::merge(&files, settings); + merger.write_all(settings); + } + ReadResult::SortedSingleChunk(chunk) => { + output_sorted_lines(chunk.borrow_lines().iter(), settings); + } + ReadResult::SortedTwoChunks([a, b]) => { + let merged_iter = a + .borrow_lines() + .iter() + .merge_by(b.borrow_lines().iter(), |line_a, line_b| { + compare_by(line_a, line_b, settings) != Ordering::Greater + }); + output_sorted_lines(merged_iter, settings); + } + ReadResult::EmptyInput => { + // don't output anything + } + } +} + +/// The function that is executed on the sorter thread. +fn sorter(receiver: Receiver, sender: SyncSender, settings: GlobalSettings) { + while let Ok(mut payload) = receiver.recv() { + payload.with_lines_mut(|lines| sort_by(lines, &settings)); + sender.send(payload).unwrap(); + } +} + +/// Describes how we read the chunks from the input. +enum ReadResult { + /// The input was empty. Nothing was read. + EmptyInput, + /// The input fits into a single Chunk, which was kept in memory. + SortedSingleChunk(Chunk), + /// The input fits into two chunks, which were kept in memory. + SortedTwoChunks([Chunk; 2]), + /// The input was read into multiple chunks, which were written to auxiliary files. + WroteChunksToFile { + /// The number of chunks written to auxiliary files. + chunks_written: usize, + }, +} + +/// The function that is executed on the reader/writer thread. +/// +/// # Returns +/// * A `ReadResult` describing whether the input was empty, was kept in memory as one or two sorted chunks, or was written to auxiliary files (with the number of chunks written).
+fn reader_writer( + mut files: impl Iterator>, + tmp_dir: &TempDir, + separator: u8, + buffer_size: usize, + settings: GlobalSettings, + receiver: Receiver, + sender: SyncSender, +) -> ReadResult { + let mut sender_option = Some(sender); + + let mut file = files.next().unwrap(); + + let mut carry_over = vec![]; + // kick things off with two reads + for _ in 0..2 { + chunks::read( + &mut sender_option, + vec![0; MIN_BUFFER_SIZE], + Some(buffer_size), + &mut carry_over, + &mut file, + &mut files, + separator, + Vec::new(), + &settings, + ); + if sender_option.is_none() { + // We have already read the whole input. Since we are in our first two reads, + // this means that we can fit the whole input into memory. Bypass writing below and + // handle this case in a more straightforward way. + return if let Ok(first_chunk) = receiver.recv() { + if let Ok(second_chunk) = receiver.recv() { + ReadResult::SortedTwoChunks([first_chunk, second_chunk]) + } else { + ReadResult::SortedSingleChunk(first_chunk) + } + } else { + ReadResult::EmptyInput + }; + } + } + + let mut file_number = 0; + loop { + let mut chunk = match receiver.recv() { + Ok(it) => it, + _ => { + return ReadResult::WroteChunksToFile { + chunks_written: file_number, + } + } + }; + + write( + &mut chunk, + &tmp_dir.path().join(file_number.to_string()), + separator, + ); + + file_number += 1; + + let (recycled_lines, recycled_buffer) = chunk.recycle(); + + chunks::read( + &mut sender_option, + recycled_buffer, + None, + &mut carry_over, + &mut file, + &mut files, + separator, + recycled_lines, + &settings, + ); + } +} + +/// Write the lines in `chunk` to `file`, separated by `separator`. 
+fn write(chunk: &mut Chunk, file: &Path, separator: u8) { + chunk.with_lines_mut(|lines| { + // Write the lines to the file + let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file)); + let mut writer = BufWriter::new(file); + for s in lines.iter() { + crash_if_err!(1, writer.write_all(s.line.as_bytes())); + crash_if_err!(1, writer.write_all(&[separator])); + } + }); +} diff --git a/src/uu/sort/src/external_sort/LICENSE b/src/uu/sort/src/external_sort/LICENSE deleted file mode 100644 index e26c89c9f..000000000 --- a/src/uu/sort/src/external_sort/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright 2018 Battelle Memorial Institute - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs deleted file mode 100644 index fd942d4a7..000000000 --- a/src/uu/sort/src/external_sort/mod.rs +++ /dev/null @@ -1,295 +0,0 @@ -use std::clone::Clone; -use std::cmp::Ordering::Less; -use std::collections::VecDeque; -use std::error::Error; -use std::fs::{File, OpenOptions}; -use std::io::SeekFrom::Start; -use std::io::{BufRead, BufReader, BufWriter, Seek, Write}; -use std::marker::PhantomData; -use std::path::PathBuf; - -use serde::de::DeserializeOwned; -use serde::Serialize; -use serde_json; -use tempdir::TempDir; - -use super::{GlobalSettings, Line}; - -/// Trait for types that can be used by -/// [ExternalSorter](struct.ExternalSorter.html). Must be sortable, cloneable, -/// serializeable, and able to report on it's size -pub trait ExternallySortable: Clone + Serialize + DeserializeOwned { - /// Get the size, in bytes, of this object (used to constrain the buffer - /// used in the external sort). 
- fn get_size(&self) -> u64; -} - -/// Iterator that provides sorted `T`s -pub struct ExtSortedIterator { - buffers: Vec>, - chunk_offsets: Vec, - max_per_chunk: u64, - chunks: u64, - tmp_dir: TempDir, - settings: GlobalSettings, - failed: bool, -} - -impl Iterator for ExtSortedIterator -where - Line: ExternallySortable, -{ - type Item = Result>; - - /// # Errors - /// - /// This method can fail due to issues reading intermediate sorted chunks - /// from disk, or due to serde deserialization issues - fn next(&mut self) -> Option { - if self.failed { - return None; - } - // fill up any empty buffers - let mut empty = true; - for chunk_num in 0..self.chunks { - if self.buffers[chunk_num as usize].is_empty() { - let mut f = match File::open(self.tmp_dir.path().join(chunk_num.to_string())) { - Ok(f) => f, - Err(e) => { - self.failed = true; - return Some(Err(Box::new(e))); - } - }; - match f.seek(Start(self.chunk_offsets[chunk_num as usize])) { - Ok(_) => (), - Err(e) => { - self.failed = true; - return Some(Err(Box::new(e))); - } - } - let bytes_read = - match fill_buff(&mut self.buffers[chunk_num as usize], f, self.max_per_chunk) { - Ok(bytes_read) => bytes_read, - Err(e) => { - self.failed = true; - return Some(Err(e)); - } - }; - self.chunk_offsets[chunk_num as usize] += bytes_read; - if !self.buffers[chunk_num as usize].is_empty() { - empty = false; - } - } else { - empty = false; - } - } - if empty { - return None; - } - - // find the next record to write - // check is_empty() before unwrap()ing - let mut idx = 0; - for chunk_num in 0..self.chunks as usize { - if !self.buffers[chunk_num].is_empty() { - if self.buffers[idx].is_empty() - || (super::compare_by)( - self.buffers[chunk_num].front().unwrap(), - self.buffers[idx].front().unwrap(), - &self.settings, - ) == Less - { - idx = chunk_num; - } - } - } - - // unwrap due to checks above - let r = self.buffers[idx].pop_front().unwrap(); - Some(Ok(r)) - } -} - -/// Perform an external sort on an unsorted stream of 
incoming data -pub struct ExternalSorter -where - Line: ExternallySortable, -{ - tmp_dir: Option, - buffer_bytes: u64, - phantom: PhantomData, - settings: GlobalSettings, -} - -impl ExternalSorter -where - Line: ExternallySortable, -{ - /// Create a new `ExternalSorter` with a specified memory buffer and - /// temporary directory - pub fn new( - buffer_bytes: u64, - tmp_dir: Option, - settings: GlobalSettings, - ) -> ExternalSorter { - ExternalSorter { - buffer_bytes, - tmp_dir, - phantom: PhantomData, - settings, - } - } - - /// Sort (based on `compare`) the `T`s provided by `unsorted` and return an - /// iterator - /// - /// # Errors - /// - /// This method can fail due to issues writing intermediate sorted chunks - /// to disk, or due to serde serialization issues - pub fn sort_by( - &self, - unsorted: I, - settings: GlobalSettings, - ) -> Result, Box> - where - I: Iterator, - { - let tmp_dir = match self.tmp_dir { - Some(ref p) => TempDir::new_in(p, "uutils_sort")?, - None => TempDir::new("uutils_sort")?, - }; - // creating the thing we need to return first due to the face that we need to - // borrow tmp_dir and move it out - let mut iter = ExtSortedIterator { - buffers: Vec::new(), - chunk_offsets: Vec::new(), - max_per_chunk: 0, - chunks: 0, - tmp_dir, - settings, - failed: false, - }; - - { - let mut total_read = 0; - let mut chunk = Vec::new(); - // Initial buffer is specified by user - let mut adjusted_buffer_size = self.buffer_bytes; - let (iter_size, _) = unsorted.size_hint(); - - // make the initial chunks on disk - for seq in unsorted { - let seq_size = seq.get_size(); - total_read += seq_size; - - // GNU minimum is 16 * (sizeof struct + 2), but GNU uses about - // 1/10 the memory that we do. And GNU even says in the code it may - // not work on small buffer sizes. - // - // The following seems to work pretty well, and has about the same max - // RSS as lower minimum values. 
- // - let minimum_buffer_size: u64 = iter_size as u64 * seq_size / 8; - - adjusted_buffer_size = - // Grow buffer size for a struct/Line larger than buffer - if adjusted_buffer_size < seq_size { - seq_size - } else if adjusted_buffer_size < minimum_buffer_size { - minimum_buffer_size - } else { - adjusted_buffer_size - }; - chunk.push(seq); - - if total_read >= adjusted_buffer_size { - super::sort_by(&mut chunk, &self.settings); - self.write_chunk( - &iter.tmp_dir.path().join(iter.chunks.to_string()), - &mut chunk, - )?; - chunk.clear(); - total_read = 0; - iter.chunks += 1; - } - } - // write the last chunk - if chunk.len() > 0 { - super::sort_by(&mut chunk, &self.settings); - self.write_chunk( - &iter.tmp_dir.path().join(iter.chunks.to_string()), - &mut chunk, - )?; - iter.chunks += 1; - } - - // initialize buffers for each chunk - // - // Having a right sized buffer for each chunk for smallish values seems silly to me? - // - // We will have to have the entire iter in memory sometime right? 
- // Set minimum to the size of the writer buffer, ~8K - // - const MINIMUM_READBACK_BUFFER: u64 = 8200; - let right_sized_buffer = adjusted_buffer_size - .checked_div(iter.chunks) - .unwrap_or(adjusted_buffer_size); - iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER { - right_sized_buffer - } else { - MINIMUM_READBACK_BUFFER - }; - iter.buffers = vec![VecDeque::new(); iter.chunks as usize]; - iter.chunk_offsets = vec![0 as u64; iter.chunks as usize]; - for chunk_num in 0..iter.chunks { - let offset = fill_buff( - &mut iter.buffers[chunk_num as usize], - File::open(iter.tmp_dir.path().join(chunk_num.to_string()))?, - iter.max_per_chunk, - )?; - iter.chunk_offsets[chunk_num as usize] = offset; - } - } - - Ok(iter) - } - - fn write_chunk(&self, file: &PathBuf, chunk: &mut Vec) -> Result<(), Box> { - let new_file = OpenOptions::new().create(true).append(true).open(file)?; - let mut buf_write = Box::new(BufWriter::new(new_file)) as Box; - for s in chunk { - let mut serialized = serde_json::to_string(&s).expect("JSON write error: "); - serialized.push_str("\n"); - buf_write.write(serialized.as_bytes())?; - } - buf_write.flush()?; - - Ok(()) - } -} - -fn fill_buff( - vec: &mut VecDeque, - file: File, - max_bytes: u64, -) -> Result> -where - Line: ExternallySortable, -{ - let mut total_read = 0; - let mut bytes_read = 0; - for line in BufReader::new(file).lines() { - let line_s = line?; - bytes_read += line_s.len() + 1; - // This is where the bad stuff happens usually - let deserialized: Line = serde_json::from_str(&line_s).expect("JSON read error: "); - total_read += deserialized.get_size(); - vec.push_back(deserialized); - if total_read > max_bytes { - break; - } - } - - Ok(bytes_read as u64) -} diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs new file mode 100644 index 000000000..48d48ad40 --- /dev/null +++ b/src/uu/sort/src/merge.rs @@ -0,0 +1,224 @@ +//! Merge already sorted files. +//! +//! 
We achieve performance by splitting the tasks of sorting and writing, and reading and parsing between two threads. +//! The threads communicate over channels. There's one channel per file in the direction reader -> sorter, but only +//! one channel from the sorter back to the reader. The channels to the sorter are used to send the read chunks. +//! The sorter reads the next chunk from the channel whenever it needs the next chunk after running out of lines +//! from the previous read of the file. The channel back from the sorter to the reader has two purposes: To allow the reader +//! to reuse memory allocations and to tell the reader which file to read from next. + +use std::{ + cmp::Ordering, + ffi::OsStr, + io::{Read, Write}, + iter, + rc::Rc, + sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender}, + thread, +}; + +use compare::Compare; + +use crate::{ + chunks::{self, Chunk}, + compare_by, open, GlobalSettings, +}; + +// Merge already sorted files. +pub fn merge<'a>(files: &[impl AsRef], settings: &'a GlobalSettings) -> FileMerger<'a> { + let (request_sender, request_receiver) = channel(); + let mut reader_files = Vec::with_capacity(files.len()); + let mut loaded_receivers = Vec::with_capacity(files.len()); + for (file_number, file) in files.iter().filter_map(open).enumerate() { + let (sender, receiver) = sync_channel(2); + loaded_receivers.push(receiver); + reader_files.push(ReaderFile { + file, + sender: Some(sender), + carry_over: vec![], + }); + request_sender + .send((file_number, Chunk::new(vec![0; 8 * 1024], |_| Vec::new()))) + .unwrap(); + } + + for file_number in 0..reader_files.len() { + request_sender + .send((file_number, Chunk::new(vec![0; 8 * 1024], |_| Vec::new()))) + .unwrap(); + } + + thread::spawn({ + let settings = settings.clone(); + move || { + reader( + request_receiver, + &mut reader_files, + &settings, + if settings.zero_terminated { + b'\0' + } else { + b'\n' + }, + ) + } + }); + + let mut mergeable_files = vec![]; + + for 
(file_number, receiver) in loaded_receivers.into_iter().enumerate() { + mergeable_files.push(MergeableFile { + current_chunk: Rc::new(receiver.recv().unwrap()), + file_number, + line_idx: 0, + receiver, + }) + } + + FileMerger { + heap: binary_heap_plus::BinaryHeap::from_vec_cmp( + mergeable_files, + FileComparator { settings }, + ), + request_sender, + prev: None, + } +} +/// The struct on the reader thread representing an input file +struct ReaderFile { + file: Box, + sender: Option>, + carry_over: Vec, +} + +/// The function running on the reader thread. +fn reader( + recycled_receiver: Receiver<(usize, Chunk)>, + files: &mut [ReaderFile], + settings: &GlobalSettings, + separator: u8, +) { + for (file_idx, chunk) in recycled_receiver.iter() { + let (recycled_lines, recycled_buffer) = chunk.recycle(); + let ReaderFile { + file, + sender, + carry_over, + } = &mut files[file_idx]; + chunks::read( + sender, + recycled_buffer, + None, + carry_over, + file, + &mut iter::empty(), + separator, + recycled_lines, + settings, + ); + } +} +/// The struct on the main thread representing an input file +pub struct MergeableFile { + current_chunk: Rc, + line_idx: usize, + receiver: Receiver, + file_number: usize, +} + +/// A struct to keep track of the previous line we encountered. +/// +/// This is required for deduplication purposes. +struct PreviousLine { + chunk: Rc, + line_idx: usize, + file_number: usize, +} + +/// Merges files together. This is **not** an iterator because of lifetime problems. +pub struct FileMerger<'a> { + heap: binary_heap_plus::BinaryHeap>, + request_sender: Sender<(usize, Chunk)>, + prev: Option, +} + +impl<'a> FileMerger<'a> { + /// Write the merged contents to the output file. 
+ pub fn write_all(&mut self, settings: &GlobalSettings) { + let mut out = settings.out_writer(); + while self.write_next(settings, &mut out) {} + } + + fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool { + if let Some(file) = self.heap.peek() { + let prev = self.prev.replace(PreviousLine { + chunk: file.current_chunk.clone(), + line_idx: file.line_idx, + file_number: file.file_number, + }); + + file.current_chunk.with_lines(|lines| { + let current_line = &lines[file.line_idx]; + if settings.unique { + if let Some(prev) = &prev { + let cmp = compare_by( + &prev.chunk.borrow_lines()[prev.line_idx], + current_line, + settings, + ); + if cmp == Ordering::Equal { + return; + } + } + } + current_line.print(out, settings); + }); + + let was_last_line_for_file = + file.current_chunk.borrow_lines().len() == file.line_idx + 1; + + if was_last_line_for_file { + if let Ok(next_chunk) = file.receiver.recv() { + let mut file = self.heap.peek_mut().unwrap(); + file.current_chunk = Rc::new(next_chunk); + file.line_idx = 0; + } else { + self.heap.pop(); + } + } else { + self.heap.peek_mut().unwrap().line_idx += 1; + } + + if let Some(prev) = prev { + if let Ok(prev_chunk) = Rc::try_unwrap(prev.chunk) { + self.request_sender + .send((prev.file_number, prev_chunk)) + .ok(); + } + } + } + !self.heap.is_empty() + } +} + +/// Compares files by their current line. +struct FileComparator<'a> { + settings: &'a GlobalSettings, +} + +impl<'a> Compare for FileComparator<'a> { + fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering { + let mut cmp = compare_by( + &a.current_chunk.borrow_lines()[a.line_idx], + &b.current_chunk.borrow_lines()[b.line_idx], + self.settings, + ); + if cmp == Ordering::Equal { + // To make sorting stable, we need to consider the file number as well, + // as lines from a file with a lower number are to be considered "earlier". + cmp = a.file_number.cmp(&b.file_number); + } + // Our BinaryHeap is a max heap. 
We use it as a min heap, so we need to reverse the ordering. + cmp.reverse() + } +} diff --git a/src/uu/sort/src/numeric_str_cmp.rs b/src/uu/sort/src/numeric_str_cmp.rs index b74d97867..f8666b701 100644 --- a/src/uu/sort/src/numeric_str_cmp.rs +++ b/src/uu/sort/src/numeric_str_cmp.rs @@ -14,21 +14,20 @@ //! More specifically, exponent can be understood so that the original number is in (1..10)*10^exponent. //! From that follows the constraints of this algorithm: It is able to compare numbers in ±(1*10^[i64::MIN]..10*10^[i64::MAX]). -use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, ops::Range}; -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Clone)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] enum Sign { Negative, Positive, } -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct NumInfo { exponent: i64, sign: Sign, } -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct NumInfoParseSettings { pub accept_si_units: bool, pub thousands_separator: Option, diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index c82524796..6d79e80fb 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -15,13 +15,16 @@ #[macro_use] extern crate uucore; +mod check; +mod chunks; mod custom_str_cmp; -mod external_sort; +mod ext_sort; +mod merge; mod numeric_str_cmp; use clap::{App, Arg}; use custom_str_cmp::custom_str_cmp; -use external_sort::{ExternalSorter, ExternallySortable}; +use ext_sort::ext_sort; use fnv::FnvHasher; use itertools::Itertools; use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings}; @@ -29,20 +32,16 @@ use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; use semver::Version; -use serde::{Deserialize, Serialize}; -use smallvec::SmallVec; use std::cmp::Ordering; -use std::collections::BinaryHeap; use std::env; +use std::ffi::OsStr; use 
std::fs::File; use std::hash::{Hash, Hasher}; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write}; -use std::mem::replace; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::ops::Range; use std::path::Path; use std::path::PathBuf; use unicode_width::UnicodeWidthStr; -use uucore::fs::is_stdin_interactive; // for Iterator::dedup() use uucore::InvalidEncodingHandling; static NAME: &str = "sort"; @@ -65,6 +64,17 @@ static OPT_NUMERIC_SORT: &str = "numeric-sort"; static OPT_GENERAL_NUMERIC_SORT: &str = "general-numeric-sort"; static OPT_VERSION_SORT: &str = "version-sort"; +static OPT_SORT: &str = "sort"; + +static ALL_SORT_MODES: &[&str] = &[ + OPT_GENERAL_NUMERIC_SORT, + OPT_HUMAN_NUMERIC_SORT, + OPT_MONTH_SORT, + OPT_NUMERIC_SORT, + OPT_VERSION_SORT, + OPT_RANDOM, +]; + static OPT_DICTIONARY_ORDER: &str = "dictionary-order"; static OPT_MERGE: &str = "merge"; static OPT_CHECK: &str = "check"; @@ -94,7 +104,10 @@ static THOUSANDS_SEP: char = ','; static NEGATIVE: char = '-'; static POSITIVE: char = '+'; -static DEFAULT_BUF_SIZE: usize = std::usize::MAX; +// Choosing a higher buffer size does not result in performance improvements +// (at least not on my machine). TODO: In the future, we should also take the amount of +// available memory into consideration, instead of relying on this constant only. 
+static DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] enum SortMode { @@ -103,10 +116,11 @@ enum SortMode { GeneralNumeric, Month, Version, + Random, Default, } #[derive(Clone)] -struct GlobalSettings { +pub struct GlobalSettings { mode: SortMode, debug: bool, ignore_blanks: bool, @@ -120,7 +134,6 @@ struct GlobalSettings { unique: bool, check: bool, check_silent: bool, - random: bool, salt: String, selectors: Vec, separator: Option, @@ -128,28 +141,48 @@ struct GlobalSettings { zero_terminated: bool, buffer_size: usize, tmp_dir: PathBuf, - ext_sort: bool, } impl GlobalSettings { - // It's back to do conversions for command line opts! - // Probably want to do through numstrcmp somehow now? - fn human_numeric_convert(a: &str) -> usize { - let num_str = &a[get_leading_gen(a)]; - let (_, suf_str) = a.split_at(num_str.len()); - let num_usize = num_str - .parse::() - .expect("Error parsing buffer size: "); - let suf_usize: usize = match suf_str.to_uppercase().as_str() { - // SI Units - "B" => 1usize, - "K" => 1000usize, - "M" => 1000000usize, - "G" => 1000000000usize, - // GNU regards empty human numeric values as K by default - _ => 1000usize, - }; - num_usize * suf_usize + /// Interpret this `&str` as a number with an optional trailing si unit. + /// + /// If there is no trailing si unit, the implicit unit is K. + /// The suffix B causes the number to be interpreted as a byte count. 
+ fn parse_byte_count(input: &str) -> usize { + const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']; + + let input = input.trim(); + + let (num_str, si_unit) = + if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) { + let mut chars = input.chars(); + let si_suffix = chars.next_back().unwrap().to_ascii_uppercase(); + let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap(); + let num_str = chars.as_str(); + (num_str, si_unit) + } else { + (input, 1) + }; + + let num_usize: usize = num_str + .trim() + .parse() + .unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e)); + + num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32)) + } + + fn out_writer(&self) -> BufWriter> { + match self.outfile { + Some(ref filename) => match File::create(Path::new(&filename)) { + Ok(f) => BufWriter::new(Box::new(f) as Box), + Err(e) => { + show_error!("{0}: {1}", filename, e.to_string()); + panic!("Could not open output file"); + } + }, + None => BufWriter::new(Box::new(stdout()) as Box), + } } } @@ -169,7 +202,6 @@ impl Default for GlobalSettings { unique: false, check: false, check_silent: false, - random: false, salt: String::new(), selectors: vec![], separator: None, @@ -177,7 +209,6 @@ impl Default for GlobalSettings { zero_terminated: false, buffer_size: DEFAULT_BUF_SIZE, tmp_dir: PathBuf::new(), - ext_sort: false, } } } @@ -188,7 +219,6 @@ struct KeySettings { ignore_case: bool, dictionary_order: bool, ignore_non_printing: bool, - random: bool, reverse: bool, } @@ -199,40 +229,16 @@ impl From<&GlobalSettings> for KeySettings { ignore_blanks: settings.ignore_blanks, ignore_case: settings.ignore_case, ignore_non_printing: settings.ignore_non_printing, - random: settings.random, reverse: settings.reverse, dictionary_order: settings.dictionary_order, } } } -#[derive(Debug, Serialize, Deserialize, Clone)] -/// Represents the string selected by a FieldSelector. 
-struct SelectionRange { - range: Range, -} - -impl SelectionRange { - fn new(range: Range) -> Self { - Self { range } - } - - /// Gets the actual string slice represented by this Selection. - fn get_str<'a>(&self, line: &'a str) -> &'a str { - &line[self.range.to_owned()] - } - - fn shorten(&mut self, new_range: Range) { - self.range.end = self.range.start + new_range.end; - self.range.start += new_range.start; - } -} - -#[derive(Serialize, Deserialize, Clone)] +#[derive(Clone, Debug)] enum NumCache { AsF64(GeneralF64ParseResult), WithInfo(NumInfo), - None, } impl NumCache { @@ -249,77 +255,55 @@ impl NumCache { } } } -#[derive(Serialize, Deserialize, Clone)] -struct Selection { - range: SelectionRange, - num_cache: NumCache, -} -impl Selection { - /// Gets the actual string slice represented by this Selection. - fn get_str<'a>(&'a self, line: &'a Line) -> &'a str { - self.range.get_str(&line.line) - } +#[derive(Clone, Debug)] +struct Selection<'a> { + slice: &'a str, + num_cache: Option>, } type Field = Range; -#[derive(Serialize, Deserialize, Clone)] -struct Line { - line: String, - // The common case is not to specify fields. Let's make this fast. - selections: SmallVec<[Selection; 1]>, +#[derive(Clone, Debug)] +pub struct Line<'a> { + line: &'a str, + selections: Box<[Selection<'a>]>, } -impl ExternallySortable for Line { - fn get_size(&self) -> u64 { - // Currently 96 bytes, but that could change, so we get that size here - std::mem::size_of::() as u64 - } -} - -impl Line { - fn new(line: String, settings: &GlobalSettings) -> Self { +impl<'a> Line<'a> { + fn create(string: &'a str, settings: &GlobalSettings) -> Self { let fields = if settings .selectors .iter() - .any(|selector| selector.needs_tokens()) + .any(|selector| selector.needs_tokens) { // Only tokenize if we will need tokens. 
- Some(tokenize(&line, settings.separator)) + Some(tokenize(string, settings.separator)) } else { None }; - let selections = settings - .selectors - .iter() - .map(|selector| { - let mut range = - SelectionRange::new(selector.get_selection(&line, fields.as_deref())); - let num_cache = if selector.settings.mode == SortMode::Numeric - || selector.settings.mode == SortMode::HumanNumeric - { - let (info, num_range) = NumInfo::parse( - range.get_str(&line), - NumInfoParseSettings { - accept_si_units: selector.settings.mode == SortMode::HumanNumeric, - thousands_separator: Some(THOUSANDS_SEP), - decimal_pt: Some(DECIMAL_PT), - }, - ); - range.shorten(num_range); - NumCache::WithInfo(info) - } else if selector.settings.mode == SortMode::GeneralNumeric { - let str = range.get_str(&line); - NumCache::AsF64(general_f64_parse(&str[get_leading_gen(str)])) - } else { - NumCache::None - }; - Selection { range, num_cache } - }) - .collect(); - Self { line, selections } + Line { + line: string, + selections: settings + .selectors + .iter() + .filter(|selector| !selector.is_default_selection) + .map(|selector| selector.get_selection(string, fields.as_deref())) + .collect(), + } + } + + fn print(&self, writer: &mut impl Write, settings: &GlobalSettings) { + if settings.zero_terminated && !settings.debug { + crash_if_err!(1, writer.write_all(self.line.as_bytes())); + crash_if_err!(1, writer.write_all("\0".as_bytes())); + } else if !settings.debug { + crash_if_err!(1, writer.write_all(self.line.as_bytes())); + crash_if_err!(1, writer.write_all("\n".as_bytes())); + } else { + crash_if_err!(1, self.print_debug(settings, writer)); + } } /// Writes indicators for the selections this line matched. 
The original line content is NOT expected @@ -327,7 +311,7 @@ impl Line { fn print_debug( &self, settings: &GlobalSettings, - writer: &mut dyn Write, + writer: &mut impl Write, ) -> std::io::Result<()> { // We do not consider this function performance critical, as debug output is only useful for small files, // which are not a performance problem in any case. Therefore there aren't any special performance @@ -338,7 +322,7 @@ impl Line { let fields = tokenize(&self.line, settings.separator); for selector in settings.selectors.iter() { - let mut selection = selector.get_selection(&self.line, Some(&fields)); + let mut selection = selector.get_range(&self.line, Some(&fields)); match selector.settings.mode { SortMode::Numeric | SortMode::HumanNumeric => { // find out which range is used for numeric comparisons @@ -422,7 +406,7 @@ impl Line { } } } - if !(settings.random + if !(settings.mode == SortMode::Random || settings.stable || settings.unique || !(settings.dictionary_order @@ -526,12 +510,10 @@ impl KeyPosition { 'h' => settings.mode = SortMode::HumanNumeric, 'i' => settings.ignore_non_printing = true, 'n' => settings.mode = SortMode::Numeric, - 'R' => settings.random = true, + 'R' => settings.mode = SortMode::Random, 'r' => settings.reverse = true, 'V' => settings.mode = SortMode::Version, - c => { - crash!(1, "invalid option for key: `{}`", c) - } + c => crash!(1, "invalid option for key: `{}`", c), } // All numeric sorts and month sort conflict with dictionary_order and ignore_non_printing. // Instad of reporting an error, let them overwrite each other. 
@@ -550,7 +532,9 @@ impl KeyPosition { | SortMode::GeneralNumeric | SortMode::Month => SortMode::Default, // Only SortMode::Default and SortMode::Version work with dictionary_order and ignore_non_printing - m @ SortMode::Default | m @ SortMode::Version => m, + m @ SortMode::Default + | m @ SortMode::Version + | m @ SortMode::Random => m, } } _ => {} @@ -588,16 +572,66 @@ struct FieldSelector { from: KeyPosition, to: Option, settings: KeySettings, + needs_tokens: bool, + // Whether the selection for each line is going to be the whole line with no NumCache + is_default_selection: bool, } impl FieldSelector { - fn needs_tokens(&self) -> bool { - self.from.field != 1 || self.from.char == 0 || self.to.is_some() + fn new(from: KeyPosition, to: Option, settings: KeySettings) -> Self { + Self { + is_default_selection: from.field == 1 + && from.char == 1 + && to.is_none() + // TODO: Once our MinRustV is 1.42 or higher, change this to the matches! macro + && match settings.mode { + SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric => false, + _ => true, + }, + needs_tokens: from.field != 1 || from.char == 0 || to.is_some(), + from, + to, + settings, + } } - /// Look up the slice that corresponds to this selector for the given line. - /// If needs_fields returned false, fields may be None. - fn get_selection<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { + /// Get the selection that corresponds to this selector for the line. + /// If needs_fields returned false, tokens may be None. + fn get_selection<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Selection<'a> { + let mut range = &line[self.get_range(&line, tokens)]; + let num_cache = if self.settings.mode == SortMode::Numeric + || self.settings.mode == SortMode::HumanNumeric + { + // Parse NumInfo for this number. 
+ let (info, num_range) = NumInfo::parse( + range, + NumInfoParseSettings { + accept_si_units: self.settings.mode == SortMode::HumanNumeric, + thousands_separator: Some(THOUSANDS_SEP), + decimal_pt: Some(DECIMAL_PT), + }, + ); + // Shorten the range to what we need to pass to numeric_str_cmp later. + range = &range[num_range]; + Some(Box::new(NumCache::WithInfo(info))) + } else if self.settings.mode == SortMode::GeneralNumeric { + // Parse this number as f64, as this is the requirement for general numeric sorting. + Some(Box::new(NumCache::AsF64(general_f64_parse( + &range[get_leading_gen(range)], + )))) + } else { + // This is not a numeric sort, so we don't need a NumCache. + None + }; + Selection { + slice: range, + num_cache, + } + } + + /// Look up the range in the line that corresponds to this selector. + /// If needs_fields returned false, tokens may be None. + fn get_range<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { enum Resolution { // The start index of the resolved character, inclusive StartOfChar(usize), @@ -682,85 +716,6 @@ impl FieldSelector { } } -struct MergeableFile<'a> { - lines: Lines>>, - current_line: Line, - settings: &'a GlobalSettings, -} - -// BinaryHeap depends on `Ord`. Note that we want to pop smallest items -// from the heap first, and BinaryHeap.pop() returns the largest, so we -// trick it into the right order by calling reverse() here. 
-impl<'a> Ord for MergeableFile<'a> { - fn cmp(&self, other: &MergeableFile) -> Ordering { - compare_by(&self.current_line, &other.current_line, self.settings).reverse() - } -} - -impl<'a> PartialOrd for MergeableFile<'a> { - fn partial_cmp(&self, other: &MergeableFile) -> Option { - Some(self.cmp(other)) - } -} - -impl<'a> PartialEq for MergeableFile<'a> { - fn eq(&self, other: &MergeableFile) -> bool { - Ordering::Equal == self.cmp(other) - } -} - -impl<'a> Eq for MergeableFile<'a> {} - -struct FileMerger<'a> { - heap: BinaryHeap>, - settings: &'a GlobalSettings, -} - -impl<'a> FileMerger<'a> { - fn new(settings: &'a GlobalSettings) -> FileMerger<'a> { - FileMerger { - heap: BinaryHeap::new(), - settings, - } - } - fn push_file(&mut self, mut lines: Lines>>) { - if let Some(Ok(next_line)) = lines.next() { - let mergeable_file = MergeableFile { - lines, - current_line: Line::new(next_line, &self.settings), - settings: &self.settings, - }; - self.heap.push(mergeable_file); - } - } -} - -impl<'a> Iterator for FileMerger<'a> { - type Item = Line; - fn next(&mut self) -> Option { - match self.heap.pop() { - Some(mut current) => { - match current.lines.next() { - Some(Ok(next_line)) => { - let ret = replace( - &mut current.current_line, - Line::new(next_line, &self.settings), - ); - self.heap.push(current); - Some(ret) - } - _ => { - // Don't put it back in the heap (it's empty/erroring) - // but its first line is still valid. 
- Some(current.current_line) - } - } - } - None => None, - } - } -} - fn get_usage() -> String { format!( "{0} {1} @@ -773,6 +728,16 @@ With no FILE, or when FILE is -, read standard input.", ) } +fn make_sort_mode_arg<'a, 'b>(mode: &'a str, short: &'b str, help: &'b str) -> Arg<'a, 'b> { + let mut arg = Arg::with_name(mode).short(short).long(mode).help(help); + for possible_mode in ALL_SORT_MODES { + if *possible_mode != mode { + arg = arg.conflicts_with(possible_mode); + } + } + arg +} + pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) @@ -785,34 +750,62 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .about(ABOUT) .usage(&usage[..]) .arg( - Arg::with_name(OPT_HUMAN_NUMERIC_SORT) - .short("h") - .long(OPT_HUMAN_NUMERIC_SORT) - .help("compare according to human readable sizes, eg 1M > 100k"), + Arg::with_name(OPT_SORT) + .long(OPT_SORT) + .takes_value(true) + .possible_values( + &[ + "general-numeric", + "human-numeric", + "month", + "numeric", + "version", + "random", + ] + ) + .conflicts_with_all(ALL_SORT_MODES) ) .arg( - Arg::with_name(OPT_MONTH_SORT) - .short("M") - .long(OPT_MONTH_SORT) - .help("compare according to month name abbreviation"), + make_sort_mode_arg( + OPT_HUMAN_NUMERIC_SORT, + "h", + "compare according to human readable sizes, eg 1M > 100k" + ), ) .arg( - Arg::with_name(OPT_NUMERIC_SORT) - .short("n") - .long(OPT_NUMERIC_SORT) - .help("compare according to string numerical value"), + make_sort_mode_arg( + OPT_MONTH_SORT, + "M", + "compare according to month name abbreviation" + ), ) .arg( - Arg::with_name(OPT_GENERAL_NUMERIC_SORT) - .short("g") - .long(OPT_GENERAL_NUMERIC_SORT) - .help("compare according to string general numerical value"), + make_sort_mode_arg( + OPT_NUMERIC_SORT, + "n", + "compare according to string numerical value" + ), ) .arg( - Arg::with_name(OPT_VERSION_SORT) - .short("V") - .long(OPT_VERSION_SORT) - .help("Sort by SemVer version number, eg 1.12.2 > 1.1.2"), 
+ make_sort_mode_arg( + OPT_GENERAL_NUMERIC_SORT, + "g", + "compare according to string general numerical value" + ), + ) + .arg( + make_sort_mode_arg( + OPT_VERSION_SORT, + "V", + "Sort by SemVer version number, eg 1.12.2 > 1.1.2", + ), + ) + .arg( + make_sort_mode_arg( + OPT_RANDOM, + "R", + "shuffle in random order", + ), ) .arg( Arg::with_name(OPT_DICTIONARY_ORDER) @@ -866,12 +859,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .takes_value(true) .value_name("FILENAME"), ) - .arg( - Arg::with_name(OPT_RANDOM) - .short("R") - .long(OPT_RANDOM) - .help("shuffle in random order"), - ) .arg( Arg::with_name(OPT_REVERSE) .short("r") @@ -960,7 +947,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut files = Vec::new(); for path in &files0_from { - let (reader, _) = open(path.as_str()).expect("Could not read from file specified."); + let reader = open(path.as_str()).expect("Could not read from file specified."); let buf_reader = BufReader::new(reader); for line in buf_reader.split(b'\0').flatten() { files.push( @@ -978,16 +965,25 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .unwrap_or_default() }; - settings.mode = if matches.is_present(OPT_HUMAN_NUMERIC_SORT) { + settings.mode = if matches.is_present(OPT_HUMAN_NUMERIC_SORT) + || matches.value_of(OPT_SORT) == Some("human-numeric") + { SortMode::HumanNumeric - } else if matches.is_present(OPT_MONTH_SORT) { + } else if matches.is_present(OPT_MONTH_SORT) || matches.value_of(OPT_SORT) == Some("month") { SortMode::Month - } else if matches.is_present(OPT_GENERAL_NUMERIC_SORT) { + } else if matches.is_present(OPT_GENERAL_NUMERIC_SORT) + || matches.value_of(OPT_SORT) == Some("general-numeric") + { SortMode::GeneralNumeric - } else if matches.is_present(OPT_NUMERIC_SORT) { + } else if matches.is_present(OPT_NUMERIC_SORT) || matches.value_of(OPT_SORT) == Some("numeric") + { SortMode::Numeric - } else if matches.is_present(OPT_VERSION_SORT) { + } else if matches.is_present(OPT_VERSION_SORT) || 
matches.value_of(OPT_SORT) == Some("version") + { SortMode::Version + } else if matches.is_present(OPT_RANDOM) || matches.value_of(OPT_SORT) == Some("random") { + settings.salt = get_rand_string(); + SortMode::Random } else { SortMode::Default }; @@ -1003,28 +999,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 { env::set_var("RAYON_NUM_THREADS", &settings.threads); } - if matches.is_present(OPT_BUF_SIZE) { - settings.buffer_size = { - let input = matches - .value_of(OPT_BUF_SIZE) - .map(String::from) - .unwrap_or(format!("{}", DEFAULT_BUF_SIZE)); + settings.buffer_size = matches + .value_of(OPT_BUF_SIZE) + .map(GlobalSettings::parse_byte_count) + .unwrap_or(DEFAULT_BUF_SIZE); - GlobalSettings::human_numeric_convert(&input) - }; - settings.ext_sort = true; - } - - if matches.is_present(OPT_TMP_DIR) { - let result = matches - .value_of(OPT_TMP_DIR) - .map(String::from) - .unwrap_or(format!("{}", env::temp_dir().display())); - settings.tmp_dir = PathBuf::from(result); - settings.ext_sort = true; - } else { - settings.tmp_dir = env::temp_dir(); - } + settings.tmp_dir = matches + .value_of(OPT_TMP_DIR) + .map(PathBuf::from) + .unwrap_or_else(env::temp_dir); settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); settings.merge = matches.is_present(OPT_MERGE); @@ -1044,11 +1027,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.stable = matches.is_present(OPT_STABLE); settings.unique = matches.is_present(OPT_UNIQUE); - if matches.is_present(OPT_RANDOM) { - settings.random = matches.is_present(OPT_RANDOM); - settings.salt = get_rand_string(); - } - if files.is_empty() { /* if no file, default to stdin */ files.push("-".to_owned()); @@ -1087,11 +1065,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let to = from_to .next() .map(|to| KeyPosition::parse(to, 0, &mut key_settings)); - let field_selector = FieldSelector { - from, - to, - settings: key_settings, - }; + let field_selector = FieldSelector::new(from, to, key_settings); 
settings.selectors.push(field_selector); } } @@ -1099,137 +1073,49 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if !settings.stable || !matches.is_present(OPT_KEY) { // add a default selector matching the whole line let key_settings = KeySettings::from(&settings); - settings.selectors.push(FieldSelector { - from: KeyPosition { + settings.selectors.push(FieldSelector::new( + KeyPosition { field: 1, char: 1, ignore_blanks: key_settings.ignore_blanks, }, - to: None, - settings: key_settings, - }); + None, + key_settings, + )); } - exec(files, settings) + exec(&files, &settings) } -fn exec(files: Vec, settings: GlobalSettings) -> i32 { - let mut lines = Vec::new(); - let mut file_merger = FileMerger::new(&settings); - - for path in &files { - let (reader, _) = match open(path) { - Some(x) => x, - None => continue, - }; - - let buf_reader = BufReader::new(reader); - - if settings.merge { - file_merger.push_file(buf_reader.lines()); - } else if settings.zero_terminated { - for line in buf_reader.split(b'\0').flatten() { - lines.push(Line::new( - std::str::from_utf8(&line) - .expect("Could not parse string from zero terminated input.") - .to_string(), - &settings, - )); - } - } else { - for line in buf_reader.lines() { - if let Ok(n) = line { - lines.push(Line::new(n, &settings)); - } else { - break; - } - } - } - } - - if settings.check { - return exec_check_file(&lines, &settings); - } - - // Only use ext_sorter when we need to. 
- // Probably faster that we don't create - // an owned value each run - if settings.ext_sort { - lines = ext_sort_by(lines, settings.clone()); - } else { - sort_by(&mut lines, &settings); - } - - if settings.merge { - if settings.unique { - print_sorted( - file_merger.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), - &settings, - ) - } else { - print_sorted(file_merger, &settings) - } - } else if settings.unique { +fn output_sorted_lines<'a>(iter: impl Iterator>, settings: &GlobalSettings) { + if settings.unique { print_sorted( - lines - .into_iter() - .dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), + iter.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), &settings, - ) + ); } else { - print_sorted(lines.into_iter(), &settings) + print_sorted(iter, &settings); } +} +fn exec(files: &[String], settings: &GlobalSettings) -> i32 { + if settings.merge { + let mut file_merger = merge::merge(files, settings); + file_merger.write_all(settings); + } else if settings.check { + if files.len() > 1 { + crash!(1, "only one file allowed with -c"); + } + return check::check(files.first().unwrap(), settings); + } else { + let mut lines = files.iter().filter_map(open); + + ext_sort(&mut lines, &settings); + } 0 } -fn exec_check_file(unwrapped_lines: &[Line], settings: &GlobalSettings) -> i32 { - // errors yields the line before each disorder, - // plus the last line (quirk of .coalesce()) - let mut errors = - unwrapped_lines - .iter() - .enumerate() - .coalesce(|(last_i, last_line), (i, line)| { - if compare_by(&last_line, &line, &settings) == Ordering::Greater { - Err(((last_i, last_line), (i, line))) - } else { - Ok((i, line)) - } - }); - if let Some((first_error_index, _line)) = errors.next() { - // Check for a second "error", as .coalesce() always returns the last - // line, no matter what our merging function does. 
- if let Some(_last_line_or_next_error) = errors.next() { - if !settings.check_silent { - println!("sort: disorder in line {}", first_error_index); - }; - 1 - } else { - // first "error" was actually the last line. - 0 - } - } else { - // unwrapped_lines was empty. Empty files are defined to be sorted. - 0 - } -} - -fn ext_sort_by(unsorted: Vec, settings: GlobalSettings) -> Vec { - let external_sorter = ExternalSorter::new( - settings.buffer_size as u64, - Some(settings.tmp_dir.clone()), - settings.clone(), - ); - let iter = external_sorter - .sort_by(unsorted.into_iter(), settings) - .unwrap() - .map(|x| x.unwrap()) - .collect::>(); - iter -} - -fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { +fn sort_by<'a>(unsorted: &mut Vec>, settings: &GlobalSettings) { if settings.stable || settings.unique { unsorted.par_sort_by(|a, b| compare_by(a, b, &settings)) } else { @@ -1237,36 +1123,56 @@ fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { } } -fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering { - for (idx, selector) in global_settings.selectors.iter().enumerate() { - let a_selection = &a.selections[idx]; - let b_selection = &b.selections[idx]; - let a_str = a_selection.get_str(a); - let b_str = b_selection.get_str(b); +fn compare_by<'a>(a: &Line<'a>, b: &Line<'a>, global_settings: &GlobalSettings) -> Ordering { + let mut idx = 0; + for selector in &global_settings.selectors { + let mut _selections = None; + let (a_selection, b_selection) = if selector.is_default_selection { + // We can select the whole line. + // We have to store the selections outside of the if-block so that they live long enough. + _selections = Some(( + Selection { + slice: a.line, + num_cache: None, + }, + Selection { + slice: b.line, + num_cache: None, + }, + )); + // Unwrap the selections again, and return references to them. 
+ ( + &_selections.as_ref().unwrap().0, + &_selections.as_ref().unwrap().1, + ) + } else { + let selections = (&a.selections[idx], &b.selections[idx]); + idx += 1; + selections + }; + let a_str = a_selection.slice; + let b_str = b_selection.slice; let settings = &selector.settings; - let cmp: Ordering = if settings.random { - random_shuffle(a_str, b_str, global_settings.salt.clone()) - } else { - match settings.mode { - SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( - (a_str, a_selection.num_cache.as_num_info()), - (b_str, b_selection.num_cache.as_num_info()), - ), - SortMode::GeneralNumeric => general_numeric_compare( - a_selection.num_cache.as_f64(), - b_selection.num_cache.as_f64(), - ), - SortMode::Month => month_compare(a_str, b_str), - SortMode::Version => version_compare(a_str, b_str), - SortMode::Default => custom_str_cmp( - a_str, - b_str, - settings.ignore_non_printing, - settings.dictionary_order, - settings.ignore_case, - ), - } + let cmp: Ordering = match settings.mode { + SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt), + SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( + (a_str, a_selection.num_cache.as_ref().unwrap().as_num_info()), + (b_str, b_selection.num_cache.as_ref().unwrap().as_num_info()), + ), + SortMode::GeneralNumeric => general_numeric_compare( + a_selection.num_cache.as_ref().unwrap().as_f64(), + b_selection.num_cache.as_ref().unwrap().as_f64(), + ), + SortMode::Month => month_compare(a_str, b_str), + SortMode::Version => version_compare(a_str, b_str), + SortMode::Default => custom_str_cmp( + a_str, + b_str, + settings.ignore_non_printing, + settings.dictionary_order, + settings.ignore_case, + ), }; if cmp != Ordering::Equal { return if settings.reverse { cmp.reverse() } else { cmp }; @@ -1274,10 +1180,13 @@ fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering } // Call "last resort compare" if all selectors returned Equal - let cmp = if 
global_settings.random || global_settings.stable || global_settings.unique { + let cmp = if global_settings.mode == SortMode::Random + || global_settings.stable + || global_settings.unique + { Ordering::Equal } else { - a.line.cmp(&b.line) + a.line.cmp(b.line) }; if global_settings.reverse { @@ -1332,7 +1241,7 @@ fn get_leading_gen(input: &str) -> Range { leading_whitespace_len..input.len() } -#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, PartialOrd)] +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] enum GeneralF64ParseResult { Invalid, NaN, @@ -1341,7 +1250,8 @@ enum GeneralF64ParseResult { Infinity, } -/// Parse the beginning string into an f64, returning -inf instead of NaN on errors. +/// Parse the beginning string into a GeneralF64ParseResult. +/// Using a GeneralF64ParseResult instead of f64 is necessary to correctly order floats. #[inline(always)] fn general_f64_parse(a: &str) -> GeneralF64ParseResult { // The actual behavior here relies on Rust's implementation of parsing floating points. 
@@ -1377,12 +1287,11 @@ fn get_hash(t: &T) -> u64 { s.finish() } -fn random_shuffle(a: &str, b: &str, x: String) -> Ordering { +fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering { #![allow(clippy::comparison_chain)] - let salt_slice = x.as_str(); - let da = get_hash(&[a, salt_slice].concat()); - let db = get_hash(&[b, salt_slice].concat()); + let da = get_hash(&[a, salt].concat()); + let db = get_hash(&[b, salt].concat()); da.cmp(&db) } @@ -1473,46 +1382,25 @@ fn version_compare(a: &str, b: &str) -> Ordering { } } -fn print_sorted>(iter: T, settings: &GlobalSettings) { - let mut file: Box = match settings.outfile { - Some(ref filename) => match File::create(Path::new(&filename)) { - Ok(f) => Box::new(BufWriter::new(f)) as Box, - Err(e) => { - show_error!("{0}: {1}", filename, e.to_string()); - panic!("Could not open output file"); - } - }, - None => Box::new(BufWriter::new(stdout())) as Box, - }; - if settings.zero_terminated && !settings.debug { - for line in iter { - crash_if_err!(1, file.write_all(line.line.as_bytes())); - crash_if_err!(1, file.write_all("\0".as_bytes())); - } - } else { - for line in iter { - if !settings.debug { - crash_if_err!(1, file.write_all(line.line.as_bytes())); - crash_if_err!(1, file.write_all("\n".as_bytes())); - } else { - crash_if_err!(1, line.print_debug(settings, &mut file)); - } - } +fn print_sorted<'a, T: Iterator>>(iter: T, settings: &GlobalSettings) { + let mut writer = settings.out_writer(); + for line in iter { + line.print(&mut writer, settings); } - crash_if_err!(1, file.flush()); } // from cat.rs -fn open(path: &str) -> Option<(Box, bool)> { +fn open(path: impl AsRef) -> Option> { + let path = path.as_ref(); if path == "-" { let stdin = stdin(); - return Some((Box::new(stdin) as Box, is_stdin_interactive())); + return Some(Box::new(stdin) as Box); } match File::open(Path::new(path)) { - Ok(f) => Some((Box::new(f) as Box, false)), + Ok(f) => Some(Box::new(f) as Box), Err(e) => { - show_error!("{0}: {1}", path, 
e.to_string()); + show_error!("{0:?}: {1}", path, e.to_string()); None } } @@ -1536,7 +1424,7 @@ mod tests { let b = "Ted"; let c = get_rand_string(); - assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); + assert_eq!(Ordering::Equal, random_shuffle(a, b, &c)); } #[test] @@ -1560,7 +1448,7 @@ mod tests { let b = "9"; let c = get_rand_string(); - assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); + assert_eq!(Ordering::Equal, random_shuffle(a, b, &c)); } #[test] @@ -1593,4 +1481,18 @@ mod tests { let line = "..a..a"; assert_eq!(tokenize(line, Some('a')), vec![0..2, 3..5]); } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_line_size() { + // We should make sure to not regress the size of the Line struct because + // it is unconditional overhead for every line we sort. + assert_eq!(std::mem::size_of::(), 32); + // These are the fields of Line: + assert_eq!(std::mem::size_of::<&str>(), 16); + assert_eq!(std::mem::size_of::>(), 16); + + // How big is a selection? Constant cost all lines pay when we need selections. 
+ assert_eq!(std::mem::size_of::(), 24); + } } diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 4f80e25a3..726c9b8cd 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -13,11 +13,11 @@ extern crate uucore; mod platform; use clap::{App, Arg}; -use std::char; use std::env; use std::fs::File; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write}; use std::path::Path; +use std::{char, fs::remove_file}; static NAME: &str = "split"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -213,107 +213,145 @@ struct Settings { verbose: bool, } -struct SplitControl { - current_line: String, // Don't touch - request_new_file: bool, // Splitter implementation requests new file -} - trait Splitter { - // Consume the current_line and return the consumed string - fn consume(&mut self, _: &mut SplitControl) -> String; + // Consume as much as possible from `reader` so as to saturate `writer`. + // Equivalent to finishing one of the part files. Returns the number of + // bytes that have been moved. 
+ fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> u128; } struct LineSplitter { - saved_lines_to_write: usize, - lines_to_write: usize, + lines_per_split: usize, } impl LineSplitter { fn new(settings: &Settings) -> LineSplitter { - let n = match settings.strategy_param.parse() { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of lines: {}", e), - }; LineSplitter { - saved_lines_to_write: n, - lines_to_write: n, + lines_per_split: settings + .strategy_param + .parse() + .unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)), } } } impl Splitter for LineSplitter { - fn consume(&mut self, control: &mut SplitControl) -> String { - self.lines_to_write -= 1; - if self.lines_to_write == 0 { - self.lines_to_write = self.saved_lines_to_write; - control.request_new_file = true; + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> u128 { + let mut bytes_consumed = 0u128; + let mut buffer = String::with_capacity(1024); + for _ in 0..self.lines_per_split { + let bytes_read = reader + .read_line(&mut buffer) + .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); + // If we ever read 0 bytes then we know we've hit EOF. + if bytes_read == 0 { + return bytes_consumed; + } + + writer + .write_all(buffer.as_bytes()) + .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); + // Empty out the String buffer since `read_line` appends instead of + // replaces. 
+ buffer.clear(); + + bytes_consumed += bytes_read as u128; } - control.current_line.clone() + + bytes_consumed } } struct ByteSplitter { - saved_bytes_to_write: usize, - bytes_to_write: usize, - break_on_line_end: bool, - require_whole_line: bool, + bytes_per_split: u128, } impl ByteSplitter { fn new(settings: &Settings) -> ByteSplitter { - let mut strategy_param: Vec = settings.strategy_param.chars().collect(); - let suffix = strategy_param.pop().unwrap(); - let multiplier = match suffix { - '0'..='9' => 1usize, - 'b' => 512usize, - 'k' => 1024usize, - 'm' => 1024usize * 1024usize, - _ => crash!(1, "invalid number of bytes"), - }; - let n = if suffix.is_alphabetic() { - match strategy_param - .iter() - .cloned() - .collect::() - .parse::() - { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of bytes: {}", e), - } - } else { - match settings.strategy_param.parse::() { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of bytes: {}", e), - } - }; + // These multipliers are the same as supported by GNU coreutils. + let modifiers: Vec<(&str, u128)> = vec![ + ("K", 1024u128), + ("M", 1024 * 1024), + ("G", 1024 * 1024 * 1024), + ("T", 1024 * 1024 * 1024 * 1024), + ("P", 1024 * 1024 * 1024 * 1024 * 1024), + ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("KB", 1000), + ("MB", 1000 * 1000), + ("GB", 1000 * 1000 * 1000), + ("TB", 1000 * 1000 * 1000 * 1000), + ("PB", 1000 * 1000 * 1000 * 1000 * 1000), + ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ]; + + // This sequential find is acceptable since none of the modifiers are + // suffixes of any other modifiers, a la Huffman codes. 
+ let (suffix, multiplier) = modifiers + .iter() + .find(|(suffix, _)| settings.strategy_param.ends_with(suffix)) + .unwrap_or(&("", 1)); + + // Try to parse the actual numeral. + let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] + .parse::() + .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e)); + ByteSplitter { - saved_bytes_to_write: n * multiplier, - bytes_to_write: n * multiplier, - break_on_line_end: settings.strategy == "b", - require_whole_line: false, + bytes_per_split: n * multiplier, } } } impl Splitter for ByteSplitter { - fn consume(&mut self, control: &mut SplitControl) -> String { - let line = control.current_line.clone(); - let n = std::cmp::min(line.chars().count(), self.bytes_to_write); - if self.require_whole_line && n < line.chars().count() { - self.bytes_to_write = self.saved_bytes_to_write; - control.request_new_file = true; - self.require_whole_line = false; - return "".to_owned(); + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> u128 { + // We buffer reads and writes. We proceed until `bytes_consumed` is + // equal to `self.bytes_per_split` or we reach EOF. + let mut bytes_consumed = 0u128; + const BUFFER_SIZE: usize = 1024; + let mut buffer = [0u8; BUFFER_SIZE]; + while bytes_consumed < self.bytes_per_split { + // Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min + // doesn't really work since we have to get types to match which + // can't be done in a way that keeps all conversions safe. + let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed { + BUFFER_SIZE + } else { + // This is a safe conversion since the difference must be less + // than BUFFER_SIZE in this branch. 
+ (self.bytes_per_split - bytes_consumed) as usize + }; + let bytes_read = reader + .read(&mut buffer[0..bytes_desired]) + .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); + // If we ever read 0 bytes then we know we've hit EOF. + if bytes_read == 0 { + return bytes_consumed; + } + + writer + .write_all(&buffer[0..bytes_read]) + .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); + + bytes_consumed += bytes_read as u128; } - self.bytes_to_write -= n; - if n == 0 { - self.bytes_to_write = self.saved_bytes_to_write; - control.request_new_file = true; - } - if self.break_on_line_end && n == line.chars().count() { - self.require_whole_line = self.break_on_line_end; - } - line[..n].to_owned() + + bytes_consumed } } @@ -353,14 +391,13 @@ fn split(settings: &Settings) -> i32 { let mut reader = BufReader::new(if settings.input == "-" { Box::new(stdin()) as Box } else { - let r = match File::open(Path::new(&settings.input)) { - Ok(a) => a, - Err(_) => crash!( + let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| { + crash!( 1, "cannot open '{}' for reading: No such file or directory", settings.input - ), - }; + ) + }); Box::new(r) as Box }); @@ -370,48 +407,39 @@ fn split(settings: &Settings) -> i32 { a => crash!(1, "strategy {} not supported", a), }; - let mut control = SplitControl { - current_line: "".to_owned(), // Request new line - request_new_file: true, // Request new file - }; - - let mut writer = BufWriter::new(Box::new(stdout()) as Box); let mut fileno = 0; loop { - if control.current_line.chars().count() == 0 { - match reader.read_line(&mut control.current_line) { - Ok(0) | Err(_) => break, - _ => {} + // Get a new part file set up, and construct `writer` for it. 
+ let mut filename = settings.prefix.clone(); + filename.push_str( + if settings.numeric_suffix { + num_prefix(fileno, settings.suffix_length) + } else { + str_prefix(fileno, settings.suffix_length) } - } - if control.request_new_file { - let mut filename = settings.prefix.clone(); - filename.push_str( - if settings.numeric_suffix { - num_prefix(fileno, settings.suffix_length) - } else { - str_prefix(fileno, settings.suffix_length) - } - .as_ref(), - ); - filename.push_str(settings.additional_suffix.as_ref()); + .as_ref(), + ); + filename.push_str(settings.additional_suffix.as_ref()); + let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); - crash_if_err!(1, writer.flush()); - fileno += 1; - writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); - control.request_new_file = false; - if settings.verbose { - println!("creating file '{}'", filename); + let bytes_consumed = splitter.consume(&mut reader, &mut writer); + writer + .flush() + .unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e)); + + // If we didn't write anything we should clean up the empty file, and + // break from the loop. + if bytes_consumed == 0 { + // The output file is only ever created if --filter isn't used. + // Complicated, I know... 
+ if settings.filter.is_none() { + remove_file(filename) + .unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e)); } + break; } - let consumed = splitter.consume(&mut control); - crash_if_err!(1, writer.write_all(consumed.as_bytes())); - - let advance = consumed.chars().count(); - let clone = control.current_line.clone(); - let sl = clone; - control.current_line = sl[advance..sl.chars().count()].to_owned(); + fileno += 1; } 0 } diff --git a/src/uu/stat/Cargo.toml b/src/uu/stat/Cargo.toml index 96bf63ffe..86b7da139 100644 --- a/src/uu/stat/Cargo.toml +++ b/src/uu/stat/Cargo.toml @@ -16,8 +16,7 @@ path = "src/stat.rs" [dependencies] clap = "2.33" -time = "0.1.40" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs", "fsext"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/stat/src/fsext.rs b/src/uu/stat/src/fsext.rs deleted file mode 100644 index d90099892..000000000 --- a/src/uu/stat/src/fsext.rs +++ /dev/null @@ -1,415 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// (c) Jian Zeng -// -// For the full copyright and license information, please view the LICENSE file -// that was distributed with this source code. 
- -// spell-checker:ignore (ToDO) strerror IFBLK IFCHR IFDIR IFLNK IFIFO IFMT IFREG IFSOCK subsec nanos gnulib statfs Sstatfs bitrig statvfs iosize blksize fnodes fsid namelen bsize bfree bavail ffree frsize namemax errno fstype adfs acfs aufs affs autofs befs bdevfs binfmt ceph cgroups cifs configfs cramfs cgroupfs debugfs devfs devpts ecryptfs btrfs efivarfs exofs fhgfs fuseblk fusectl futexfs gpfs hfsx hostfs hpfs inodefs ibrix inotifyfs isofs jffs logfs hugetlbfs mqueue nsfs ntfs ocfs panfs pipefs ramfs romfs nfsd nilfs pstorefs reiserfs securityfs smackfs snfs sockfs squashfs sysfs sysv tempfs tracefs ubifs usbdevfs vmhgfs tmpfs vxfs wslfs xenfs vzfs openprom overlayfs - -extern crate time; - -use self::time::Timespec; -use std::time::UNIX_EPOCH; -pub use uucore::libc::{ - c_int, mode_t, strerror, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, - S_IFSOCK, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, - S_IXGRP, S_IXOTH, S_IXUSR, -}; - -pub trait BirthTime { - fn pretty_birth(&self) -> String; - fn birth(&self) -> String; -} - -use std::fs::Metadata; -impl BirthTime for Metadata { - fn pretty_birth(&self) -> String { - self.created() - .ok() - .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) - .map(|e| pretty_time(e.as_secs() as i64, i64::from(e.subsec_nanos()))) - .unwrap_or_else(|| "-".to_owned()) - } - - fn birth(&self) -> String { - self.created() - .ok() - .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) - .map(|e| format!("{}", e.as_secs())) - .unwrap_or_else(|| "0".to_owned()) - } -} - -#[macro_export] -macro_rules! 
has { - ($mode:expr, $perm:expr) => { - $mode & $perm != 0 - }; -} - -pub fn pretty_time(sec: i64, nsec: i64) -> String { - // sec == seconds since UNIX_EPOCH - // nsec == nanoseconds since (UNIX_EPOCH + sec) - let tm = time::at(Timespec::new(sec, nsec as i32)); - let res = time::strftime("%Y-%m-%d %H:%M:%S.%f %z", &tm).unwrap(); - if res.ends_with(" -0000") { - res.replace(" -0000", " +0000") - } else { - res - } -} - -pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { - match mode & S_IFMT { - S_IFREG => { - if size != 0 { - "regular file" - } else { - "regular empty file" - } - } - S_IFDIR => "directory", - S_IFLNK => "symbolic link", - S_IFCHR => "character special file", - S_IFBLK => "block special file", - S_IFIFO => "fifo", - S_IFSOCK => "socket", - // TODO: Other file types - // See coreutils/gnulib/lib/file-type.c - _ => "weird file", - } -} - -pub fn pretty_access(mode: mode_t) -> String { - let mut result = String::with_capacity(10); - result.push(match mode & S_IFMT { - S_IFDIR => 'd', - S_IFCHR => 'c', - S_IFBLK => 'b', - S_IFREG => '-', - S_IFIFO => 'p', - S_IFLNK => 'l', - S_IFSOCK => 's', - // TODO: Other file types - _ => '?', - }); - - result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISUID as mode_t) { - if has!(mode, S_IXUSR) { - 's' - } else { - 'S' - } - } else if has!(mode, S_IXUSR) { - 'x' - } else { - '-' - }); - - result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISGID as mode_t) { - if has!(mode, S_IXGRP) { - 's' - } else { - 'S' - } - } else if has!(mode, S_IXGRP) { - 'x' - } else { - '-' - }); - - result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISVTX as mode_t) { - if has!(mode, S_IXOTH) { - 't' - } else { - 'T' - } - } else if 
has!(mode, S_IXOTH) { - 'x' - } else { - '-' - }); - - result -} - -use std::borrow::Cow; -use std::convert::{AsRef, From}; -use std::ffi::CString; -use std::io::Error as IOError; -use std::mem; -use std::path::Path; - -#[cfg(any( - target_os = "linux", - target_vendor = "apple", - target_os = "android", - target_os = "freebsd" -))] -use uucore::libc::statfs as Sstatfs; -#[cfg(any( - target_os = "openbsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "bitrig", - target_os = "dragonfly" -))] -use uucore::libc::statvfs as Sstatfs; - -#[cfg(any( - target_os = "linux", - target_vendor = "apple", - target_os = "android", - target_os = "freebsd" -))] -use uucore::libc::statfs as statfs_fn; -#[cfg(any( - target_os = "openbsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "bitrig", - target_os = "dragonfly" -))] -use uucore::libc::statvfs as statfs_fn; - -pub trait FsMeta { - fn fs_type(&self) -> i64; - fn iosize(&self) -> u64; - fn blksize(&self) -> i64; - fn total_blocks(&self) -> u64; - fn free_blocks(&self) -> u64; - fn avail_blocks(&self) -> u64; - fn total_fnodes(&self) -> u64; - fn free_fnodes(&self) -> u64; - fn fsid(&self) -> u64; - fn namelen(&self) -> u64; -} - -impl FsMeta for Sstatfs { - fn blksize(&self) -> i64 { - self.f_bsize as i64 - } - fn total_blocks(&self) -> u64 { - self.f_blocks as u64 - } - fn free_blocks(&self) -> u64 { - self.f_bfree as u64 - } - fn avail_blocks(&self) -> u64 { - self.f_bavail as u64 - } - fn total_fnodes(&self) -> u64 { - self.f_files as u64 - } - fn free_fnodes(&self) -> u64 { - self.f_ffree as u64 - } - #[cfg(any(target_os = "linux", target_vendor = "apple", target_os = "freebsd"))] - fn fs_type(&self) -> i64 { - self.f_type as i64 - } - #[cfg(not(any(target_os = "linux", target_vendor = "apple", target_os = "freebsd")))] - fn fs_type(&self) -> i64 { - // FIXME: statvfs doesn't have an equivalent, so we need to do something else - unimplemented!() - } - - #[cfg(target_os = "linux")] - fn 
iosize(&self) -> u64 { - self.f_frsize as u64 - } - #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] - fn iosize(&self) -> u64 { - self.f_iosize as u64 - } - // XXX: dunno if this is right - #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] - fn iosize(&self) -> u64 { - self.f_bsize as u64 - } - - // Linux, SunOS, HP-UX, 4.4BSD, FreeBSD have a system call statfs() that returns - // a struct statfs, containing a fsid_t f_fsid, where fsid_t is defined - // as struct { int val[2]; } - // - // Solaris, Irix and POSIX have a system call statvfs(2) that returns a - // struct statvfs, containing an unsigned long f_fsid - #[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux"))] - fn fsid(&self) -> u64 { - let f_fsid: &[u32; 2] = - unsafe { &*(&self.f_fsid as *const uucore::libc::fsid_t as *const [u32; 2]) }; - (u64::from(f_fsid[0])) << 32 | u64::from(f_fsid[1]) - } - #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] - fn fsid(&self) -> u64 { - self.f_fsid as u64 - } - - #[cfg(target_os = "linux")] - fn namelen(&self) -> u64 { - self.f_namelen as u64 - } - #[cfg(target_vendor = "apple")] - fn namelen(&self) -> u64 { - 1024 - } - #[cfg(target_os = "freebsd")] - fn namelen(&self) -> u64 { - self.f_namemax as u64 - } - // XXX: should everything just use statvfs? - #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] - fn namelen(&self) -> u64 { - self.f_namemax as u64 - } -} - -pub fn statfs>(path: P) -> Result -where - Vec: From

, -{ - match CString::new(path) { - Ok(p) => { - let mut buffer: Sstatfs = unsafe { mem::zeroed() }; - unsafe { - match statfs_fn(p.as_ptr(), &mut buffer) { - 0 => Ok(buffer), - _ => { - let errno = IOError::last_os_error().raw_os_error().unwrap_or(0); - Err(CString::from_raw(strerror(errno)) - .into_string() - .unwrap_or_else(|_| "Unknown Error".to_owned())) - } - } - } - } - Err(e) => Err(e.to_string()), - } -} - -pub fn pretty_fstype<'a>(fstype: i64) -> Cow<'a, str> { - match fstype { - 0x6163_6673 => "acfs".into(), - 0xADF5 => "adfs".into(), - 0xADFF => "affs".into(), - 0x5346_414F => "afs".into(), - 0x0904_1934 => "anon-inode FS".into(), - 0x6175_6673 => "aufs".into(), - 0x0187 => "autofs".into(), - 0x4246_5331 => "befs".into(), - 0x6264_6576 => "bdevfs".into(), - 0x1BAD_FACE => "bfs".into(), - 0xCAFE_4A11 => "bpf_fs".into(), - 0x4249_4E4D => "binfmt_misc".into(), - 0x9123_683E => "btrfs".into(), - 0x7372_7279 => "btrfs_test".into(), - 0x00C3_6400 => "ceph".into(), - 0x0027_E0EB => "cgroupfs".into(), - 0xFF53_4D42 => "cifs".into(), - 0x7375_7245 => "coda".into(), - 0x012F_F7B7 => "coh".into(), - 0x6265_6570 => "configfs".into(), - 0x28CD_3D45 => "cramfs".into(), - 0x453D_CD28 => "cramfs-wend".into(), - 0x6462_6720 => "debugfs".into(), - 0x1373 => "devfs".into(), - 0x1CD1 => "devpts".into(), - 0xF15F => "ecryptfs".into(), - 0xDE5E_81E4 => "efivarfs".into(), - 0x0041_4A53 => "efs".into(), - 0x5DF5 => "exofs".into(), - 0x137D => "ext".into(), - 0xEF53 => "ext2/ext3".into(), - 0xEF51 => "ext2".into(), - 0xF2F5_2010 => "f2fs".into(), - 0x4006 => "fat".into(), - 0x1983_0326 => "fhgfs".into(), - 0x6573_5546 => "fuseblk".into(), - 0x6573_5543 => "fusectl".into(), - 0x0BAD_1DEA => "futexfs".into(), - 0x0116_1970 => "gfs/gfs2".into(), - 0x4750_4653 => "gpfs".into(), - 0x4244 => "hfs".into(), - 0x482B => "hfs+".into(), - 0x4858 => "hfsx".into(), - 0x00C0_FFEE => "hostfs".into(), - 0xF995_E849 => "hpfs".into(), - 0x9584_58F6 => "hugetlbfs".into(), - 0x1130_7854 => 
"inodefs".into(), - 0x0131_11A8 => "ibrix".into(), - 0x2BAD_1DEA => "inotifyfs".into(), - 0x9660 => "isofs".into(), - 0x4004 => "isofs".into(), - 0x4000 => "isofs".into(), - 0x07C0 => "jffs".into(), - 0x72B6 => "jffs2".into(), - 0x3153_464A => "jfs".into(), - 0x6B41_4653 => "k-afs".into(), - 0xC97E_8168 => "logfs".into(), - 0x0BD0_0BD0 => "lustre".into(), - 0x5346_314D => "m1fs".into(), - 0x137F => "minix".into(), - 0x138F => "minix (30 char.)".into(), - 0x2468 => "minix v2".into(), - 0x2478 => "minix v2 (30 char.)".into(), - 0x4D5A => "minix3".into(), - 0x1980_0202 => "mqueue".into(), - 0x4D44 => "msdos".into(), - 0x564C => "novell".into(), - 0x6969 => "nfs".into(), - 0x6E66_7364 => "nfsd".into(), - 0x3434 => "nilfs".into(), - 0x6E73_6673 => "nsfs".into(), - 0x5346_544E => "ntfs".into(), - 0x9FA1 => "openprom".into(), - 0x7461_636F => "ocfs2".into(), - 0x794C_7630 => "overlayfs".into(), - 0xAAD7_AAEA => "panfs".into(), - 0x5049_5045 => "pipefs".into(), - 0x7C7C_6673 => "prl_fs".into(), - 0x9FA0 => "proc".into(), - 0x6165_676C => "pstorefs".into(), - 0x002F => "qnx4".into(), - 0x6819_1122 => "qnx6".into(), - 0x8584_58F6 => "ramfs".into(), - 0x5265_4973 => "reiserfs".into(), - 0x7275 => "romfs".into(), - 0x6759_6969 => "rpc_pipefs".into(), - 0x7363_6673 => "securityfs".into(), - 0xF97C_FF8C => "selinux".into(), - 0x4341_5D53 => "smackfs".into(), - 0x517B => "smb".into(), - 0xFE53_4D42 => "smb2".into(), - 0xBEEF_DEAD => "snfs".into(), - 0x534F_434B => "sockfs".into(), - 0x7371_7368 => "squashfs".into(), - 0x6265_6572 => "sysfs".into(), - 0x012F_F7B6 => "sysv2".into(), - 0x012F_F7B5 => "sysv4".into(), - 0x0102_1994 => "tmpfs".into(), - 0x7472_6163 => "tracefs".into(), - 0x2405_1905 => "ubifs".into(), - 0x1501_3346 => "udf".into(), - 0x0001_1954 => "ufs".into(), - 0x5419_0100 => "ufs".into(), - 0x9FA2 => "usbdevfs".into(), - 0x0102_1997 => "v9fs".into(), - 0xBACB_ACBC => "vmhgfs".into(), - 0xA501_FCF5 => "vxfs".into(), - 0x565A_4653 => "vzfs".into(), - 0x5346_4846 => 
"wslfs".into(), - 0xABBA_1974 => "xenfs".into(), - 0x012F_F7B4 => "xenix".into(), - 0x5846_5342 => "xfs".into(), - 0x012F_D16D => "xia".into(), - 0x2FC1_2FC1 => "zfs".into(), - other => format!("UNKNOWN ({:#x})", other).into(), - } -} diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 5216fb293..582d59841 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -5,21 +5,20 @@ // For the full copyright and license information, please view the LICENSE file // that was distributed with this source code. -// spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE - -#[macro_use] -mod fsext; -pub use crate::fsext::*; +// spell-checker:ignore (ToDO) showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE #[macro_use] extern crate uucore; use uucore::entries; +use uucore::fs::display_permissions; +use uucore::fsext::{ + pretty_filetype, pretty_fstype, pretty_time, read_fs_list, statfs, BirthTime, FsMeta, +}; +use uucore::libc::mode_t; use clap::{App, Arg, ArgMatches}; use std::borrow::Cow; use std::convert::AsRef; -use std::fs::File; -use std::io::{BufRead, BufReader}; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::path::Path; use std::{cmp, fs, iter}; @@ -97,7 +96,6 @@ pub mod options { static ARG_FILES: &str = "files"; -const MOUNT_INFO: &str = "/etc/mtab"; pub const F_ALTER: u8 = 1; pub const F_ZERO: u8 = 1 << 1; pub const F_LEFT: u8 = 1 << 2; @@ -490,13 +488,9 @@ impl Stater { // mount points aren't displayed when showing filesystem information None } else { - let reader = BufReader::new( - File::open(MOUNT_INFO).unwrap_or_else(|_| panic!("Failed to read {}", MOUNT_INFO)), - ); - let mut mount_list = reader - .lines() - .filter_map(Result::ok) - .filter_map(|line| line.split_whitespace().nth(1).map(ToOwned::to_owned)) + let mut mount_list = 
read_fs_list() + .iter() + .map(|mi| mi.mount_dir.clone()) .collect::>(); // Reverse sort. The longer comes first. mount_list.sort(); @@ -575,7 +569,7 @@ impl Stater { } // access rights in human readable form 'A' => { - arg = pretty_access(meta.mode() as mode_t); + arg = display_permissions(&meta, true); otype = OutputType::Str; } // number of blocks allocated (see %B) @@ -663,7 +657,7 @@ impl Stater { dst.to_string_lossy() ); } else { - arg = format!("`{}'", file); + arg = file.to_string(); } otype = OutputType::Str; } @@ -755,7 +749,7 @@ impl Stater { } } Err(e) => { - show_info!("cannot stat '{}': {}", file, e); + show_error!("cannot stat '{}': {}", file, e); return 1; } } @@ -848,7 +842,7 @@ impl Stater { } } Err(e) => { - show_info!("cannot read file system information for '{}': {}", file, e); + show_error!("cannot read file system information for '{}': {}", file, e); return 1; } } @@ -1007,7 +1001,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { match Stater::new(matches) { Ok(stater) => stater.exec(), Err(e) => { - show_info!("{}", e); + show_error!("{}", e); 1 } } diff --git a/src/uu/stat/src/test_stat.rs b/src/uu/stat/src/test_stat.rs deleted file mode 100644 index 05e91fb84..000000000 --- a/src/uu/stat/src/test_stat.rs +++ /dev/null @@ -1,76 +0,0 @@ -// spell-checker:ignore (ToDO) scanutil qzxc dqzxc - -pub use super::*; - -#[test] -fn test_scanutil() { - assert_eq!(Some((-5, 2)), "-5zxc".scan_num::()); - assert_eq!(Some((51, 2)), "51zxc".scan_num::()); - assert_eq!(Some((192, 4)), "+192zxc".scan_num::()); - assert_eq!(None, "z192zxc".scan_num::()); - - assert_eq!(Some(('a', 3)), "141zxc".scan_char(8)); - assert_eq!(Some(('\n', 2)), "12qzxc".scan_char(8)); - assert_eq!(Some(('\r', 1)), "dqzxc".scan_char(16)); - assert_eq!(None, "z2qzxc".scan_char(8)); -} - -#[cfg(test)] -mod test_generate_tokens { - use super::*; - - #[test] - fn test_normal_format() { - let s = "%10.2ac%-5.w\n"; - let expected = vec![ - Token::Directive { - flag: 0, - width: 10, - 
precision: 2, - format: 'a', - }, - Token::Char('c'), - Token::Directive { - flag: F_LEFT, - width: 5, - precision: 0, - format: 'w', - }, - Token::Char('\n'), - ]; - assert_eq!(&expected, &Stater::generate_tokens(s, false).unwrap()); - } - - #[test] - fn test_printf_format() { - let s = "%-# 15a\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.-23w\\x12\\167\\132\\112\\n"; - let expected = vec![ - Token::Directive { - flag: F_LEFT | F_ALTER | F_SPACE, - width: 15, - precision: -1, - format: 'a', - }, - Token::Char('\r'), - Token::Char('"'), - Token::Char('\\'), - Token::Char('\x07'), - Token::Char('\x08'), - Token::Char('\x1B'), - Token::Char('\x0C'), - Token::Char('\x0B'), - Token::Directive { - flag: F_SIGN | F_ZERO, - width: 20, - precision: -1, - format: 'w', - }, - Token::Char('\x12'), - Token::Char('w'), - Token::Char('Z'), - Token::Char('J'), - Token::Char('\n'), - ]; - assert_eq!(&expected, &Stater::generate_tokens(s, true).unwrap()); - } -} diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index 77f6d9dad..485b3c70e 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -24,18 +24,19 @@ use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Run COMMAND, with modified buffering operations for its standard streams.\n\n\ - Mandatory arguments to long options are mandatory for short options too."; -static LONG_HELP: &str = "If MODE is 'L' the corresponding stream will be line buffered.\n\ - This option is invalid with standard input.\n\n\ - If MODE is '0' the corresponding stream will be unbuffered.\n\n\ - Otherwise MODE is a number which may be followed by one of the following:\n\n\ - KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\ - In this case the corresponding stream will be fully buffered with the buffer size set to \ - MODE bytes.\n\n\ - NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does for e.g.) 
then \ - that will override corresponding settings changed by 'stdbuf'.\n\ - Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O, \ - and are thus unaffected by 'stdbuf' settings.\n"; + Mandatory arguments to long options are mandatory for short options too."; +static LONG_HELP: &str = + "If MODE is 'L' the corresponding stream will be line buffered.\n\ + This option is invalid with standard input.\n\n\ + If MODE is '0' the corresponding stream will be unbuffered.\n\n\ + Otherwise MODE is a number which may be followed by one of the following:\n\n\ + KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\ + In this case the corresponding stream will be fully buffered with the buffer size set to \ + MODE bytes.\n\n\ + NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does for e.g.) then \ + that will override corresponding settings changed by 'stdbuf'.\n\ + Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O, \ + and are thus unaffected by 'stdbuf' settings.\n"; mod options { pub const INPUT: &str = "input"; diff --git a/src/uu/tail/Cargo.toml b/src/uu/tail/Cargo.toml index d3f60e09b..273c67bb3 100644 --- a/src/uu/tail/Cargo.toml +++ b/src/uu/tail/Cargo.toml @@ -17,7 +17,7 @@ path = "src/tail.rs" [dependencies] clap = "2.33" libc = "0.2.42" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["ringbuffer"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi", "synchapi", "winbase"] } diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs new file mode 100644 index 000000000..57a26dabf --- /dev/null +++ b/src/uu/tail/src/chunks.rs @@ -0,0 +1,83 @@ +//! Iterating over a file by chunks, starting at the end of the file. +//! +//! 
Use [`ReverseChunks::new`] to create a new iterator over chunks of +//! bytes from the file. +use std::fs::File; +use std::io::{Read, Seek, SeekFrom}; + +/// When reading files in reverse in `bounded_tail`, this is the size of each +/// block read at a time. +pub const BLOCK_SIZE: u64 = 1 << 16; + +/// An iterator over a file in non-overlapping chunks from the end of the file. +/// +/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except +/// possibly the last chunk, which might be smaller). Each call to +/// [`next`] will seek backwards through the given file. +pub struct ReverseChunks<'a> { + /// The file to iterate over, by blocks, from the end to the beginning. + file: &'a File, + + /// The total number of bytes in the file. + size: u64, + + /// The total number of blocks to read. + max_blocks_to_read: usize, + + /// The index of the next block to read. + block_idx: usize, +} + +impl<'a> ReverseChunks<'a> { + pub fn new(file: &'a mut File) -> ReverseChunks<'a> { + let size = file.seek(SeekFrom::End(0)).unwrap(); + let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize; + let block_idx = 0; + ReverseChunks { + file, + size, + max_blocks_to_read, + block_idx, + } + } +} + +impl<'a> Iterator for ReverseChunks<'a> { + type Item = Vec; + + fn next(&mut self) -> Option { + // If there are no more chunks to read, terminate the iterator. + if self.block_idx >= self.max_blocks_to_read { + return None; + } + + // The chunk size is `BLOCK_SIZE` for all but the last chunk + // (that is, the chunk closest to the beginning of the file), + // which contains the remainder of the bytes. + let block_size = if self.block_idx == self.max_blocks_to_read - 1 { + self.size % BLOCK_SIZE + } else { + BLOCK_SIZE + }; + + // Seek backwards by the next chunk, read the full chunk into + // `buf`, and then seek back to the start of the chunk again. 
+ let mut buf = vec![0; BLOCK_SIZE as usize]; + let pos = self + .file + .seek(SeekFrom::Current(-(block_size as i64))) + .unwrap(); + self.file + .read_exact(&mut buf[0..(block_size as usize)]) + .unwrap(); + let pos2 = self + .file + .seek(SeekFrom::Current(-(block_size as i64))) + .unwrap(); + assert_eq!(pos, pos2); + + self.block_idx += 1; + + Some(buf[0..(block_size as usize)].to_vec()) + } +} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index fec88e841..15a819d35 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -15,7 +15,9 @@ extern crate clap; #[macro_use] extern crate uucore; +mod chunks; mod platform; +use chunks::ReverseChunks; use clap::{App, Arg}; use std::collections::VecDeque; @@ -26,6 +28,7 @@ use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write}; use std::path::Path; use std::thread::sleep; use std::time::Duration; +use uucore::ringbuffer::RingBuffer; pub mod options { pub mod verbosity { @@ -239,7 +242,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let mut file = File::open(&path).unwrap(); if is_seekable(&mut file) { - bounded_tail(&file, &settings); + bounded_tail(&mut file, &settings); if settings.follow { let reader = BufReader::new(file); readers.push(reader); @@ -353,10 +356,6 @@ pub fn parse_size(mut size_slice: &str) -> Result { } } -/// When reading files in reverse in `bounded_tail`, this is the size of each -/// block read at a time. -const BLOCK_SIZE: u64 = 1 << 16; - fn follow(readers: &mut [BufReader], filenames: &[String], settings: &Settings) { assert!(settings.follow); let mut last = readers.len() - 1; @@ -394,48 +393,42 @@ fn follow(readers: &mut [BufReader], filenames: &[String], settings: } } -/// Iterate over bytes in the file, in reverse, until `should_stop` returns -/// true. The `file` is left seek'd to the position just after the byte that -/// `should_stop` returned true for. 
-fn backwards_thru_file( - mut file: &File, - size: u64, - buf: &mut Vec, - delimiter: u8, - should_stop: &mut F, -) where - F: FnMut(u8) -> bool, -{ - assert!(buf.len() >= BLOCK_SIZE as usize); +/// Iterate over bytes in the file, in reverse, until we find the +/// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the +/// position just after that delimiter. +fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) { + // This variable counts the number of delimiters found in the file + // so far (reading from the end of the file toward the beginning). + let mut counter = 0; - let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize; + for (block_idx, slice) in ReverseChunks::new(file).enumerate() { + // Iterate over each byte in the slice in reverse order. + let mut iter = slice.iter().enumerate().rev(); - for block_idx in 0..max_blocks_to_read { - let block_size = if block_idx == max_blocks_to_read - 1 { - size % BLOCK_SIZE - } else { - BLOCK_SIZE - }; - - // Seek backwards by the next block, read the full block into - // `buf`, and then seek back to the start of the block again. - let pos = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap(); - file.read_exact(&mut buf[0..(block_size as usize)]).unwrap(); - let pos2 = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap(); - assert_eq!(pos, pos2); - - // Iterate backwards through the bytes, calling `should_stop` on each - // one. - let slice = &buf[0..(block_size as usize)]; - for (i, ch) in slice.iter().enumerate().rev() { - // Ignore one trailing newline. - if block_idx == 0 && i as u64 == block_size - 1 && *ch == delimiter { - continue; + // Ignore a trailing newline in the last block, if there is one. 
+ if block_idx == 0 { + if let Some(c) = slice.last() { + if *c == delimiter { + iter.next(); + } } + } - if should_stop(*ch) { - file.seek(SeekFrom::Current((i + 1) as i64)).unwrap(); - return; + // For each byte, increment the count of the number of + // delimiters found. If we have found more than the specified + // number of delimiters, terminate the search and seek to the + // appropriate location in the file. + for (i, ch) in iter { + if *ch == delimiter { + counter += 1; + if counter >= num_delimiters { + // After each iteration of the outer loop, the + // cursor in the file is at the *beginning* of the + // block, so seeking forward by `i + 1` bytes puts + // us right after the found delimiter. + file.seek(SeekFrom::Current((i + 1) as i64)).unwrap(); + return; + } } } } @@ -446,21 +439,11 @@ fn backwards_thru_file( /// end of the file, and then read the file "backwards" in blocks of size /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. -fn bounded_tail(mut file: &File, settings: &Settings) { - let size = file.seek(SeekFrom::End(0)).unwrap(); - let mut buf = vec![0; BLOCK_SIZE as usize]; - +fn bounded_tail(file: &mut File, settings: &Settings) { // Find the position in the file to start printing from. match settings.mode { - FilterMode::Lines(mut count, delimiter) => { - backwards_thru_file(&file, size, &mut buf, delimiter, &mut |byte| { - if byte == delimiter { - count -= 1; - count == 0 - } else { - false - } - }); + FilterMode::Lines(count, delimiter) => { + backwards_thru_file(file, count as usize, delimiter); } FilterMode::Bytes(count) => { file.seek(SeekFrom::End(-(count as i64))).unwrap(); @@ -468,17 +451,37 @@ fn bounded_tail(mut file: &File, settings: &Settings) { } // Print the target section of the file. 
- loop { - let bytes_read = file.read(&mut buf).unwrap(); + let stdout = stdout(); + let mut stdout = stdout.lock(); + std::io::copy(file, &mut stdout).unwrap(); +} - let mut stdout = stdout(); - for b in &buf[0..bytes_read] { - print_byte(&mut stdout, *b); - } - - if bytes_read == 0 { - break; - } +/// Collect the last elements of an iterator into a `VecDeque`. +/// +/// This function returns a [`VecDeque`] containing either the last +/// `count` elements of `iter`, an [`Iterator`] over [`Result`] +/// instances, or all but the first `count` elements of `iter`. If +/// `beginning` is `true`, then all but the first `count` elements are +/// returned. +/// +/// # Panics +/// +/// If any element of `iter` is an [`Err`], then this function panics. +fn unbounded_tail_collect( + iter: impl Iterator>, + count: u64, + beginning: bool, +) -> VecDeque +where + E: fmt::Debug, +{ + if beginning { + // GNU `tail` seems to index bytes and lines starting at 1, not + // at 0. It seems to treat `+0` and `+1` as the same thing. + let i = count.max(1) - 1; + iter.skip(i as usize).map(|r| r.unwrap()).collect() + } else { + RingBuffer::from_iter(iter.map(|r| r.unwrap()), count as usize).data } } @@ -487,66 +490,15 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) { // contains count lines/chars. When reaching the end of file, output the // data in the ringbuf. 
match settings.mode { - FilterMode::Lines(mut count, _delimiter) => { - let mut ringbuf: VecDeque = VecDeque::new(); - let mut skip = if settings.beginning { - let temp = count; - count = ::std::u64::MAX; - temp - 1 - } else { - 0 - }; - loop { - let mut datum = String::new(); - match reader.read_line(&mut datum) { - Ok(0) => break, - Ok(_) => { - if skip > 0 { - skip -= 1; - } else { - if count <= ringbuf.len() as u64 { - ringbuf.pop_front(); - } - ringbuf.push_back(datum); - } - } - Err(err) => panic!("{}", err), - } - } - let mut stdout = stdout(); - for datum in &ringbuf { - print_string(&mut stdout, datum); + FilterMode::Lines(count, _) => { + for line in unbounded_tail_collect(reader.lines(), count, settings.beginning) { + println!("{}", line); } } - FilterMode::Bytes(mut count) => { - let mut ringbuf: VecDeque = VecDeque::new(); - let mut skip = if settings.beginning { - let temp = count; - count = ::std::u64::MAX; - temp - 1 - } else { - 0 - }; - loop { - let mut datum = [0; 1]; - match reader.read(&mut datum) { - Ok(0) => break, - Ok(_) => { - if skip > 0 { - skip -= 1; - } else { - if count <= ringbuf.len() as u64 { - ringbuf.pop_front(); - } - ringbuf.push_back(datum[0]); - } - } - Err(err) => panic!("{}", err), - } - } - let mut stdout = stdout(); - for datum in &ringbuf { - print_byte(&mut stdout, *datum); + FilterMode::Bytes(count) => { + for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning) { + let mut stdout = stdout(); + print_byte(&mut stdout, byte); } } } @@ -562,8 +514,3 @@ fn print_byte(stdout: &mut T, ch: u8) { crash!(1, "{}", err); } } - -#[inline] -fn print_string(_: &mut T, s: &str) { - print!("{}", s); -} diff --git a/src/uu/tee/src/tee.rs b/src/uu/tee/src/tee.rs index 7c6a86b4c..c21559b3b 100644 --- a/src/uu/tee/src/tee.rs +++ b/src/uu/tee/src/tee.rs @@ -166,7 +166,7 @@ impl Write for MultiWriter { let result = writer.write_all(buf); match result { Err(f) => { - show_info!("{}: {}", writer.name, f.to_string()); + 
show_error!("{}: {}", writer.name, f.to_string()); false } _ => true, @@ -180,7 +180,7 @@ impl Write for MultiWriter { let result = writer.flush(); match result { Err(f) => { - show_info!("{}: {}", writer.name, f.to_string()); + show_error!("{}: {}", writer.name, f.to_string()); false } _ => true, @@ -213,7 +213,7 @@ impl Read for NamedReader { fn read(&mut self, buf: &mut [u8]) -> Result { match self.inner.read(buf) { Err(f) => { - show_info!("{}: {}", Path::new("stdin").display(), f.to_string()); + show_error!("{}: {}", Path::new("stdin").display(), f.to_string()); Err(f) } okay => okay, diff --git a/src/uu/test/src/parser.rs b/src/uu/test/src/parser.rs index f1ca9dad6..aa44bc5f2 100644 --- a/src/uu/test/src/parser.rs +++ b/src/uu/test/src/parser.rs @@ -33,7 +33,7 @@ impl Symbol { "(" => Symbol::LParen, "!" => Symbol::Bang, "-a" | "-o" => Symbol::BoolOp(s), - "=" | "!=" => Symbol::StringOp(s), + "=" | "==" | "!=" => Symbol::StringOp(s), "-eq" | "-ge" | "-gt" | "-le" | "-lt" | "-ne" => Symbol::IntOp(s), "-ef" | "-nt" | "-ot" => Symbol::FileOp(s), "-n" | "-z" => Symbol::StrlenOp(s), @@ -83,7 +83,7 @@ impl Symbol { /// TERM → str OP str /// TERM → str | 𝜖 /// OP → STRINGOP | INTOP | FILEOP -/// STRINGOP → = | != +/// STRINGOP → = | == | != /// INTOP → -eq | -ge | -gt | -le | -lt | -ne /// FILEOP → -ef | -nt | -ot /// STRLEN → -n | -z @@ -121,6 +121,8 @@ impl Parser { /// Test if the next token in the stream is a BOOLOP (-a or -o), without /// removing the token from the stream. 
fn peek_is_boolop(&mut self) -> bool { + // TODO: change to `matches!(self.peek(), Symbol::BoolOp(_))` once MSRV is 1.42 + // #[allow(clippy::match_like_matches_macro)] // needs MSRV 1.43 if let Symbol::BoolOp(_) = self.peek() { true } else { @@ -161,7 +163,7 @@ impl Parser { match self.peek() { // lparen is a literal when followed by nothing or comparison Symbol::None | Symbol::StringOp(_) | Symbol::IntOp(_) | Symbol::FileOp(_) => { - self.literal(Symbol::Literal(OsString::from("("))); + self.literal(Symbol::LParen.into_literal()); } // empty parenthetical Symbol::Literal(s) if s == ")" => {} @@ -181,27 +183,67 @@ impl Parser { /// /// * `! =`: negate the result of the implicit string length test of `=` /// * `! = foo`: compare the literal strings `!` and `foo` - /// * `! `: negate the result of the expression + /// * `! = = str`: negate comparison of literal `=` and `str` + /// * `!`: bang followed by nothing is literal + /// * `! EXPR`: negate the result of the expression + /// + /// Combined Boolean & negation: + /// + /// * `! ( EXPR ) [BOOLOP EXPR]`: negate the parenthesized expression only + /// * `! UOP str BOOLOP EXPR`: negate the unary subexpression + /// * `! str BOOLOP str`: negate the entire Boolean expression + /// * `! str BOOLOP EXPR BOOLOP EXPR`: negate the value of the first `str` term /// fn bang(&mut self) { - if let Symbol::StringOp(_) | Symbol::IntOp(_) | Symbol::FileOp(_) = self.peek() { - // we need to peek ahead one more token to disambiguate the first - // two cases listed above: case 1 — `! ` — and - // case 2: ` OP str`. 
- let peek2 = self.tokens.clone().nth(1); + match self.peek() { + Symbol::StringOp(_) | Symbol::IntOp(_) | Symbol::FileOp(_) | Symbol::BoolOp(_) => { + // we need to peek ahead one more token to disambiguate the first + // three cases listed above + let peek2 = Symbol::new(self.tokens.clone().nth(1)); - if peek2.is_none() { - // op is literal - let op = self.next_token().into_literal(); - self.stack.push(op); - self.stack.push(Symbol::Bang); - } else { - // bang is literal; parsing continues with op - self.literal(Symbol::Literal(OsString::from("!"))); + match peek2 { + // case 1: `! ` + // case 3: `! = OP str` + Symbol::StringOp(_) | Symbol::None => { + // op is literal + let op = self.next_token().into_literal(); + self.literal(op); + self.stack.push(Symbol::Bang); + } + // case 2: ` OP str [BOOLOP EXPR]`. + _ => { + // bang is literal; parsing continues with op + self.literal(Symbol::Bang.into_literal()); + self.maybe_boolop(); + } + } + } + + // bang followed by nothing is literal + Symbol::None => self.stack.push(Symbol::Bang.into_literal()), + + _ => { + // peek ahead up to 4 tokens to determine if we need to negate + // the entire expression or just the first term + let peek4: Vec = self + .tokens + .clone() + .take(4) + .map(|token| Symbol::new(Some(token))) + .collect(); + + match peek4.as_slice() { + // we peeked ahead 4 but there were only 3 tokens left + [Symbol::Literal(_), Symbol::BoolOp(_), Symbol::Literal(_)] => { + self.expr(); + self.stack.push(Symbol::Bang); + } + _ => { + self.term(); + self.stack.push(Symbol::Bang); + } + } } - } else { - self.expr(); - self.stack.push(Symbol::Bang); } } @@ -209,13 +251,14 @@ impl Parser { /// as appropriate. 
fn maybe_boolop(&mut self) { if self.peek_is_boolop() { - let token = self.tokens.next().unwrap(); // safe because we peeked + let symbol = self.next_token(); // BoolOp by itself interpreted as Literal if let Symbol::None = self.peek() { - self.literal(Symbol::Literal(token)) + self.literal(symbol.into_literal()); } else { - self.boolop(Symbol::BoolOp(token)) + self.boolop(symbol); + self.maybe_boolop(); } } } @@ -229,7 +272,6 @@ impl Parser { if op == Symbol::BoolOp(OsString::from("-a")) { self.term(); self.stack.push(op); - self.maybe_boolop(); } else { self.expr(); self.stack.push(op); diff --git a/src/uu/test/src/test.rs b/src/uu/test/src/test.rs index 3e97af0a6..86950ecc2 100644 --- a/src/uu/test/src/test.rs +++ b/src/uu/test/src/test.rs @@ -57,7 +57,7 @@ fn eval(stack: &mut Vec) -> Result { Some(Symbol::StringOp(op)) => { let b = stack.pop(); let a = stack.pop(); - Ok(if op == "=" { a == b } else { a != b }) + Ok(if op == "!=" { a != b } else { a == b }) } Some(Symbol::IntOp(op)) => { let b = pop_literal!(); diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 91f705bd1..03b18723c 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -11,7 +11,8 @@ extern crate uucore; use clap::{App, Arg}; -use std::fs::{metadata, File, OpenOptions}; +use std::fs::{metadata, OpenOptions}; +use std::io::ErrorKind; use std::path::Path; #[derive(Eq, PartialEq)] @@ -133,7 +134,35 @@ fn truncate( filenames: Vec, ) { let (modsize, mode) = match size { - Some(size_string) => parse_size(&size_string), + Some(size_string) => { + // Trim any whitespace. + let size_string = size_string.trim(); + + // Get the modifier character from the size string, if any. For + // example, if the argument is "+123", then the modifier is '+'. 
+ let c = size_string.chars().next().unwrap(); + + let mode = match c { + '+' => TruncateMode::Extend, + '-' => TruncateMode::Reduce, + '<' => TruncateMode::AtMost, + '>' => TruncateMode::AtLeast, + '/' => TruncateMode::RoundDown, + '%' => TruncateMode::RoundUp, + _ => TruncateMode::Absolute, /* assume that the size is just a number */ + }; + + // If there was a modifier character, strip it. + let size_string = match mode { + TruncateMode::Absolute => size_string, + _ => &size_string[1..], + }; + let num_bytes = match parse_size(size_string) { + Ok(b) => b, + Err(_) => crash!(1, "Invalid number: ‘{}’", size_string), + }; + (num_bytes, mode) + } None => (0, TruncateMode::Reference), }; @@ -146,13 +175,14 @@ fn truncate( TruncateMode::Reduce => (), _ => crash!(1, "you must specify a relative ‘--size’ with ‘--reference’"), }; - let _ = match File::open(Path::new(rfilename)) { - Ok(m) => m, - Err(f) => crash!(1, "{}", f.to_string()), - }; match metadata(rfilename) { Ok(meta) => meta.len(), - Err(f) => crash!(1, "{}", f.to_string()), + Err(f) => match f.kind() { + ErrorKind::NotFound => { + crash!(1, "cannot stat '{}': No such file or directory", rfilename) + } + _ => crash!(1, "{}", f.to_string()), + }, } } None => 0, @@ -181,20 +211,8 @@ fn truncate( TruncateMode::Reference => fsize, TruncateMode::Extend => fsize + modsize, TruncateMode::Reduce => fsize - modsize, - TruncateMode::AtMost => { - if fsize > modsize { - modsize - } else { - fsize - } - } - TruncateMode::AtLeast => { - if fsize < modsize { - modsize - } else { - fsize - } - } + TruncateMode::AtMost => fsize.min(modsize), + TruncateMode::AtLeast => fsize.max(modsize), TruncateMode::RoundDown => fsize - fsize % modsize, TruncateMode::RoundUp => fsize + fsize % modsize, }; @@ -208,64 +226,89 @@ fn truncate( } } -fn parse_size(size: &str) -> (u64, TruncateMode) { - let clean_size = size.replace(" ", ""); - let mode = match clean_size.chars().next().unwrap() { - '+' => TruncateMode::Extend, - '-' => 
TruncateMode::Reduce, - '<' => TruncateMode::AtMost, - '>' => TruncateMode::AtLeast, - '/' => TruncateMode::RoundDown, - '*' => TruncateMode::RoundUp, - _ => TruncateMode::Absolute, /* assume that the size is just a number */ +/// Parse a size string into a number of bytes. +/// +/// A size string comprises an integer and an optional unit. The unit +/// may be K, M, G, T, P, E, Z, or Y (powers of 1024) or KB, MB, +/// etc. (powers of 1000). +/// +/// # Errors +/// +/// This function returns an error if the string does not begin with a +/// numeral, or if the unit is not one of the supported units described +/// in the preceding section. +/// +/// # Examples +/// +/// ```rust,ignore +/// assert_eq!(parse_size("123").unwrap(), 123); +/// assert_eq!(parse_size("123K").unwrap(), 123 * 1024); +/// assert_eq!(parse_size("123KB").unwrap(), 123 * 1000); +/// ``` +fn parse_size(size: &str) -> Result { + // Get the numeric part of the size argument. For example, if the + // argument is "123K", then the numeric part is "123". + let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect(); + let number: u64 = match numeric_string.parse() { + Ok(n) => n, + Err(_) => return Err(()), }; - let bytes = { - let mut slice = if mode == TruncateMode::Absolute { - &clean_size - } else { - &clean_size[1..] - }; - if slice.chars().last().unwrap().is_alphabetic() { - slice = &slice[..slice.len() - 1]; - if !slice.is_empty() && slice.chars().last().unwrap().is_alphabetic() { - slice = &slice[..slice.len() - 1]; - } - } - slice - } - .to_owned(); - let mut number: u64 = match bytes.parse() { - Ok(num) => num, - Err(e) => crash!(1, "'{}' is not a valid number: {}", size, e), + + // Get the alphabetic units part of the size argument and compute + // the factor it represents. For example, if the argument is "123K", + // then the unit part is "K" and the factor is 1024. This may be the + // empty string, in which case, the factor is 1. 
+ let n = numeric_string.len(); + let (base, exponent): (u64, u32) = match &size[n..] { + "" => (1, 0), + "K" | "k" => (1024, 1), + "M" | "m" => (1024, 2), + "G" | "g" => (1024, 3), + "T" | "t" => (1024, 4), + "P" | "p" => (1024, 5), + "E" | "e" => (1024, 6), + "Z" | "z" => (1024, 7), + "Y" | "y" => (1024, 8), + "KB" | "kB" => (1000, 1), + "MB" | "mB" => (1000, 2), + "GB" | "gB" => (1000, 3), + "TB" | "tB" => (1000, 4), + "PB" | "pB" => (1000, 5), + "EB" | "eB" => (1000, 6), + "ZB" | "zB" => (1000, 7), + "YB" | "yB" => (1000, 8), + _ => return Err(()), }; - if clean_size.chars().last().unwrap().is_alphabetic() { - number *= match clean_size.chars().last().unwrap().to_ascii_uppercase() { - 'B' => match clean_size - .chars() - .nth(clean_size.len() - 2) - .unwrap() - .to_ascii_uppercase() - { - 'K' => 1000u64, - 'M' => 1000u64.pow(2), - 'G' => 1000u64.pow(3), - 'T' => 1000u64.pow(4), - 'P' => 1000u64.pow(5), - 'E' => 1000u64.pow(6), - 'Z' => 1000u64.pow(7), - 'Y' => 1000u64.pow(8), - letter => crash!(1, "'{}B' is not a valid suffix.", letter), - }, - 'K' => 1024u64, - 'M' => 1024u64.pow(2), - 'G' => 1024u64.pow(3), - 'T' => 1024u64.pow(4), - 'P' => 1024u64.pow(5), - 'E' => 1024u64.pow(6), - 'Z' => 1024u64.pow(7), - 'Y' => 1024u64.pow(8), - letter => crash!(1, "'{}' is not a valid suffix.", letter), - }; - } - (number, mode) + let factor = base.pow(exponent); + Ok(number * factor) +} + +#[cfg(test)] +mod tests { + use crate::parse_size; + + #[test] + fn test_parse_size_zero() { + assert_eq!(parse_size("0").unwrap(), 0); + assert_eq!(parse_size("0K").unwrap(), 0); + assert_eq!(parse_size("0KB").unwrap(), 0); + } + + #[test] + fn test_parse_size_without_factor() { + assert_eq!(parse_size("123").unwrap(), 123); + } + + #[test] + fn test_parse_size_kilobytes() { + assert_eq!(parse_size("123K").unwrap(), 123 * 1024); + assert_eq!(parse_size("123KB").unwrap(), 123 * 1000); + } + + #[test] + fn test_parse_size_megabytes() { + assert_eq!(parse_size("123").unwrap(), 123); + 
assert_eq!(parse_size("123M").unwrap(), 123 * 1024 * 1024); + assert_eq!(parse_size("123MB").unwrap(), 123 * 1000 * 1000); + } } diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 3b70856fa..6e95254ee 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -12,18 +12,20 @@ extern crate uucore; mod count_bytes; mod countable; +mod wordcount; use count_bytes::count_bytes_fast; use countable::WordCountable; +use wordcount::{TitledWordCount, WordCount}; use clap::{App, Arg, ArgMatches}; use thiserror::Error; -use std::cmp::max; -use std::fs::File; -use std::io::{self, Write}; -use std::ops::{Add, AddAssign}; +use std::fs::{self, File}; +use std::io::{self, ErrorKind, Write}; use std::path::Path; -use std::str::from_utf8; + +/// The minimum character width for formatting counts when reading from stdin. +const MINIMUM_WIDTH: usize = 7; #[derive(Error, Debug)] pub enum WcError { @@ -82,51 +84,6 @@ impl Settings { } } -#[derive(Debug, Default, Copy, Clone)] -struct WordCount { - bytes: usize, - chars: usize, - lines: usize, - words: usize, - max_line_length: usize, -} - -impl Add for WordCount { - type Output = Self; - - fn add(self, other: Self) -> Self { - Self { - bytes: self.bytes + other.bytes, - chars: self.chars + other.chars, - lines: self.lines + other.lines, - words: self.words + other.words, - max_line_length: max(self.max_line_length, other.max_line_length), - } - } -} - -impl AddAssign for WordCount { - fn add_assign(&mut self, other: Self) { - *self = *self + other - } -} - -impl WordCount { - fn with_title(self, title: &str) -> TitledWordCount { - TitledWordCount { title, count: self } - } -} - -/// This struct supplements the actual word count with a title that is displayed -/// to the user at the end of the program. -/// The reason we don't simply include title in the `WordCount` struct is that -/// it would result in unneccesary copying of `String`. 
-#[derive(Debug, Default, Clone)] -struct TitledWordCount<'a> { - title: &'a str, - count: WordCount, -} - static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if more than one FILE is specified."; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -149,6 +106,34 @@ fn get_usage() -> String { ) } +enum StdinKind { + /// Stdin specified on command-line with "-". + Explicit, + + /// Stdin implicitly specified on command-line by not passing any positional argument. + Implicit, +} + +/// Supported inputs. +enum Input { + /// A regular file. + Path(String), + + /// Standard input. + Stdin(StdinKind), +} + +impl Input { + /// Converts input to title that appears in stats. + fn to_title(&self) -> Option<&str> { + match self { + Input::Path(path) => Some(path), + Input::Stdin(StdinKind::Explicit) => Some("-"), + Input::Stdin(StdinKind::Implicit) => None, + } + } +} + pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); @@ -189,36 +174,33 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .arg(Arg::with_name(ARG_FILES).multiple(true).takes_value(true)) .get_matches_from(args); - let mut files: Vec = matches + let mut inputs: Vec = matches .values_of(ARG_FILES) - .map(|v| v.map(ToString::to_string).collect()) + .map(|v| { + v.map(|i| { + if i == "-" { + Input::Stdin(StdinKind::Explicit) + } else { + Input::Path(ToString::to_string(i)) + } + }) + .collect() + }) .unwrap_or_default(); - if files.is_empty() { - files.push("-".to_owned()); + if inputs.is_empty() { + inputs.push(Input::Stdin(StdinKind::Implicit)); } let settings = Settings::new(&matches); - if wc(files, &settings).is_ok() { + if wc(inputs, &settings).is_ok() { 0 } else { 1 } } -const CR: u8 = b'\r'; -const LF: u8 = b'\n'; -const SPACE: u8 = b' '; -const TAB: u8 = b'\t'; -const SYN: u8 = 0x16_u8; -const FF: u8 = 0x0C_u8; - -#[inline(always)] -fn is_word_separator(byte: u8) -> bool { - byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == 
FF -} - fn word_count_from_reader( mut reader: T, settings: &Settings, @@ -239,104 +221,181 @@ fn word_count_from_reader( // we do not need to decode the byte stream if we're only counting bytes/newlines let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length; - let mut line_count: usize = 0; - let mut word_count: usize = 0; - let mut byte_count: usize = 0; - let mut char_count: usize = 0; - let mut longest_line_length: usize = 0; - let mut ends_lf: bool; - - // reading from a TTY seems to raise a condition on, rather than return Some(0) like a file. - // hence the option wrapped in a result here - for line_result in reader.lines() { - let raw_line = match line_result { - Ok(l) => l, + // Sum the WordCount for each line. Show a warning for each line + // that results in an IO error when trying to read it. + let total = reader + .lines() + .filter_map(|res| match res { + Ok(line) => Some(line), Err(e) => { show_warning!("Error while reading {}: {}", path, e); - continue; + None } - }; - - // GNU 'wc' only counts lines that end in LF as lines - ends_lf = *raw_line.last().unwrap() == LF; - line_count += ends_lf as usize; - - byte_count += raw_line.len(); - - if decode_chars { - // try and convert the bytes to UTF-8 first - let current_char_count; - match from_utf8(&raw_line[..]) { - Ok(line) => { - word_count += line.split_whitespace().count(); - current_char_count = line.chars().count(); - } - Err(..) 
=> { - word_count += raw_line.split(|&x| is_word_separator(x)).count(); - current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count() - } - } - char_count += current_char_count; - if current_char_count > longest_line_length { - // -L is a GNU 'wc' extension so same behavior on LF - longest_line_length = current_char_count - (ends_lf as usize); - } - } - } - - Ok(WordCount { - bytes: byte_count, - chars: char_count, - lines: line_count, - words: word_count, - max_line_length: longest_line_length, - }) + }) + .map(|line| WordCount::from_line(&line, decode_chars)) + .sum(); + Ok(total) } -fn word_count_from_path(path: &str, settings: &Settings) -> WcResult { - if path == "-" { - let stdin = io::stdin(); - let stdin_lock = stdin.lock(); - word_count_from_reader(stdin_lock, settings, path) - } else { - let path_obj = Path::new(path); - if path_obj.is_dir() { - Err(WcError::IsDirectory(path.to_owned())) - } else { - let file = File::open(path)?; - word_count_from_reader(file, settings, path) +fn word_count_from_input(input: &Input, settings: &Settings) -> WcResult { + match input { + Input::Stdin(_) => { + let stdin = io::stdin(); + let stdin_lock = stdin.lock(); + word_count_from_reader(stdin_lock, settings, "-") + } + Input::Path(path) => { + let path_obj = Path::new(path); + if path_obj.is_dir() { + Err(WcError::IsDirectory(path.to_owned())) + } else { + let file = File::open(path)?; + word_count_from_reader(file, settings, path) + } } } } -fn wc(files: Vec, settings: &Settings) -> Result<(), u32> { - let mut total_word_count = WordCount::default(); - let mut results = vec![]; - let mut max_width: usize = 0; +/// Print a message appropriate for the particular error to `stderr`. +/// +/// # Examples +/// +/// This will print `wc: /tmp: Is a directory` to `stderr`. 
+/// +/// ```rust,ignore +/// show_error(Input::Path("/tmp"), WcError::IsDirectory("/tmp")) +/// ``` +fn show_error(input: &Input, err: WcError) { + match (input, err) { + (_, WcError::IsDirectory(path)) => { + show_error_custom_description!(path, "Is a directory"); + } + (Input::Path(path), WcError::Io(e)) if e.kind() == ErrorKind::NotFound => { + show_error_custom_description!(path, "No such file or directory"); + } + (_, e) => { + show_error!("{}", e); + } + }; +} + +/// Compute the number of digits needed to represent any count for this input. +/// +/// If `input` is [`Input::Stdin`], then this function returns +/// [`MINIMUM_WIDTH`]. Otherwise, if metadata could not be read from +/// `input` then this function returns 1. +/// +/// # Errors +/// +/// This function will return an error if `input` is a [`Input::Path`] +/// and there is a problem accessing the metadata of the given `input`. +/// +/// # Examples +/// +/// A [`Input::Stdin`] gets a default minimum width: +/// +/// ```rust,ignore +/// let input = Input::Stdin(StdinKind::Explicit); +/// assert_eq!(7, digit_width(input)); +/// ``` +fn digit_width(input: &Input) -> WcResult> { + match input { + Input::Stdin(_) => Ok(Some(MINIMUM_WIDTH)), + Input::Path(filename) => { + let path = Path::new(filename); + let metadata = fs::metadata(path)?; + if metadata.is_file() { + // TODO We are now computing the number of bytes in a file + // twice: once here and once in `WordCount::from_line()` (or + // in `count_bytes_fast()` if that function is called + // instead). See GitHub issue #2201. + let num_bytes = metadata.len(); + let num_digits = num_bytes.to_string().len(); + Ok(Some(num_digits)) + } else { + Ok(None) + } + } + } +} + +/// Compute the number of digits needed to represent all counts in all inputs. +/// +/// `inputs` may include zero or more [`Input::Stdin`] entries, each of +/// which represents reading from `stdin`. 
The presence of any such +/// entry causes this function to return a width that is at least +/// [`MINIMUM_WIDTH`]. +/// +/// If `input` is empty, then this function returns 1. If file metadata +/// could not be read from any of the [`Input::Path`] inputs and there +/// are no [`Input::Stdin`] inputs, then this function returns 1. +/// +/// If there is a problem accessing the metadata, this function will +/// silently ignore the error and assume that the number of digits +/// needed to display the counts for that file is 1. +/// +/// # Examples +/// +/// An empty slice implies a width of 1: +/// +/// ```rust,ignore +/// assert_eq!(1, max_width(&vec![])); +/// ``` +/// +/// The presence of [`Input::Stdin`] implies a minimum width: +/// +/// ```rust,ignore +/// let inputs = vec![Input::Stdin(StdinKind::Explicit)]; +/// assert_eq!(7, max_width(&inputs)); +/// ``` +fn max_width(inputs: &[Input]) -> usize { + let mut result = 1; + for input in inputs { + match digit_width(input) { + Ok(maybe_n) => { + if let Some(n) = maybe_n { + result = result.max(n); + } + } + Err(_) => continue, + } + } + result +} + +fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { + // Compute the width, in digits, to use when formatting counts. + // + // The width is the number of digits needed to print the number of + // bytes in the largest file. This is true regardless of whether + // the `settings` indicate that the bytes will be displayed. 
let mut error_count = 0; + let max_width = max_width(&inputs); - let num_files = files.len(); + let mut total_word_count = WordCount::default(); - for path in &files { - let word_count = word_count_from_path(&path, settings).unwrap_or_else(|err| { - show_error!("{}", err); + let num_inputs = inputs.len(); + + for input in &inputs { + let word_count = word_count_from_input(&input, settings).unwrap_or_else(|err| { + show_error(&input, err); error_count += 1; WordCount::default() }); - max_width = max(max_width, word_count.bytes.to_string().len() + 1); total_word_count += word_count; - results.push(word_count.with_title(path)); - } - - for result in &results { + let result = word_count.with_title(input.to_title()); if let Err(err) = print_stats(settings, &result, max_width) { - show_warning!("failed to print result for {}: {}", result.title, err); + show_warning!( + "failed to print result for {}: {}", + result.title.unwrap_or(""), + err + ); error_count += 1; } } - if num_files > 1 { - let total_result = total_word_count.with_title("total"); + if num_inputs > 1 { + let total_result = total_word_count.with_title(Some("total")); if let Err(err) = print_stats(settings, &total_result, max_width) { show_warning!("failed to print total: {}", err); error_count += 1; @@ -364,19 +423,40 @@ fn print_stats( min_width = 0; } + let mut is_first: bool = true; + if settings.show_lines { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; + is_first = false; } if settings.show_words { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.words, min_width)?; + is_first = false; } if settings.show_bytes { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; + is_first = false; } if settings.show_chars { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; + is_first = 
false; } if settings.show_max_line_length { + if !is_first { + write!(stdout_lock, " ")?; + } write!( stdout_lock, "{:1$}", @@ -384,10 +464,10 @@ fn print_stats( )?; } - if result.title == "-" { - writeln!(stdout_lock)?; + if let Some(title) = result.title { + writeln!(stdout_lock, " {}", title)?; } else { - writeln!(stdout_lock, " {}", result.title)?; + writeln!(stdout_lock)?; } Ok(()) diff --git a/src/uu/wc/src/wordcount.rs b/src/uu/wc/src/wordcount.rs new file mode 100644 index 000000000..9e2a81fca --- /dev/null +++ b/src/uu/wc/src/wordcount.rs @@ -0,0 +1,131 @@ +use std::cmp::max; +use std::iter::Sum; +use std::ops::{Add, AddAssign}; +use std::str::from_utf8; + +const CR: u8 = b'\r'; +const LF: u8 = b'\n'; +const SPACE: u8 = b' '; +const TAB: u8 = b'\t'; +const SYN: u8 = 0x16_u8; +const FF: u8 = 0x0C_u8; + +#[inline(always)] +fn is_word_separator(byte: u8) -> bool { + byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF +} + +#[derive(Debug, Default, Copy, Clone)] +pub struct WordCount { + pub bytes: usize, + pub chars: usize, + pub lines: usize, + pub words: usize, + pub max_line_length: usize, +} + +impl Add for WordCount { + type Output = Self; + + fn add(self, other: Self) -> Self { + Self { + bytes: self.bytes + other.bytes, + chars: self.chars + other.chars, + lines: self.lines + other.lines, + words: self.words + other.words, + max_line_length: max(self.max_line_length, other.max_line_length), + } + } +} + +impl AddAssign for WordCount { + fn add_assign(&mut self, other: Self) { + *self = *self + other + } +} + +impl Sum for WordCount { + fn sum(iter: I) -> WordCount + where + I: Iterator, + { + iter.fold(WordCount::default(), |acc, x| acc + x) + } +} + +impl WordCount { + /// Count the characters and whitespace-separated words in the given bytes. + /// + /// `line` is a slice of bytes that will be decoded as ASCII characters. 
+ fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) { + let word_count = line.split(|&x| is_word_separator(x)).count(); + let char_count = line.iter().filter(|c| c.is_ascii()).count(); + (word_count, char_count) + } + + /// Create a [`WordCount`] from a sequence of bytes representing a line. + /// + /// If the last byte of `line` encodes a newline character (`\n`), + /// then the [`lines`] field will be set to 1. Otherwise, it will + /// be set to 0. The [`bytes`] field is simply the length of + /// `line`. + /// + /// If `decode_chars` is `false`, the [`chars`] and [`words`] + /// fields will be set to 0. If it is `true`, this function will + /// attempt to decode the bytes first as UTF-8, and failing that, + /// as ASCII. + pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount { + // GNU 'wc' only counts lines that end in LF as lines + let lines = (*line.last().unwrap() == LF) as usize; + let bytes = line.len(); + let (words, chars) = if decode_chars { + WordCount::word_and_char_count(line) + } else { + (0, 0) + }; + // -L is a GNU 'wc' extension so same behavior on LF + let max_line_length = if chars > 0 { chars - lines } else { 0 }; + WordCount { + bytes, + chars, + lines, + words, + max_line_length, + } + } + + /// Count the UTF-8 characters and words in the given string slice. + /// + /// `s` is a string slice that is assumed to be a UTF-8 string. + fn utf8_word_and_char_count(s: &str) -> (usize, usize) { + let word_count = s.split_whitespace().count(); + let char_count = s.chars().count(); + (word_count, char_count) + } + + pub fn with_title(self, title: Option<&str>) -> TitledWordCount { + TitledWordCount { title, count: self } + } + + /// Count the characters and words in the given slice of bytes. + /// + /// `line` is a slice of bytes that will be decoded as UTF-8 + /// characters, or if that fails, as ASCII characters. 
+ fn word_and_char_count(line: &[u8]) -> (usize, usize) { + // try and convert the bytes to UTF-8 first + match from_utf8(line) { + Ok(s) => WordCount::utf8_word_and_char_count(s), + Err(..) => WordCount::ascii_word_and_char_count(line), + } + } +} + +/// This struct supplements the actual word count with an optional title that is +/// displayed to the user at the end of the program. +/// The reason we don't simply include title in the `WordCount` struct is that +/// it would result in unneccesary copying of `String`. +#[derive(Debug, Default, Clone)] +pub struct TitledWordCount<'a> { + pub title: Option<&'a str>, + pub count: WordCount, +} diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index ba1360eff..19ae3addb 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -29,7 +29,6 @@ mod options { pub const ONLY_HOSTNAME_USER: &str = "only_hostname_user"; pub const PROCESS: &str = "process"; pub const COUNT: &str = "count"; - #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "android"))] pub const RUNLEVEL: &str = "runlevel"; pub const SHORT: &str = "short"; pub const TIME: &str = "time"; @@ -41,14 +40,20 @@ mod options { static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Print information about users who are currently logged in."; +#[cfg(any(target_os = "linux"))] +static RUNLEVEL_HELP: &str = "print current runlevel"; +#[cfg(not(target_os = "linux"))] +static RUNLEVEL_HELP: &str = "print current runlevel (This is meaningless on non Linux)"; + fn get_usage() -> String { format!("{0} [OPTION]... [ FILE | ARG1 ARG2 ]", executable!()) } fn get_long_usage() -> String { - String::from( - "If FILE is not specified, use /var/run/utmp. /var/log/wtmp as FILE is common.\n\ -If ARG1 ARG2 given, -m presumed: 'am i' or 'mom likes' are usual.", + format!( + "If FILE is not specified, use {}. 
/var/log/wtmp as FILE is common.\n\ + If ARG1 ARG2 given, -m presumed: 'am i' or 'mom likes' are usual.", + utmpx::DEFAULT_FILE, ) } @@ -118,11 +123,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("all login names and number of users logged on"), ) .arg( - #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "android"))] Arg::with_name(options::RUNLEVEL) .long(options::RUNLEVEL) .short("r") - .help("print current runlevel"), + .help(RUNLEVEL_HELP), ) .arg( Arg::with_name(options::SHORT) @@ -383,15 +387,12 @@ fn current_tty() -> String { impl Who { fn exec(&mut self) { - let run_level_chk = |record: i16| { - #[allow(unused_assignments)] - let mut res = false; + let run_level_chk = |_record: i16| { + #[cfg(not(target_os = "linux"))] + return false; - #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "android"))] - { - res = record == utmpx::RUN_LVL; - } - res + #[cfg(target_os = "linux")] + return _record == utmpx::RUN_LVL; }; let f = if self.args.len() == 1 { @@ -424,7 +425,9 @@ impl Who { if self.need_users && ut.is_user_process() { self.print_user(&ut); } else if self.need_runlevel && run_level_chk(ut.record_type()) { - self.print_runlevel(&ut); + if cfg!(target_os = "linux") { + self.print_runlevel(&ut); + } } else if self.need_boottime && ut.record_type() == utmpx::BOOT_TIME { self.print_boottime(&ut); } else if self.need_clockchange && ut.record_type() == utmpx::NEW_TIME { @@ -548,20 +551,10 @@ impl Who { " ?".into() }; - let mut buf = vec![]; - let ut_host = ut.host(); - let mut res = ut_host.splitn(2, ':'); - if let Some(h) = res.next() { - if self.do_lookup { - buf.push(ut.canon_host().unwrap_or_else(|_| h.to_owned())); - } else { - buf.push(h.to_owned()); - } + let mut s = ut.host(); + if self.do_lookup { + s = safe_unwrap!(ut.canon_host()); } - if let Some(h) = res.next() { - buf.push(h.to_owned()); - } - let s = buf.join(":"); let hoststr = if s.is_empty() { s } else { format!("({})", s) }; self.print_line( 
diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 291456760..482252680 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -16,6 +16,7 @@ edition = "2018" path="src/lib/lib.rs" [dependencies] +dns-lookup = "1.0.5" dunce = "1.0.0" getopts = "<= 0.2.21" wild = "2.0.4" @@ -29,6 +30,9 @@ time = { version="<= 0.1.43", optional=true } data-encoding = { version="~2.1", optional=true } ## data-encoding: require v2.1; but v2.2.0 breaks the build for MinSRV v1.31.0 libc = { version="0.2.15, <= 0.2.85", optional=true } ## libc: initial utmp support added in v0.2.15; but v0.2.68 breaks the build for MinSRV v1.31.0 +[target.'cfg(target_os = "windows")'.dependencies] +winapi = { version = "0.3", features = ["errhandlingapi", "fileapi", "handleapi", "winerror"] } + [target.'cfg(target_os = "redox")'.dependencies] termion = "1.5" @@ -38,10 +42,12 @@ default = [] encoding = ["data-encoding", "thiserror"] entries = ["libc"] fs = ["libc"] +fsext = ["libc", "time"] mode = ["libc"] parse_time = [] perms = ["libc"] process = ["libc"] +ringbuffer = [] signals = [] utf8 = [] utmpx = ["time", "libc"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index c26225cb7..310a41fe1 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -4,8 +4,12 @@ pub mod encoding; #[cfg(feature = "fs")] pub mod fs; +#[cfg(feature = "fsext")] +pub mod fsext; #[cfg(feature = "parse_time")] pub mod parse_time; +#[cfg(feature = "ringbuffer")] +pub mod ringbuffer; #[cfg(feature = "zero-copy")] pub mod zero_copy; diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index a72d6ea82..afaa07af1 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -8,8 +8,9 @@ #[cfg(unix)] use libc::{ - mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, - S_IXGRP, S_IXOTH, S_IXUSR, + mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, 
S_IFREG, S_IFSOCK, S_IRGRP, + S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH, + S_IXUSR, }; use std::borrow::Cow; use std::env; @@ -23,9 +24,10 @@ use std::os::unix::fs::MetadataExt; use std::path::{Component, Path, PathBuf}; #[cfg(unix)] +#[macro_export] macro_rules! has { ($mode:expr, $perm:expr) => { - $mode & ($perm as u32) != 0 + $mode & $perm != 0 }; } @@ -52,11 +54,19 @@ pub fn resolve_relative_path(path: &Path) -> Cow { result.into() } +/// Controls how symbolic links should be handled when canonicalizing a path. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum CanonicalizeMode { + /// Do not resolve any symbolic links. None, + + /// Resolve all symbolic links. Normal, + + /// Resolve symbolic links, ignoring errors on the final component. Existing, + + /// Resolve symbolic links, ignoring errors on the non-final components. Missing, } @@ -123,6 +133,24 @@ fn resolve>(original: P) -> IOResult { Ok(result) } +/// Return the canonical, absolute form of a path. +/// +/// This function is a generalization of [`std::fs::canonicalize`] that +/// allows controlling how symbolic links are resolved and how to deal +/// with missing components. It returns the canonical, absolute form of +/// a path. The `can_mode` parameter controls how symbolic links are +/// resolved: +/// +/// * [`CanonicalizeMode::Normal`] makes this function behave like +/// [`std::fs::canonicalize`], resolving symbolic links and returning +/// an error if the path does not exist. +/// * [`CanonicalizeMode::Missing`] makes this function ignore non-final +/// components of the path that could not be resolved. +/// * [`CanonicalizeMode::Existing`] makes this function return an error +/// if the final component of the path does not exist. +/// * [`CanonicalizeMode::None`] makes this function not try to resolve +/// any symbolic links. 
+/// pub fn canonicalize>(original: P, can_mode: CanonicalizeMode) -> IOResult { // Create an absolute path let original = original.as_ref(); @@ -178,6 +206,10 @@ pub fn canonicalize>(original: P, can_mode: CanonicalizeMode) -> result.push(parts.last().unwrap()); + if can_mode == CanonicalizeMode::None { + return Ok(result); + } + match resolve(&result) { Err(e) => { if can_mode == CanonicalizeMode::Existing { @@ -240,22 +272,42 @@ pub fn is_stderr_interactive() -> bool { #[cfg(not(unix))] #[allow(unused_variables)] -pub fn display_permissions(metadata: &fs::Metadata) -> String { +pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { + if display_file_type { + return String::from("----------"); + } String::from("---------") } #[cfg(unix)] -pub fn display_permissions(metadata: &fs::Metadata) -> String { +pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { let mode: mode_t = metadata.mode() as mode_t; - display_permissions_unix(mode as u32) + display_permissions_unix(mode, display_file_type) } #[cfg(unix)] -pub fn display_permissions_unix(mode: u32) -> String { - let mut result = String::with_capacity(9); +pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String { + let mut result; + if display_file_type { + result = String::with_capacity(10); + result.push(match mode & S_IFMT { + S_IFDIR => 'd', + S_IFCHR => 'c', + S_IFBLK => 'b', + S_IFREG => '-', + S_IFIFO => 'p', + S_IFLNK => 'l', + S_IFSOCK => 's', + // TODO: Other file types + _ => '?', + }); + } else { + result = String::with_capacity(9); + } + result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISUID) { + result.push(if has!(mode, S_ISUID as mode_t) { if has!(mode, S_IXUSR) { 's' } else { @@ -269,7 +321,7 @@ pub fn display_permissions_unix(mode: u32) -> String { result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); 
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISGID) { + result.push(if has!(mode, S_ISGID as mode_t) { if has!(mode, S_IXGRP) { 's' } else { @@ -283,7 +335,7 @@ pub fn display_permissions_unix(mode: u32) -> String { result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISVTX) { + result.push(if has!(mode, S_ISVTX as mode_t) { if has!(mode, S_IXOTH) { 't' } else { @@ -355,4 +407,57 @@ mod tests { ); } } + + #[cfg(unix)] + #[test] + fn test_display_permissions() { + assert_eq!( + "drwxr-xr-x", + display_permissions_unix(S_IFDIR | 0o755, true) + ); + assert_eq!( + "rwxr-xr-x", + display_permissions_unix(S_IFDIR | 0o755, false) + ); + assert_eq!( + "-rw-r--r--", + display_permissions_unix(S_IFREG | 0o644, true) + ); + assert_eq!( + "srw-r-----", + display_permissions_unix(S_IFSOCK | 0o640, true) + ); + assert_eq!( + "lrw-r-xr-x", + display_permissions_unix(S_IFLNK | 0o655, true) + ); + assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true)); + + assert_eq!( + "brwSr-xr-x", + display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true) + ); + assert_eq!( + "brwsr-xr-x", + display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true) + ); + + assert_eq!( + "prw---sr--", + display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true) + ); + assert_eq!( + "prw---Sr--", + display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true) + ); + + assert_eq!( + "c---r-xr-t", + display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true) + ); + assert_eq!( + "c---r-xr-T", + display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true) + ); + } } diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs new file mode 100644 index 000000000..19c634b0b --- /dev/null +++ b/src/uucore/src/lib/features/fsext.rs @@ -0,0 +1,821 @@ +// This file is part of the uutils coreutils 
package. +// +// (c) Jian Zeng +// (c) Fangxu Hu +// (c) Sylvestre Ledru +// +// For the full copyright and license information, please view the LICENSE file +// that was distributed with this source code. + +// spell-checker:ignore (ToDO) strerror IFBLK IFCHR IFDIR IFLNK IFIFO IFMT IFREG IFSOCK subsec nanos gnulib statfs Sstatfs bitrig statvfs iosize blksize fnodes fsid namelen bsize bfree bavail ffree frsize namemax errno fstype adfs acfs aufs affs autofs befs bdevfs binfmt ceph cgroups cifs configfs cramfs cgroupfs debugfs devfs devpts ecryptfs btrfs efivarfs exofs fhgfs fuseblk fusectl futexfs gpfs hfsx hostfs hpfs inodefs ibrix inotifyfs isofs jffs logfs hugetlbfs mqueue nsfs ntfs ocfs panfs pipefs ramfs romfs nfsd nilfs pstorefs reiserfs securityfs smackfs snfs sockfs squashfs sysfs sysv tempfs tracefs ubifs usbdevfs vmhgfs tmpfs vxfs wslfs xenfs vzfs openprom overlayfs + +extern crate time; + +pub use crate::*; // import macros from `../../macros.rs` + +#[cfg(target_os = "linux")] +const LINUX_MTAB: &str = "/etc/mtab"; +#[cfg(target_os = "linux")] +const LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; +static MOUNT_OPT_BIND: &str = "bind"; +#[cfg(windows)] +const MAX_PATH: usize = 266; +#[cfg(not(unix))] +static EXIT_ERR: i32 = 1; + +#[cfg(windows)] +use std::ffi::OsString; +#[cfg(windows)] +use std::os::windows::ffi::OsStrExt; +#[cfg(windows)] +use std::os::windows::ffi::OsStringExt; +#[cfg(windows)] +use winapi::shared::minwindef::DWORD; +#[cfg(windows)] +use winapi::um::errhandlingapi::GetLastError; +#[cfg(windows)] +use winapi::um::fileapi::GetDiskFreeSpaceW; +#[cfg(windows)] +use winapi::um::fileapi::{ + FindFirstVolumeW, FindNextVolumeW, FindVolumeClose, GetDriveTypeW, GetVolumeInformationW, + GetVolumePathNamesForVolumeNameW, QueryDosDeviceW, +}; +#[cfg(windows)] +use winapi::um::handleapi::INVALID_HANDLE_VALUE; +#[cfg(windows)] +use winapi::um::winbase::DRIVE_REMOTE; + +#[cfg(windows)] +macro_rules! 
String2LPWSTR { + ($str: expr) => { + OsString::from($str.clone()) + .as_os_str() + .encode_wide() + .chain(Some(0)) + .collect::>() + .as_ptr() + }; +} + +#[cfg(windows)] +#[allow(non_snake_case)] +fn LPWSTR2String(buf: &[u16]) -> String { + let len = unsafe { libc::wcslen(buf.as_ptr()) }; + OsString::from_wide(&buf[..len as usize]) + .into_string() + .unwrap() +} + +use self::time::Timespec; +#[cfg(unix)] +use libc::{ + mode_t, strerror, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, +}; +use std::borrow::Cow; +use std::convert::{AsRef, From}; +#[cfg(unix)] +use std::ffi::CString; +#[cfg(unix)] +use std::io::Error as IOError; +#[cfg(unix)] +use std::mem; +use std::path::Path; +use std::time::UNIX_EPOCH; + +#[cfg(any( + target_os = "linux", + target_vendor = "apple", + target_os = "android", + target_os = "freebsd" +))] +pub use libc::statfs as Sstatfs; +#[cfg(any( + target_os = "openbsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "bitrig", + target_os = "dragonfly" +))] +pub use libc::statvfs as Sstatfs; + +#[cfg(any( + target_os = "linux", + target_vendor = "apple", + target_os = "android", + target_os = "freebsd" +))] +pub use libc::statfs as statfs_fn; +#[cfg(any( + target_os = "openbsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "bitrig", + target_os = "dragonfly" +))] +pub use libc::statvfs as statfs_fn; + +pub trait BirthTime { + fn pretty_birth(&self) -> String; + fn birth(&self) -> String; +} + +use std::fs::Metadata; +impl BirthTime for Metadata { + fn pretty_birth(&self) -> String { + self.created() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .map(|e| pretty_time(e.as_secs() as i64, i64::from(e.subsec_nanos()))) + .unwrap_or_else(|| "-".to_owned()) + } + + fn birth(&self) -> String { + self.created() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .map(|e| format!("{}", e.as_secs())) + .unwrap_or_else(|| "0".to_owned()) + } +} + +#[derive(Debug, Clone)] +pub 
struct MountInfo { + // it stores `volume_name` in windows platform and `dev_id` in unix platform + pub dev_id: String, + pub dev_name: String, + pub fs_type: String, + pub mount_dir: String, + pub mount_option: String, // we only care "bind" option + pub mount_root: String, + pub remote: bool, + pub dummy: bool, +} + +impl MountInfo { + fn set_missing_fields(&mut self) { + #[cfg(unix)] + { + // We want to keep the dev_id on Windows + // but set dev_id + let path = CString::new(self.mount_dir.clone()).unwrap(); + unsafe { + let mut stat = mem::zeroed(); + if libc::stat(path.as_ptr(), &mut stat) == 0 { + self.dev_id = (stat.st_dev as i32).to_string(); + } else { + self.dev_id = "".to_string(); + } + } + } + // set MountInfo::dummy + match self.fs_type.as_ref() { + "autofs" | "proc" | "subfs" + /* for Linux 2.6/3.x */ + | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" + /* FreeBSD, Linux 2.4 */ + | "devfs" + /* for NetBSD 3.0 */ + | "kernfs" + /* for Irix 6.5 */ + | "ignore" => self.dummy = true, + _ => self.dummy = self.fs_type == "none" + && self.mount_option.find(MOUNT_OPT_BIND).is_none(), + } + // set MountInfo::remote + #[cfg(windows)] + { + self.remote = DRIVE_REMOTE == unsafe { GetDriveTypeW(String2LPWSTR!(self.mount_root)) }; + } + #[cfg(unix)] + { + if self.dev_name.find(':').is_some() + || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" + || self.fs_type == "cifs") + || self.dev_name == "-hosts" + { + self.remote = true; + } else { + self.remote = false; + } + } + } + + #[cfg(target_os = "linux")] + fn new(file_name: &str, raw: Vec<&str>) -> Option { + match file_name { + // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + // "man proc" for more details + LINUX_MOUNTINFO => { + let mut m = MountInfo { + dev_id: "".to_string(), + dev_name: raw[9].to_string(), + fs_type: raw[8].to_string(), + mount_root: raw[3].to_string(), + mount_dir: raw[4].to_string(), + mount_option: 
raw[5].to_string(), + remote: false, + dummy: false, + }; + m.set_missing_fields(); + Some(m) + } + LINUX_MTAB => { + let mut m = MountInfo { + dev_id: "".to_string(), + dev_name: raw[0].to_string(), + fs_type: raw[2].to_string(), + mount_root: "".to_string(), + mount_dir: raw[1].to_string(), + mount_option: raw[3].to_string(), + remote: false, + dummy: false, + }; + m.set_missing_fields(); + Some(m) + } + _ => None, + } + } + #[cfg(windows)] + fn new(mut volume_name: String) -> Option { + let mut dev_name_buf = [0u16; MAX_PATH]; + volume_name.pop(); + unsafe { + QueryDosDeviceW( + OsString::from(volume_name.clone()) + .as_os_str() + .encode_wide() + .chain(Some(0)) + .skip(4) + .collect::>() + .as_ptr(), + dev_name_buf.as_mut_ptr(), + dev_name_buf.len() as DWORD, + ) + }; + volume_name.push('\\'); + let dev_name = LPWSTR2String(&dev_name_buf); + + let mut mount_root_buf = [0u16; MAX_PATH]; + let success = unsafe { + GetVolumePathNamesForVolumeNameW( + String2LPWSTR!(volume_name), + mount_root_buf.as_mut_ptr(), + mount_root_buf.len() as DWORD, + ptr::null_mut(), + ) + }; + if 0 == success { + // TODO: support the case when `GetLastError()` returns `ERROR_MORE_DATA` + return None; + } + let mount_root = LPWSTR2String(&mount_root_buf); + + let mut fs_type_buf = [0u16; MAX_PATH]; + let success = unsafe { + GetVolumeInformationW( + String2LPWSTR!(mount_root), + ptr::null_mut(), + 0, + ptr::null_mut(), + ptr::null_mut(), + ptr::null_mut(), + fs_type_buf.as_mut_ptr(), + fs_type_buf.len() as DWORD, + ) + }; + let fs_type = if 0 != success { + Some(LPWSTR2String(&fs_type_buf)) + } else { + None + }; + let mut mn_info = MountInfo { + dev_id: volume_name, + dev_name, + fs_type: fs_type.unwrap_or_else(|| "".to_string()), + mount_root, + mount_dir: "".to_string(), + mount_option: "".to_string(), + remote: false, + dummy: false, + }; + mn_info.set_missing_fields(); + Some(mn_info) + } +} + +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::ffi::CStr; 
+#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +impl From for MountInfo { + fn from(statfs: Sstatfs) -> Self { + let mut info = MountInfo { + dev_id: "".to_string(), + dev_name: unsafe { + CStr::from_ptr(&statfs.f_mntfromname[0]) + .to_string_lossy() + .into_owned() + }, + fs_type: unsafe { + CStr::from_ptr(&statfs.f_fstypename[0]) + .to_string_lossy() + .into_owned() + }, + mount_dir: unsafe { + CStr::from_ptr(&statfs.f_mntonname[0]) + .to_string_lossy() + .into_owned() + }, + mount_root: "".to_string(), + mount_option: "".to_string(), + remote: false, + dummy: false, + }; + info.set_missing_fields(); + info + } +} + +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +use libc::c_int; +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +extern "C" { + #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] + #[link_name = "getmntinfo$INODE64"] + fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; + + #[cfg(any( + all(target_os = "freebsd"), + all(target_vendor = "apple", target_arch = "aarch64") + ))] + fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; +} + +#[cfg(target_os = "linux")] +use std::fs::File; +#[cfg(target_os = "linux")] +use std::io::{BufRead, BufReader}; +#[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "windows"))] +use std::ptr; +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::slice; +/// Read file system list. 
+pub fn read_fs_list() -> Vec { + #[cfg(target_os = "linux")] + { + let (file_name, fobj) = File::open(LINUX_MOUNTINFO) + .map(|f| (LINUX_MOUNTINFO, f)) + .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f))) + .expect("failed to find mount list files"); + let reader = BufReader::new(fobj); + reader + .lines() + .filter_map(|line| line.ok()) + .filter_map(|line| { + let raw_data = line.split_whitespace().collect::>(); + MountInfo::new(file_name, raw_data) + }) + .collect::>() + } + #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] + { + let mut mptr: *mut Sstatfs = ptr::null_mut(); + let len = unsafe { getmntinfo(&mut mptr, 1_i32) }; + if len < 0 { + crash!(1, "getmntinfo failed"); + } + let mounts = unsafe { slice::from_raw_parts(mptr, len as usize) }; + mounts + .iter() + .map(|m| MountInfo::from(*m)) + .collect::>() + } + #[cfg(windows)] + { + let mut volume_name_buf = [0u16; MAX_PATH]; + // As recommended in the MS documentation, retrieve the first volume before the others + let find_handle = unsafe { + FindFirstVolumeW(volume_name_buf.as_mut_ptr(), volume_name_buf.len() as DWORD) + }; + if INVALID_HANDLE_VALUE == find_handle { + crash!(EXIT_ERR, "FindFirstVolumeW failed: {}", unsafe { + GetLastError() + }); + } + let mut mounts = Vec::::new(); + loop { + let volume_name = LPWSTR2String(&volume_name_buf); + if !volume_name.starts_with("\\\\?\\") || !volume_name.ends_with('\\') { + show_warning!("A bad path was skipped: {}", volume_name); + continue; + } + if let Some(m) = MountInfo::new(volume_name) { + mounts.push(m); + } + if 0 == unsafe { + FindNextVolumeW( + find_handle, + volume_name_buf.as_mut_ptr(), + volume_name_buf.len() as DWORD, + ) + } { + let err = unsafe { GetLastError() }; + if err != winapi::shared::winerror::ERROR_NO_MORE_FILES { + crash!(EXIT_ERR, "FindNextVolumeW failed: {}", err); + } + break; + } + } + unsafe { + FindVolumeClose(find_handle); + } + mounts + } +} + +#[derive(Debug, Clone)] +pub struct FsUsage { + pub 
blocksize: u64, + pub blocks: u64, + pub bfree: u64, + pub bavail: u64, + pub bavail_top_bit_set: bool, + pub files: u64, + pub ffree: u64, +} + +impl FsUsage { + #[cfg(unix)] + pub fn new(statvfs: Sstatfs) -> FsUsage { + { + FsUsage { + blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? + blocks: statvfs.f_blocks as u64, + bfree: statvfs.f_bfree as u64, + bavail: statvfs.f_bavail as u64, + bavail_top_bit_set: ((statvfs.f_bavail as u64) & (1u64.rotate_right(1))) != 0, + files: statvfs.f_files as u64, + ffree: statvfs.f_ffree as u64, + } + } + } + #[cfg(not(unix))] + pub fn new(path: &Path) -> FsUsage { + let mut root_path = [0u16; MAX_PATH]; + let success = unsafe { + GetVolumePathNamesForVolumeNameW( + //path_utf8.as_ptr(), + String2LPWSTR!(path.as_os_str()), + root_path.as_mut_ptr(), + root_path.len() as DWORD, + ptr::null_mut(), + ) + }; + if 0 == success { + crash!( + EXIT_ERR, + "GetVolumePathNamesForVolumeNameW failed: {}", + unsafe { GetLastError() } + ); + } + + let mut sectors_per_cluster = 0; + let mut bytes_per_sector = 0; + let mut number_of_free_clusters = 0; + let mut total_number_of_clusters = 0; + + let success = unsafe { + GetDiskFreeSpaceW( + String2LPWSTR!(path.as_os_str()), + &mut sectors_per_cluster, + &mut bytes_per_sector, + &mut number_of_free_clusters, + &mut total_number_of_clusters, + ) + }; + if 0 == success { + // Fails in case of CD for example + //crash!(EXIT_ERR, "GetDiskFreeSpaceW failed: {}", unsafe { + //GetLastError() + //}); + } + + let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; + FsUsage { + // f_bsize File system block size. + blocksize: bytes_per_cluster as u64, + // f_blocks - Total number of blocks on the file system, in units of f_frsize. + // frsize = Fundamental file system block size (fragment size). + blocks: total_number_of_clusters as u64, + // Total number of free blocks. 
+ bfree: number_of_free_clusters as u64, + // Total number of free blocks available to non-privileged processes. + bavail: 0, + bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, + // Total number of file nodes (inodes) on the file system. + files: 0, // Not available on windows + // Total number of free file nodes (inodes). + ffree: 4096, // Meaningless on Windows + } + } +} + +#[cfg(unix)] +pub trait FsMeta { + fn fs_type(&self) -> i64; + fn iosize(&self) -> u64; + fn blksize(&self) -> i64; + fn total_blocks(&self) -> u64; + fn free_blocks(&self) -> u64; + fn avail_blocks(&self) -> u64; + fn total_fnodes(&self) -> u64; + fn free_fnodes(&self) -> u64; + fn fsid(&self) -> u64; + fn namelen(&self) -> u64; +} + +#[cfg(unix)] +impl FsMeta for Sstatfs { + fn blksize(&self) -> i64 { + self.f_bsize as i64 + } + fn total_blocks(&self) -> u64 { + self.f_blocks as u64 + } + fn free_blocks(&self) -> u64 { + self.f_bfree as u64 + } + fn avail_blocks(&self) -> u64 { + self.f_bavail as u64 + } + fn total_fnodes(&self) -> u64 { + self.f_files as u64 + } + fn free_fnodes(&self) -> u64 { + self.f_ffree as u64 + } + #[cfg(any(target_os = "linux", target_vendor = "apple", target_os = "freebsd"))] + fn fs_type(&self) -> i64 { + self.f_type as i64 + } + #[cfg(not(any(target_os = "linux", target_vendor = "apple", target_os = "freebsd")))] + fn fs_type(&self) -> i64 { + // FIXME: statvfs doesn't have an equivalent, so we need to do something else + unimplemented!() + } + + #[cfg(target_os = "linux")] + fn iosize(&self) -> u64 { + self.f_frsize as u64 + } + #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] + fn iosize(&self) -> u64 { + self.f_iosize as u64 + } + // XXX: dunno if this is right + #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] + fn iosize(&self) -> u64 { + self.f_bsize as u64 + } + + // Linux, SunOS, HP-UX, 4.4BSD, FreeBSD have a system call statfs() that returns + // a struct statfs, containing a 
fsid_t f_fsid, where fsid_t is defined + // as struct { int val[2]; } + // + // Solaris, Irix and POSIX have a system call statvfs(2) that returns a + // struct statvfs, containing an unsigned long f_fsid + #[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux"))] + fn fsid(&self) -> u64 { + let f_fsid: &[u32; 2] = + unsafe { &*(&self.f_fsid as *const libc::fsid_t as *const [u32; 2]) }; + (u64::from(f_fsid[0])) << 32 | u64::from(f_fsid[1]) + } + #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] + fn fsid(&self) -> u64 { + self.f_fsid as u64 + } + + #[cfg(target_os = "linux")] + fn namelen(&self) -> u64 { + self.f_namelen as u64 + } + #[cfg(target_vendor = "apple")] + fn namelen(&self) -> u64 { + 1024 + } + #[cfg(target_os = "freebsd")] + fn namelen(&self) -> u64 { + self.f_namemax as u64 + } + // XXX: should everything just use statvfs? + #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] + fn namelen(&self) -> u64 { + self.f_namemax as u64 + } +} + +#[cfg(unix)] +pub fn statfs>(path: P) -> Result +where + Vec: From

, +{ + match CString::new(path) { + Ok(p) => { + let mut buffer: Sstatfs = unsafe { mem::zeroed() }; + unsafe { + match statfs_fn(p.as_ptr(), &mut buffer) { + 0 => Ok(buffer), + _ => { + let errno = IOError::last_os_error().raw_os_error().unwrap_or(0); + Err(CString::from_raw(strerror(errno)) + .into_string() + .unwrap_or_else(|_| "Unknown Error".to_owned())) + } + } + } + } + Err(e) => Err(e.to_string()), + } +} + +pub fn pretty_time(sec: i64, nsec: i64) -> String { + // sec == seconds since UNIX_EPOCH + // nsec == nanoseconds since (UNIX_EPOCH + sec) + let tm = time::at(Timespec::new(sec, nsec as i32)); + let res = time::strftime("%Y-%m-%d %H:%M:%S.%f %z", &tm).unwrap(); + if res.ends_with(" -0000") { + res.replace(" -0000", " +0000") + } else { + res + } +} + +#[cfg(unix)] +pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { + match mode & S_IFMT { + S_IFREG => { + if size != 0 { + "regular file" + } else { + "regular empty file" + } + } + S_IFDIR => "directory", + S_IFLNK => "symbolic link", + S_IFCHR => "character special file", + S_IFBLK => "block special file", + S_IFIFO => "fifo", + S_IFSOCK => "socket", + // TODO: Other file types + // See coreutils/gnulib/lib/file-type.c + _ => "weird file", + } +} + +pub fn pretty_fstype<'a>(fstype: i64) -> Cow<'a, str> { + match fstype { + 0x6163_6673 => "acfs".into(), + 0xADF5 => "adfs".into(), + 0xADFF => "affs".into(), + 0x5346_414F => "afs".into(), + 0x0904_1934 => "anon-inode FS".into(), + 0x6175_6673 => "aufs".into(), + 0x0187 => "autofs".into(), + 0x4246_5331 => "befs".into(), + 0x6264_6576 => "bdevfs".into(), + 0x1BAD_FACE => "bfs".into(), + 0xCAFE_4A11 => "bpf_fs".into(), + 0x4249_4E4D => "binfmt_misc".into(), + 0x9123_683E => "btrfs".into(), + 0x7372_7279 => "btrfs_test".into(), + 0x00C3_6400 => "ceph".into(), + 0x0027_E0EB => "cgroupfs".into(), + 0xFF53_4D42 => "cifs".into(), + 0x7375_7245 => "coda".into(), + 0x012F_F7B7 => "coh".into(), + 0x6265_6570 => "configfs".into(), + 0x28CD_3D45 => 
"cramfs".into(), + 0x453D_CD28 => "cramfs-wend".into(), + 0x6462_6720 => "debugfs".into(), + 0x1373 => "devfs".into(), + 0x1CD1 => "devpts".into(), + 0xF15F => "ecryptfs".into(), + 0xDE5E_81E4 => "efivarfs".into(), + 0x0041_4A53 => "efs".into(), + 0x5DF5 => "exofs".into(), + 0x137D => "ext".into(), + 0xEF53 => "ext2/ext3".into(), + 0xEF51 => "ext2".into(), + 0xF2F5_2010 => "f2fs".into(), + 0x4006 => "fat".into(), + 0x1983_0326 => "fhgfs".into(), + 0x6573_5546 => "fuseblk".into(), + 0x6573_5543 => "fusectl".into(), + 0x0BAD_1DEA => "futexfs".into(), + 0x0116_1970 => "gfs/gfs2".into(), + 0x4750_4653 => "gpfs".into(), + 0x4244 => "hfs".into(), + 0x482B => "hfs+".into(), + 0x4858 => "hfsx".into(), + 0x00C0_FFEE => "hostfs".into(), + 0xF995_E849 => "hpfs".into(), + 0x9584_58F6 => "hugetlbfs".into(), + 0x1130_7854 => "inodefs".into(), + 0x0131_11A8 => "ibrix".into(), + 0x2BAD_1DEA => "inotifyfs".into(), + 0x9660 => "isofs".into(), + 0x4004 => "isofs".into(), + 0x4000 => "isofs".into(), + 0x07C0 => "jffs".into(), + 0x72B6 => "jffs2".into(), + 0x3153_464A => "jfs".into(), + 0x6B41_4653 => "k-afs".into(), + 0xC97E_8168 => "logfs".into(), + 0x0BD0_0BD0 => "lustre".into(), + 0x5346_314D => "m1fs".into(), + 0x137F => "minix".into(), + 0x138F => "minix (30 char.)".into(), + 0x2468 => "minix v2".into(), + 0x2478 => "minix v2 (30 char.)".into(), + 0x4D5A => "minix3".into(), + 0x1980_0202 => "mqueue".into(), + 0x4D44 => "msdos".into(), + 0x564C => "novell".into(), + 0x6969 => "nfs".into(), + 0x6E66_7364 => "nfsd".into(), + 0x3434 => "nilfs".into(), + 0x6E73_6673 => "nsfs".into(), + 0x5346_544E => "ntfs".into(), + 0x9FA1 => "openprom".into(), + 0x7461_636F => "ocfs2".into(), + 0x794C_7630 => "overlayfs".into(), + 0xAAD7_AAEA => "panfs".into(), + 0x5049_5045 => "pipefs".into(), + 0x7C7C_6673 => "prl_fs".into(), + 0x9FA0 => "proc".into(), + 0x6165_676C => "pstorefs".into(), + 0x002F => "qnx4".into(), + 0x6819_1122 => "qnx6".into(), + 0x8584_58F6 => "ramfs".into(), + 0x5265_4973 => 
"reiserfs".into(), + 0x7275 => "romfs".into(), + 0x6759_6969 => "rpc_pipefs".into(), + 0x7363_6673 => "securityfs".into(), + 0xF97C_FF8C => "selinux".into(), + 0x4341_5D53 => "smackfs".into(), + 0x517B => "smb".into(), + 0xFE53_4D42 => "smb2".into(), + 0xBEEF_DEAD => "snfs".into(), + 0x534F_434B => "sockfs".into(), + 0x7371_7368 => "squashfs".into(), + 0x6265_6572 => "sysfs".into(), + 0x012F_F7B6 => "sysv2".into(), + 0x012F_F7B5 => "sysv4".into(), + 0x0102_1994 => "tmpfs".into(), + 0x7472_6163 => "tracefs".into(), + 0x2405_1905 => "ubifs".into(), + 0x1501_3346 => "udf".into(), + 0x0001_1954 => "ufs".into(), + 0x5419_0100 => "ufs".into(), + 0x9FA2 => "usbdevfs".into(), + 0x0102_1997 => "v9fs".into(), + 0xBACB_ACBC => "vmhgfs".into(), + 0xA501_FCF5 => "vxfs".into(), + 0x565A_4653 => "vzfs".into(), + 0x5346_4846 => "wslfs".into(), + 0xABBA_1974 => "xenfs".into(), + 0x012F_F7B4 => "xenix".into(), + 0x5846_5342 => "xfs".into(), + 0x012F_D16D => "xia".into(), + 0x2FC1_2FC1 => "zfs".into(), + other => format!("UNKNOWN ({:#x})", other).into(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(unix)] + fn test_file_type() { + assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); + assert_eq!("character special file", pretty_filetype(S_IFCHR, 0)); + assert_eq!("regular file", pretty_filetype(S_IFREG, 1)); + assert_eq!("regular empty file", pretty_filetype(S_IFREG, 0)); + assert_eq!("weird file", pretty_filetype(0, 0)); + } + + #[test] + fn test_fs_type() { + assert_eq!("ext2/ext3", pretty_fstype(0xEF53)); + assert_eq!("tmpfs", pretty_fstype(0x01021994)); + assert_eq!("nfs", pretty_fstype(0x6969)); + assert_eq!("btrfs", pretty_fstype(0x9123683e)); + assert_eq!("xfs", pretty_fstype(0x58465342)); + assert_eq!("zfs", pretty_fstype(0x2FC12FC1)); + assert_eq!("ntfs", pretty_fstype(0x5346544e)); + assert_eq!("fat", pretty_fstype(0x4006)); + assert_eq!("UNKNOWN (0x1234)", pretty_fstype(0x1234)); + } +} diff --git a/src/uucore/src/lib/features/mode.rs 
b/src/uucore/src/lib/features/mode.rs index 1bb79ac03..4fb5a6509 100644 --- a/src/uucore/src/lib/features/mode.rs +++ b/src/uucore/src/lib/features/mode.rs @@ -132,19 +132,15 @@ fn parse_change(mode: &str, fperm: u32, considering_dir: bool) -> (u32, usize) { (srwx, pos) } -pub fn parse_mode(mode: Option) -> Result { +pub fn parse_mode(mode: &str) -> Result { let fperm = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - if let Some(mode) = mode { - let arr: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; - let result = if mode.contains(arr) { - parse_numeric(fperm as u32, mode.as_str()) - } else { - parse_symbolic(fperm as u32, mode.as_str(), true) - }; - result.map(|mode| mode as mode_t) + let arr: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; + let result = if mode.contains(arr) { + parse_numeric(fperm as u32, mode) } else { - Ok(fperm) - } + parse_symbolic(fperm as u32, mode, true) + }; + result.map(|mode| mode as mode_t) } #[cfg(test)] @@ -152,20 +148,19 @@ mod test { #[test] fn symbolic_modes() { - assert_eq!(super::parse_mode(Some("u+x".to_owned())).unwrap(), 0o766); + assert_eq!(super::parse_mode("u+x").unwrap(), 0o766); assert_eq!( - super::parse_mode(Some("+x".to_owned())).unwrap(), + super::parse_mode("+x").unwrap(), if !crate::os::is_wsl_1() { 0o777 } else { 0o776 } ); - assert_eq!(super::parse_mode(Some("a-w".to_owned())).unwrap(), 0o444); - assert_eq!(super::parse_mode(Some("g-r".to_owned())).unwrap(), 0o626); + assert_eq!(super::parse_mode("a-w").unwrap(), 0o444); + assert_eq!(super::parse_mode("g-r").unwrap(), 0o626); } #[test] fn numeric_modes() { - assert_eq!(super::parse_mode(Some("644".to_owned())).unwrap(), 0o644); - assert_eq!(super::parse_mode(Some("+100".to_owned())).unwrap(), 0o766); - assert_eq!(super::parse_mode(Some("-4".to_owned())).unwrap(), 0o662); - assert_eq!(super::parse_mode(None).unwrap(), 0o666); + assert_eq!(super::parse_mode("644").unwrap(), 0o644); + 
assert_eq!(super::parse_mode("+100").unwrap(), 0o766); + assert_eq!(super::parse_mode("-4").unwrap(), 0o662); } } diff --git a/src/uucore/src/lib/features/ringbuffer.rs b/src/uucore/src/lib/features/ringbuffer.rs new file mode 100644 index 000000000..60847df8f --- /dev/null +++ b/src/uucore/src/lib/features/ringbuffer.rs @@ -0,0 +1,134 @@ +//! A fixed-size ring buffer. +use std::collections::VecDeque; + +/// A fixed-size ring buffer backed by a `VecDeque`. +/// +/// If the ring buffer is not full, then calling the [`push_back`] +/// method appends elements, as in a [`VecDeque`]. If the ring buffer +/// is full, then calling [`push_back`] removes the element at the +/// front of the buffer (in a first-in, first-out manner) before +/// appending the new element to the back of the buffer. +/// +/// Use [`from_iter`] to take the last `size` elements from an +/// iterator. +/// +/// # Examples +/// +/// After exceeding the size limit, the oldest elements are dropped in +/// favor of the newest element: +/// +/// ```rust,ignore +/// let mut buffer: RingBuffer = RingBuffer::new(2); +/// buffer.push_back(0); +/// buffer.push_back(1); +/// buffer.push_back(2); +/// assert_eq!(vec![1, 2], buffer.data); +/// ``` +/// +/// Take the last `n` elements from an iterator: +/// +/// ```rust,ignore +/// let iter = [0, 1, 2].iter(); +/// let actual = RingBuffer::from_iter(iter, 2).data; +/// let expected = VecDeque::from_iter([1, 2].iter()); +/// assert_eq!(expected, actual); +/// ``` +pub struct RingBuffer { + pub data: VecDeque, + size: usize, +} + +impl RingBuffer { + pub fn new(size: usize) -> RingBuffer { + RingBuffer { + data: VecDeque::new(), + size, + } + } + + pub fn from_iter(iter: impl Iterator, size: usize) -> RingBuffer { + let mut ringbuf = RingBuffer::new(size); + for value in iter { + ringbuf.push_back(value); + } + ringbuf + } + + /// Append a value to the end of the ring buffer. + /// + /// If the ring buffer is not full, this method return [`None`]. 
If + /// the ring buffer is full, appending a new element will cause the + /// oldest element to be evicted. In that case this method returns + /// that element, or `None`. + /// + /// In the special case where the size limit is zero, each call to + /// this method with input `value` returns `Some(value)`, because + /// the input is immediately evicted. + /// + /// # Examples + /// + /// Appending an element when the buffer is full returns the oldest + /// element: + /// + /// ```rust,ignore + /// let mut buf = RingBuffer::new(3); + /// assert_eq!(None, buf.push_back(0)); + /// assert_eq!(None, buf.push_back(1)); + /// assert_eq!(None, buf.push_back(2)); + /// assert_eq!(Some(0), buf.push_back(3)); + /// ``` + /// + /// If the size limit is zero, then this method always returns the + /// input value: + /// + /// ```rust,ignore + /// let mut buf = RingBuffer::new(0); + /// assert_eq!(Some(0), buf.push_back(0)); + /// assert_eq!(Some(1), buf.push_back(1)); + /// assert_eq!(Some(2), buf.push_back(2)); + /// ``` + pub fn push_back(&mut self, value: T) -> Option { + if self.size == 0 { + return Some(value); + } + let result = if self.size <= self.data.len() { + self.data.pop_front() + } else { + None + }; + self.data.push_back(value); + result + } +} + +#[cfg(test)] +mod tests { + + use crate::ringbuffer::RingBuffer; + use std::collections::VecDeque; + use std::iter::FromIterator; + + #[test] + fn test_size_limit_zero() { + let mut buf = RingBuffer::new(0); + assert_eq!(Some(0), buf.push_back(0)); + assert_eq!(Some(1), buf.push_back(1)); + assert_eq!(Some(2), buf.push_back(2)); + } + + #[test] + fn test_evict_oldest() { + let mut buf = RingBuffer::new(2); + assert_eq!(None, buf.push_back(0)); + assert_eq!(None, buf.push_back(1)); + assert_eq!(Some(0), buf.push_back(2)); + } + + #[test] + fn test_from_iter() { + let iter = [0, 1, 2].iter(); + let actual = RingBuffer::from_iter(iter, 2).data; + let expected = VecDeque::from_iter([1, 2].iter()); + assert_eq!(expected, 
actual); + } +} diff --git a/src/uucore/src/lib/features/utmpx.rs b/src/uucore/src/lib/features/utmpx.rs index 0308d8a5e..826831ba6 100644 --- a/src/uucore/src/lib/features/utmpx.rs +++ b/src/uucore/src/lib/features/utmpx.rs @@ -54,6 +54,8 @@ pub unsafe extern "C" fn utmpxname(_file: *const libc::c_char) -> libc::c_int { 0 } +pub use crate::*; // import macros from `../../macros.rs` + // In case the c_char array doesn't end with NULL macro_rules! chars2string { ($arr:expr) => { @@ -188,47 +190,40 @@ impl Utmpx { /// Canonicalize host name using DNS pub fn canon_host(&self) -> IOResult { - const AI_CANONNAME: libc::c_int = 0x2; let host = self.host(); - let host = host.split(':').next().unwrap(); - let hints = libc::addrinfo { - ai_flags: AI_CANONNAME, - ai_family: 0, - ai_socktype: 0, - ai_protocol: 0, - ai_addrlen: 0, - ai_addr: ptr::null_mut(), - ai_canonname: ptr::null_mut(), - ai_next: ptr::null_mut(), - }; - let c_host = CString::new(host).unwrap(); - let mut res = ptr::null_mut(); - let status = unsafe { - libc::getaddrinfo( - c_host.as_ptr(), - ptr::null(), - &hints as *const _, - &mut res as *mut _, - ) - }; - if status == 0 { - let info: libc::addrinfo = unsafe { ptr::read(res as *const _) }; - // http://lists.gnu.org/archive/html/bug-coreutils/2006-09/msg00300.html - // says Darwin 7.9.0 getaddrinfo returns 0 but sets - // res->ai_canonname to NULL. 
- let ret = if info.ai_canonname.is_null() { - Ok(String::from(host)) - } else { - Ok(unsafe { CString::from_raw(info.ai_canonname).into_string().unwrap() }) + + // TODO: change to use `split_once` when MSRV hits 1.52.0 + // let (hostname, display) = host.split_once(':').unwrap_or((&host, "")); + let mut h = host.split(':'); + let hostname = h.next().unwrap_or(&host); + let display = h.next().unwrap_or(""); + + if !hostname.is_empty() { + extern crate dns_lookup; + use dns_lookup::{getaddrinfo, AddrInfoHints}; + + const AI_CANONNAME: i32 = 0x2; + let hints = AddrInfoHints { + flags: AI_CANONNAME, + ..AddrInfoHints::default() }; - unsafe { - libc::freeaddrinfo(res); + let sockets = getaddrinfo(Some(&hostname), None, Some(hints)) + .unwrap() + .collect::>>()?; + for socket in sockets { + if let Some(ai_canonname) = socket.canonname { + return Ok(if display.is_empty() { + ai_canonname + } else { + format!("{}:{}", ai_canonname, display) + }); + } } - ret - } else { - Err(IOError::last_os_error()) } + + Ok(host.to_string()) } + pub fn iter_all_records() -> UtmpxIter { UtmpxIter } @@ -247,7 +242,7 @@ impl UtmpxIter { utmpxname(cstr.as_ptr()) }; if res != 0 { - println!("Warning: {}", IOError::last_os_error()); + show_warning!("utmpxname: {}", IOError::last_os_error()); } unsafe { setutxent(); diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 6dddf8696..c17f14516 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -25,6 +25,7 @@ mod features; // feature-gated code modules mod mods; // core cross-platform modules // * cross-platform modules +pub use crate::mods::backup_control; pub use crate::mods::coreopts; pub use crate::mods::os; pub use crate::mods::panic; @@ -35,8 +36,12 @@ pub use crate::mods::ranges; pub use crate::features::encoding; #[cfg(feature = "fs")] pub use crate::features::fs; +#[cfg(feature = "fsext")] +pub use crate::features::fsext; #[cfg(feature = "parse_time")] pub use crate::features::parse_time; 
+#[cfg(feature = "ringbuffer")] +pub use crate::features::ringbuffer; #[cfg(feature = "zero-copy")] pub use crate::features::zero_copy; @@ -187,6 +192,7 @@ mod tests { vec.into_iter().collect_str(handling) } + #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_lossy(os_str: &OsStr) { //assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); @@ -210,6 +216,7 @@ mod tests { ); } + #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_ignore(os_str: &OsStr) { //assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); @@ -234,7 +241,7 @@ mod tests { //create a vector containing only correct encoding let test_vec = make_os_vec(&OsString::from("test2")); //expect complete conversion without losses, even when lossy conversion is accepted - let _ = collect_os_str(test_vec.clone(), InvalidEncodingHandling::ConvertLossy) + let _ = collect_os_str(test_vec, InvalidEncodingHandling::ConvertLossy) .expect_complete("Lossy conversion not expected in this test"); } diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index 637e91f8f..438fec960 100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -25,7 +25,7 @@ macro_rules! executable( #[macro_export] macro_rules! show_error( ($($args:tt)+) => ({ - eprint!("{}: error: ", executable!()); + eprint!("{}: ", executable!()); eprintln!($($args)+); }) ); @@ -47,15 +47,6 @@ macro_rules! show_warning( }) ); -/// Show an info message to stderr in a silimar style to GNU coreutils. -#[macro_export] -macro_rules! show_info( - ($($args:tt)+) => ({ - eprint!("{}: ", executable!()); - eprintln!($($args)+); - }) -); - /// Show a bad inocation help message in a similar style to GNU coreutils. #[macro_export] macro_rules! show_usage_error( @@ -176,13 +167,6 @@ macro_rules! msg_invalid_input { }; } -#[macro_export] -macro_rules! 
snippet_no_file_at_path { - ($path:expr) => { - format!("nonexistent path {}", $path) - }; -} - // -- message templates : invalid input : flag #[macro_export] @@ -229,55 +213,6 @@ macro_rules! msg_opt_invalid_should_be { }; } -// -- message templates : invalid input : args - -#[macro_export] -macro_rules! msg_arg_invalid_value { - ($expects:expr, $received:expr) => { - msg_invalid_input!(format!( - "expects its argument to be {}, but was provided {}", - $expects, $received - )) - }; -} - -#[macro_export] -macro_rules! msg_args_invalid_value { - ($expects:expr, $received:expr) => { - msg_invalid_input!(format!( - "expects its arguments to be {}, but was provided {}", - $expects, $received - )) - }; - ($msg:expr) => { - msg_invalid_input!($msg) - }; -} - -#[macro_export] -macro_rules! msg_args_nonexistent_file { - ($received:expr) => { - msg_args_invalid_value!("paths to files", snippet_no_file_at_path!($received)) - }; -} - -#[macro_export] -macro_rules! msg_wrong_number_of_arguments { - () => { - msg_args_invalid_value!("wrong number of arguments") - }; - ($min:expr, $max:expr) => { - msg_args_invalid_value!(format!("expects {}-{} arguments", $min, $max)) - }; - ($exact:expr) => { - if $exact == 1 { - msg_args_invalid_value!("expects 1 argument") - } else { - msg_args_invalid_value!(format!("expects {} arguments", $exact)) - } - }; -} - // -- message templates : invalid input : input combinations #[macro_export] diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index 74725e141..2689361a0 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -1,5 +1,6 @@ // mods ~ cross-platforms modules (core/bundler file) +pub mod backup_control; pub mod coreopts; pub mod os; pub mod panic; diff --git a/src/uucore/src/lib/mods/backup_control.rs b/src/uucore/src/lib/mods/backup_control.rs new file mode 100644 index 000000000..6004ae84d --- /dev/null +++ b/src/uucore/src/lib/mods/backup_control.rs @@ -0,0 +1,97 @@ +use std::{ + env, + 
path::{Path, PathBuf}, +}; + +pub static BACKUP_CONTROL_VALUES: &[&str] = &[ + "simple", "never", "numbered", "t", "existing", "nil", "none", "off", +]; + +pub static BACKUP_CONTROL_LONG_HELP: &str = "The backup suffix is '~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX. Here are the version control values: + +none, off + never make backups (even if --backup is given) + +numbered, t + make numbered backups + +existing, nil + numbered if numbered backups exist, simple otherwise + +simple, never + always make simple backups"; + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum BackupMode { + NoBackup, + SimpleBackup, + NumberedBackup, + ExistingBackup, +} + +pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { + if let Some(suffix) = supplied_suffix { + String::from(suffix) + } else { + env::var("SIMPLE_BACKUP_SUFFIX").unwrap_or("~".to_owned()) + } +} + +pub fn determine_backup_mode(backup_opt_exists: bool, backup_opt: Option<&str>) -> BackupMode { + if backup_opt_exists { + match backup_opt.map(String::from) { + // default is existing, see: + // https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html + None => BackupMode::ExistingBackup, + Some(mode) => match &mode[..] 
{ + "simple" | "never" => BackupMode::SimpleBackup, + "numbered" | "t" => BackupMode::NumberedBackup, + "existing" | "nil" => BackupMode::ExistingBackup, + "none" | "off" => BackupMode::NoBackup, + _ => panic!(), // cannot happen as it is managed by clap + }, + } + } else { + BackupMode::NoBackup + } +} + +pub fn get_backup_path( + backup_mode: BackupMode, + backup_path: &Path, + suffix: &str, +) -> Option<PathBuf> { + match backup_mode { + BackupMode::NoBackup => None, + BackupMode::SimpleBackup => Some(simple_backup_path(backup_path, suffix)), + BackupMode::NumberedBackup => Some(numbered_backup_path(backup_path)), + BackupMode::ExistingBackup => Some(existing_backup_path(backup_path, suffix)), + } +} + +pub fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { + let mut p = path.to_string_lossy().into_owned(); + p.push_str(suffix); + PathBuf::from(p) +} + +pub fn numbered_backup_path(path: &Path) -> PathBuf { + for i in 1_u64.. { + let path_str = &format!("{}.~{}~", path.to_string_lossy(), i); + let path = Path::new(path_str); + if !path.exists() { + return path.to_path_buf(); + } + } + panic!("cannot create backup") +} + +pub fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { + let test_path_str = &format!("{}.~1~", path.to_string_lossy()); + let test_path = Path::new(test_path_str); + if test_path.exists() { + numbered_backup_path(path) + } else { + simple_backup_path(path, suffix) + } +} diff --git a/tests/benches/factor/Cargo.toml b/tests/benches/factor/Cargo.toml new file mode 100644 index 000000000..b3b718477 --- /dev/null +++ b/tests/benches/factor/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "uu_factor_benches" +version = "0.0.0" +authors = ["nicoo "] +license = "MIT" +description = "Benchmarks for the uu_factor integer factorization tool" +homepage = "https://github.com/uutils/coreutils" +edition = "2018" + +[dependencies] +uu_factor = { path = "../../../src/uu/factor" } + +[dev-dependencies] +array-init = "2.0.0" +criterion = "0.3" +rand 
= "0.7" +rand_chacha = "0.2.2" + + +[[bench]] +name = "gcd" +harness = false + +[[bench]] +name = "table" +harness = false diff --git a/src/uu/factor/benches/gcd.rs b/tests/benches/factor/benches/gcd.rs similarity index 100% rename from src/uu/factor/benches/gcd.rs rename to tests/benches/factor/benches/gcd.rs diff --git a/tests/benches/factor/benches/table.rs b/tests/benches/factor/benches/table.rs new file mode 100644 index 000000000..0b31b2b4c --- /dev/null +++ b/tests/benches/factor/benches/table.rs @@ -0,0 +1,78 @@ +use array_init::array_init; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use std::convert::TryInto; +use uu_factor::{table::*, Factors}; + +fn table(c: &mut Criterion) { + #[cfg(target_os = "linux")] + check_personality(); + + const INPUT_SIZE: usize = 128; + assert!( + INPUT_SIZE % CHUNK_SIZE == 0, + "INPUT_SIZE ({}) is not divisible by CHUNK_SIZE ({})", + INPUT_SIZE, + CHUNK_SIZE + ); + let inputs = { + // Deterministic RNG; use an explicitly-named RNG to guarantee stability + use rand::{RngCore, SeedableRng}; + use rand_chacha::ChaCha8Rng; + const SEED: u64 = 0xdead_bebe_ea75_cafe; + let mut rng = ChaCha8Rng::seed_from_u64(SEED); + + std::iter::repeat_with(move || array_init::<_, _, INPUT_SIZE>(|_| rng.next_u64())) + }; + + let mut group = c.benchmark_group("table"); + group.throughput(Throughput::Elements(INPUT_SIZE as _)); + for a in inputs.take(10) { + let a_str = format!("{:?}", a); + group.bench_with_input(BenchmarkId::new("factor_chunk", &a_str), &a, |b, &a| { + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); + for (n_s, f_s) in n_s.chunks_mut(CHUNK_SIZE).zip(f_s.chunks_mut(CHUNK_SIZE)) { + factor_chunk(n_s.try_into().unwrap(), f_s.try_into().unwrap()) + } + }) + }); + group.bench_with_input(BenchmarkId::new("factor", &a_str), &a, |b, &a| { + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| 
Factors::one()); + for (n, f) in n_s.iter_mut().zip(f_s.iter_mut()) { + factor(n, f) + } + }) + }); + } + group.finish() +} + +#[cfg(target_os = "linux")] +fn check_personality() { + use std::fs; + const ADDR_NO_RANDOMIZE: u64 = 0x0040000; + const PERSONALITY_PATH: &'static str = "/proc/self/personality"; + + let p_string = fs::read_to_string(PERSONALITY_PATH) + .expect(&format!("Couldn't read '{}'", PERSONALITY_PATH)) + .strip_suffix("\n") + .unwrap() + .to_owned(); + + let personality = u64::from_str_radix(&p_string, 16).expect(&format!( + "Expected a hex value for personality, got '{:?}'", + p_string + )); + if personality & ADDR_NO_RANDOMIZE == 0 { + eprintln!( + "WARNING: Benchmarking with ASLR enabled (personality is {:x}), results might not be reproducible.", + personality + ); + } +} + +criterion_group!(benches, table); +criterion_main!(benches); diff --git a/tests/by-util/test_base32.rs b/tests/by-util/test_base32.rs index fd49aa951..e36c376be 100644 --- a/tests/by-util/test_base32.rs +++ b/tests/by-util/test_base32.rs @@ -98,7 +98,7 @@ fn test_wrap_bad_arg() { .arg(wrap_param) .arg("b") .fails() - .stderr_only("base32: error: Invalid wrap size: ‘b’: invalid digit found in string\n"); + .stderr_only("base32: Invalid wrap size: ‘b’: invalid digit found in string\n"); } } @@ -109,7 +109,7 @@ fn test_base32_extra_operand() { .arg("a.txt") .arg("a.txt") .fails() - .stderr_only("base32: error: extra operand ‘a.txt’"); + .stderr_only("base32: extra operand ‘a.txt’"); } #[test] @@ -117,5 +117,5 @@ fn test_base32_file_not_found() { new_ucmd!() .arg("a.txt") .fails() - .stderr_only("base32: error: a.txt: No such file or directory"); + .stderr_only("base32: a.txt: No such file or directory"); } diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs index 8d9dc5639..89405d791 100644 --- a/tests/by-util/test_base64.rs +++ b/tests/by-util/test_base64.rs @@ -88,7 +88,7 @@ fn test_wrap_bad_arg() { .arg(wrap_param) .arg("b") .fails() - 
.stderr_only("base64: error: Invalid wrap size: ‘b’: invalid digit found in string\n"); + .stderr_only("base64: Invalid wrap size: ‘b’: invalid digit found in string\n"); } } @@ -99,7 +99,7 @@ fn test_base64_extra_operand() { .arg("a.txt") .arg("a.txt") .fails() - .stderr_only("base64: error: extra operand ‘a.txt’"); + .stderr_only("base64: extra operand ‘a.txt’"); } #[test] @@ -107,5 +107,5 @@ fn test_base64_file_not_found() { new_ucmd!() .arg("a.txt") .fails() - .stderr_only("base64: error: a.txt: No such file or directory"); + .stderr_only("base64: a.txt: No such file or directory"); } diff --git a/tests/by-util/test_basename.rs b/tests/by-util/test_basename.rs index 2a40ba4b9..1d26a922a 100644 --- a/tests/by-util/test_basename.rs +++ b/tests/by-util/test_basename.rs @@ -1,4 +1,5 @@ use crate::common::util::*; +#[cfg(any(unix, target_os = "redox"))] use std::ffi::OsStr; #[test] @@ -108,7 +109,7 @@ fn test_no_args() { fn test_no_args_output() { new_ucmd!() .fails() - .stderr_is("basename: error: missing operand\nTry 'basename --help' for more information."); + .stderr_is("basename: missing operand\nTry 'basename --help' for more information."); } #[test] @@ -118,11 +119,13 @@ fn test_too_many_args() { #[test] fn test_too_many_args_output() { - new_ucmd!().args(&["a", "b", "c"]).fails().stderr_is( - "basename: error: extra operand 'c'\nTry 'basename --help' for more information.", - ); + new_ucmd!() + .args(&["a", "b", "c"]) + .fails() + .stderr_is("basename: extra operand 'c'\nTry 'basename --help' for more information."); } +#[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args(os_str: &OsStr) { let test_vec = vec![os_str.to_os_string()]; new_ucmd!().args(&test_vec).succeeds().stdout_is("fo�o\n"); diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index c8ae29a9d..adda905b3 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -347,7 +347,13 @@ fn test_squeeze_blank_before_numbering() { #[cfg(unix)] fn 
test_dev_random() { let mut buf = [0; 2048]; - let mut proc = new_ucmd!().args(&["/dev/random"]).run_no_wait(); + #[cfg(target_os = "linux")] + const DEV_RANDOM: &str = "/dev/urandom"; + + #[cfg(not(target_os = "linux"))] + const DEV_RANDOM: &str = "/dev/random"; + + let mut proc = new_ucmd!().args(&[DEV_RANDOM]).run_no_wait(); let mut proc_stdout = proc.stdout.take().unwrap(); proc_stdout.read_exact(&mut buf).unwrap(); @@ -395,14 +401,14 @@ fn test_dev_full_show_all() { #[test] #[cfg(unix)] +#[ignore] fn test_domain_socket() { use std::io::prelude::*; use std::sync::{Arc, Barrier}; use std::thread; - use tempdir::TempDir; use unix_socket::UnixListener; - let dir = TempDir::new("unix_socket").expect("failed to create dir"); + let dir = tempfile::Builder::new().prefix("unix_socket").tempdir().expect("failed to create dir"); let socket_path = dir.path().join("sock"); let listener = UnixListener::bind(&socket_path).expect("failed to create socket"); diff --git a/tests/by-util/test_chmod.rs b/tests/by-util/test_chmod.rs index 3958c0a36..f20429a6e 100644 --- a/tests/by-util/test_chmod.rs +++ b/tests/by-util/test_chmod.rs @@ -282,6 +282,26 @@ fn test_chmod_reference_file() { run_single_test(&tests[0], at, ucmd); } +#[test] +fn test_permission_denied() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.mkdir("d/"); + at.mkdir("d/no-x"); + at.mkdir("d/no-x/y"); + + scene.ucmd().arg("u=rw").arg("d/no-x").succeeds(); + + scene + .ucmd() + .arg("-R") + .arg("o=r") + .arg("d") + .fails() + .stderr_is("chmod: 'd/no-x/y': Permission denied"); +} + #[test] fn test_chmod_recursive() { let _guard = UMASK_MUTEX.lock(); @@ -338,7 +358,7 @@ fn test_chmod_preserve_root() { .arg("755") .arg("/") .fails() - .stderr_contains(&"chmod: error: it is dangerous to operate recursively on '/'"); + .stderr_contains(&"chmod: it is dangerous to operate recursively on '/'"); } #[test] diff --git a/tests/by-util/test_chroot.rs b/tests/by-util/test_chroot.rs index 
e2e355e14..0479e7c3a 100644 --- a/tests/by-util/test_chroot.rs +++ b/tests/by-util/test_chroot.rs @@ -21,7 +21,7 @@ fn test_enter_chroot_fails() { assert!(result .stderr_str() - .starts_with("chroot: error: cannot chroot to jail: Operation not permitted (os error 1)")); + .starts_with("chroot: cannot chroot to jail: Operation not permitted (os error 1)")); } #[test] @@ -32,7 +32,7 @@ fn test_no_such_directory() { ucmd.arg("a") .fails() - .stderr_is("chroot: error: cannot change root directory to `a`: no such directory"); + .stderr_is("chroot: cannot change root directory to `a`: no such directory"); } #[test] @@ -43,9 +43,7 @@ fn test_invalid_user_spec() { let result = ucmd.arg("a").arg("--userspec=ARABA:").fails(); - assert!(result - .stderr_str() - .starts_with("chroot: error: invalid userspec")); + assert!(result.stderr_str().starts_with("chroot: invalid userspec")); } #[test] diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index 592e45c58..81ef4c177 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -66,7 +66,7 @@ fn test_invalid_file() { .arg(folder_name) .fails() .no_stdout() - .stderr_contains("cksum: error: 'asdf' No such file or directory"); + .stderr_contains("cksum: 'asdf' No such file or directory"); // Then check when the file is of an invalid type at.mkdir(folder_name); @@ -74,7 +74,7 @@ fn test_invalid_file() { .arg(folder_name) .fails() .no_stdout() - .stderr_contains("cksum: error: 'asdf' Is a directory"); + .stderr_contains("cksum: 'asdf' Is a directory"); } // Make sure crc is correct for files larger than 32 bytes diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index 1e99da0fb..d49219b04 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -214,8 +214,8 @@ fn test_cp_arg_symlink() { fn test_cp_arg_no_clobber() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.arg(TEST_HELLO_WORLD_SOURCE) - .arg("--no-clobber") .arg(TEST_HOW_ARE_YOU_SOURCE) + .arg("--no-clobber") 
.succeeds(); assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "How are you?\n"); @@ -305,7 +305,39 @@ fn test_cp_arg_backup() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.arg(TEST_HELLO_WORLD_SOURCE) - .arg("--backup") + .arg(TEST_HOW_ARE_YOU_SOURCE) + .arg("-b") + .succeeds(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_arg_backup_with_other_args() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .arg("-vbL") + .succeeds(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_arg_backup_arg_first() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup") + .arg(TEST_HELLO_WORLD_SOURCE) .arg(TEST_HOW_ARE_YOU_SOURCE) .succeeds(); @@ -321,6 +353,7 @@ fn test_cp_arg_suffix() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.arg(TEST_HELLO_WORLD_SOURCE) + .arg("-b") .arg("--suffix") .arg(".bak") .arg(TEST_HOW_ARE_YOU_SOURCE) @@ -333,6 +366,207 @@ fn test_cp_arg_suffix() { ); } +#[test] +fn test_cp_custom_backup_suffix_via_env() { + let (at, mut ucmd) = at_and_ucmd!(); + let suffix = "super-suffix-of-the-century"; + + ucmd.arg("-b") + .env("SIMPLE_BACKUP_SUFFIX", suffix) + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}{}", TEST_HOW_ARE_YOU_SOURCE, suffix)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_numbered_with_t() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=t") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + 
at.read(&*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_numbered() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=numbered") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=existing") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=nil") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_numbered_if_existing_backup_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + let existing_backup = &*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE); + at.touch(existing_backup); + + ucmd.arg("--backup=existing") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(TEST_HOW_ARE_YOU_SOURCE)); + assert!(at.file_exists(existing_backup)); + assert_eq!( + at.read(&*format!("{}.~2~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_numbered_if_existing_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + let existing_backup = &*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE); + + at.touch(existing_backup); + ucmd.arg("--backup=nil") + .arg(TEST_HELLO_WORLD_SOURCE) + 
.arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(TEST_HOW_ARE_YOU_SOURCE)); + assert!(at.file_exists(existing_backup)); + assert_eq!( + at.read(&*format!("{}.~2~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_simple() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=simple") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_never() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=never") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_none() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=none") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert!(!at.file_exists(&format!("{}~", TEST_HOW_ARE_YOU_SOURCE))); +} + +#[test] +fn test_cp_backup_off() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=off") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert!(!at.file_exists(&format!("{}~", TEST_HOW_ARE_YOU_SOURCE))); +} + +#[test] +fn test_cp_backup_no_clobber_conflicting_options() { + let (_, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup") + .arg("--no-clobber") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .fails() + .stderr_is("cp: options --backup and --no-clobber are mutually exclusive\nTry 'cp --help' 
for more information."); +} + #[test] fn test_cp_deref_conflicting_options() { new_ucmd!() diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs index 51cab483c..ae0885ff8 100644 --- a/tests/by-util/test_csplit.rs +++ b/tests/by-util/test_csplit.rs @@ -208,7 +208,7 @@ fn test_up_to_match_repeat_over() { ucmd.args(&["numbers50.txt", "/9$/", "{50}"]) .fails() .stdout_is("16\n29\n30\n30\n30\n6\n") - .stderr_is("csplit: error: '/9$/': match not found on repetition 5"); + .stderr_is("csplit: '/9$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -219,7 +219,7 @@ fn test_up_to_match_repeat_over() { ucmd.args(&["numbers50.txt", "/9$/", "{50}", "-k"]) .fails() .stdout_is("16\n29\n30\n30\n30\n6\n") - .stderr_is("csplit: error: '/9$/': match not found on repetition 5"); + .stderr_is("csplit: '/9$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -365,7 +365,7 @@ fn test_option_keep() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["-k", "numbers50.txt", "/20/", "/nope/"]) .fails() - .stderr_is("csplit: error: '/nope/': match not found") + .stderr_is("csplit: '/nope/': match not found") .stdout_is("48\n93\n"); let count = glob(&at.plus_as_string("xx*")) @@ -541,7 +541,7 @@ fn test_up_to_match_context_overflow() { ucmd.args(&["numbers50.txt", "/45/+10"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/45/+10': line number out of range"); + .stderr_is("csplit: '/45/+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -552,7 +552,7 @@ fn test_up_to_match_context_overflow() { ucmd.args(&["numbers50.txt", "/45/+10", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/45/+10': line number out of range"); + .stderr_is("csplit: '/45/+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting 
splits") @@ -567,7 +567,7 @@ fn test_skip_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "%5%-10"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '%5%-10': line number out of range"); + .stderr_is("csplit: '%5%-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -578,7 +578,7 @@ fn test_skip_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "%5%-10", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '%5%-10': line number out of range"); + .stderr_is("csplit: '%5%-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -592,7 +592,7 @@ fn test_skip_to_match_context_overflow() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%45%+10"]) .fails() - .stderr_only("csplit: error: '%45%+10': line number out of range"); + .stderr_only("csplit: '%45%+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -602,7 +602,7 @@ fn test_skip_to_match_context_overflow() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%45%+10", "-k"]) .fails() - .stderr_only("csplit: error: '%45%+10': line number out of range"); + .stderr_only("csplit: '%45%+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -616,7 +616,7 @@ fn test_up_to_no_match1() { ucmd.args(&["numbers50.txt", "/4/", "/nope/"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -627,7 +627,7 @@ fn test_up_to_no_match1() { ucmd.args(&["numbers50.txt", "/4/", "/nope/", "-k"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) 
.expect("counting splits") @@ -643,7 +643,7 @@ fn test_up_to_no_match2() { ucmd.args(&["numbers50.txt", "/4/", "/nope/", "{50}"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -654,7 +654,7 @@ fn test_up_to_no_match2() { ucmd.args(&["numbers50.txt", "/4/", "/nope/", "{50}", "-k"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -670,7 +670,7 @@ fn test_up_to_no_match3() { ucmd.args(&["numbers50.txt", "/0$/", "{50}"]) .fails() .stdout_is("18\n30\n30\n30\n30\n3\n") - .stderr_is("csplit: error: '/0$/': match not found on repetition 5"); + .stderr_is("csplit: '/0$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -681,7 +681,7 @@ fn test_up_to_no_match3() { ucmd.args(&["numbers50.txt", "/0$/", "{50}", "-k"]) .fails() .stdout_is("18\n30\n30\n30\n30\n3\n") - .stderr_is("csplit: error: '/0$/': match not found on repetition 5"); + .stderr_is("csplit: '/0$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -701,7 +701,7 @@ fn test_up_to_no_match4() { ucmd.args(&["numbers50.txt", "/nope/", "/4/"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -712,7 +712,7 @@ fn test_up_to_no_match4() { ucmd.args(&["numbers50.txt", "/nope/", "/4/", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -741,7 +741,7 
@@ fn test_up_to_no_match6() { ucmd.args(&["numbers50.txt", "/nope/-5"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/-5': match not found"); + .stderr_is("csplit: '/nope/-5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -752,7 +752,7 @@ fn test_up_to_no_match6() { ucmd.args(&["numbers50.txt", "/nope/-5", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/-5': match not found"); + .stderr_is("csplit: '/nope/-5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -767,7 +767,7 @@ fn test_up_to_no_match7() { ucmd.args(&["numbers50.txt", "/nope/+5"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/+5': match not found"); + .stderr_is("csplit: '/nope/+5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -778,7 +778,7 @@ fn test_up_to_no_match7() { ucmd.args(&["numbers50.txt", "/nope/+5", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/+5': match not found"); + .stderr_is("csplit: '/nope/+5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -792,7 +792,7 @@ fn test_skip_to_no_match1() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -805,7 +805,7 @@ fn test_skip_to_no_match2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%", "{50}"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -818,7 +818,7 @@ fn test_skip_to_no_match3() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%0$%", "{50}"]) .fails() - 
.stderr_only("csplit: error: '%0$%': match not found on repetition 5"); + .stderr_only("csplit: '%0$%': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -831,7 +831,7 @@ fn test_skip_to_no_match4() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%", "/4/"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -858,7 +858,7 @@ fn test_skip_to_no_match6() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%-5"]) .fails() - .stderr_only("csplit: error: '%nope%-5': match not found"); + .stderr_only("csplit: '%nope%-5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -871,7 +871,7 @@ fn test_skip_to_no_match7() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%+5"]) .fails() - .stderr_only("csplit: error: '%nope%+5': match not found"); + .stderr_only("csplit: '%nope%+5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -884,7 +884,7 @@ fn test_no_match() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -895,7 +895,7 @@ fn test_no_match() { ucmd.args(&["numbers50.txt", "/nope/"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -992,7 +992,7 @@ fn test_too_small_linenum_repeat() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/20/", "10", "{*}"]) .fails() - .stderr_is("csplit: error: '10': line number out of 
range on repetition 5") + .stderr_is("csplit: '10': line number out of range on repetition 5") .stdout_is("48\n0\n0\n30\n30\n30\n3\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1003,7 +1003,7 @@ fn test_too_small_linenum_repeat() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/20/", "10", "{*}", "-k"]) .fails() - .stderr_is("csplit: error: '10': line number out of range on repetition 5") + .stderr_is("csplit: '10': line number out of range on repetition 5") .stdout_is("48\n0\n0\n30\n30\n30\n3\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1025,7 +1025,7 @@ fn test_linenum_out_of_range1() { ucmd.args(&["numbers50.txt", "100"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1036,7 +1036,7 @@ fn test_linenum_out_of_range1() { ucmd.args(&["numbers50.txt", "100", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1051,7 +1051,7 @@ fn test_linenum_out_of_range2() { ucmd.args(&["numbers50.txt", "10", "100"]) .fails() .stdout_is("18\n123\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1062,7 +1062,7 @@ fn test_linenum_out_of_range2() { ucmd.args(&["numbers50.txt", "10", "100", "-k"]) .fails() .stdout_is("18\n123\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1078,7 +1078,7 @@ fn test_linenum_out_of_range3() { 
ucmd.args(&["numbers50.txt", "40", "{2}"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1089,7 +1089,7 @@ fn test_linenum_out_of_range3() { ucmd.args(&["numbers50.txt", "40", "{2}", "-k"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1105,7 +1105,7 @@ fn test_linenum_out_of_range4() { ucmd.args(&["numbers50.txt", "40", "{*}"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1116,7 +1116,7 @@ fn test_linenum_out_of_range4() { ucmd.args(&["numbers50.txt", "40", "{*}", "-k"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1132,7 +1132,7 @@ fn test_skip_to_match_negative_offset_before_a_match() { ucmd.args(&["numbers50.txt", "/20/-10", "/15/"]) .fails() .stdout_is("18\n123\n") - .stderr_is("csplit: error: '/15/': match not found"); + .stderr_is("csplit: '/15/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1177,7 +1177,7 @@ fn test_corner_case2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/10/-5", "/10/"]) .fails() - .stderr_is("csplit: error: '/10/': match not found") + 
.stderr_is("csplit: '/10/': match not found") .stdout_is("8\n133\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1191,7 +1191,7 @@ fn test_corner_case3() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/15/-3", "14", "/15/"]) .fails() - .stderr_is("csplit: error: '/15/': match not found") + .stderr_is("csplit: '/15/': match not found") .stdout_is("24\n6\n111\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1223,7 +1223,7 @@ fn test_up_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "/5/-10"]) .fails() .stdout_is("0\n141\n") - .stderr_is("csplit: error: '/5/-10': line number out of range"); + .stderr_is("csplit: '/5/-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -1234,7 +1234,7 @@ fn test_up_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "/5/-10", "-k"]) .fails() .stdout_is("0\n141\n") - .stderr_is("csplit: error: '/5/-10': line number out of range"); + .stderr_is("csplit: '/5/-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -1251,7 +1251,7 @@ fn test_linenum_range_with_up_to_match1() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-5"]) .fails() - .stderr_is("csplit: error: '/12/-5': line number out of range") + .stderr_is("csplit: '/12/-5': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1262,7 +1262,7 @@ fn test_linenum_range_with_up_to_match1() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-5", "-k"]) .fails() - .stderr_is("csplit: error: '/12/-5': line number out of range") + .stderr_is("csplit: '/12/-5': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1281,7 +1281,7 @@ fn test_linenum_range_with_up_to_match2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-15"]) .fails() - 
.stderr_is("csplit: error: '/12/-15': line number out of range") + .stderr_is("csplit: '/12/-15': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1292,7 +1292,7 @@ fn test_linenum_range_with_up_to_match2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-15", "-k"]) .fails() - .stderr_is("csplit: error: '/12/-15': line number out of range") + .stderr_is("csplit: '/12/-15': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1310,7 +1310,7 @@ fn test_linenum_range_with_up_to_match3() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/10/", "-k"]) .fails() - .stderr_is("csplit: error: '/10/': match not found") + .stderr_is("csplit: '/10/': match not found") .stdout_is("18\n123\n"); let count = glob(&at.plus_as_string("xx*")) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 875317721..413b73154 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -149,11 +149,11 @@ fn test_directory_and_no_such_file() { ucmd.arg("-b1") .arg("some") .run() - .stderr_is("cut: error: some: Is a directory\n"); + .stderr_is("cut: some: Is a directory\n"); new_ucmd!() .arg("-b1") .arg("some") .run() - .stderr_is("cut: error: some: No such file or directory\n"); + .stderr_is("cut: some: No such file or directory\n"); } diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 0ca0a74ea..f4990566a 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -104,6 +104,29 @@ fn test_date_format_full_day() { .stdout_matches(&re); } +#[test] +fn test_date_nano_seconds() { + // %N nanoseconds (000000000..999999999) + let re = Regex::new(r"^\d{1,9}$").unwrap(); + new_ucmd!().arg("+%N").succeeds().stdout_matches(&re); +} + +#[test] +fn test_date_format_without_plus() { + // [+FORMAT] + new_ucmd!() + .arg("%s") + .fails() + .stderr_contains("date: invalid date 
‘%s’") + .code_is(1); +} + +#[test] +fn test_date_format_literal() { + new_ucmd!().arg("+%%s").succeeds().stdout_is("%s\n"); + new_ucmd!().arg("+%%N").succeeds().stdout_is("%N\n"); +} + #[test] #[cfg(all(unix, not(target_os = "macos")))] fn test_date_set_valid() { diff --git a/tests/by-util/test_df.rs b/tests/by-util/test_df.rs index e3b7141d1..ac3776b96 100644 --- a/tests/by-util/test_df.rs +++ b/tests/by-util/test_df.rs @@ -27,7 +27,7 @@ fn test_df_output() { stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n"); } else { new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only( - "Filesystem Size Used Available Use% Mounted on \n" + "Filesystem Size Used Available Use% Mounted on \n", ); } } diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 111f2dc90..c5d262c3b 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -53,7 +53,15 @@ fn _du_basics_subdir(s: &str) { fn _du_basics_subdir(s: &str) { assert_eq!(s, "0\tsubdir/deeper\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_basics_subdir(s: &str) { + assert_eq!(s, "8\tsubdir/deeper\n"); +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_basics_subdir(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -68,7 +76,7 @@ fn test_du_basics_bad_name() { new_ucmd!() .arg("bad_name") .succeeds() // TODO: replace with ".fails()" once `du` is fixed - .stderr_only("du: error: bad_name: No such file or directory\n"); + .stderr_only("du: bad_name: No such file or directory\n"); } #[test] @@ -100,7 +108,15 @@ fn _du_soft_link(s: &str) { fn _du_soft_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_soft_link(s: &str) { + assert_eq!(s, "16\tsubdir/links\n"); +} +#[cfg(all( + not(target_vendor 
= "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_soft_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -113,11 +129,9 @@ fn _du_soft_link(s: &str) { #[test] fn test_du_hard_link() { let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; - let result_ln = scene.cmd("ln").arg(SUB_FILE).arg(SUB_LINK).run(); - if !result_ln.succeeded() { - scene.ccmd("ln").arg(SUB_FILE).arg(SUB_LINK).succeeds(); - } + at.hard_link(SUB_FILE, SUB_LINK); let result = scene.ucmd().arg(SUB_DIR_LINKS).succeeds(); @@ -141,7 +155,15 @@ fn _du_hard_link(s: &str) { fn _du_hard_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n") } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_hard_link(s: &str) { + assert_eq!(s, "16\tsubdir/links\n") +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_hard_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -181,7 +203,15 @@ fn _du_d_flag(s: &str) { fn _du_d_flag(s: &str) { assert_eq!(s, "8\t./subdir\n8\t./\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_d_flag(s: &str) { + assert_eq!(s, "28\t./subdir\n36\t./\n"); +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_d_flag(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index bb0760676..f20739e13 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -17,11 +17,11 @@ fn test_complex_arithmetic() { .args(&["9223372036854775807", "+", "9223372036854775807"]) .run(); run.stdout_is(""); - run.stderr_is("expr: error: +: Numerical result out of range"); + run.stderr_is("expr: +: Numerical result out of 
range"); let run = new_ucmd!().args(&["9", "/", "0"]).run(); run.stdout_is(""); - run.stderr_is("expr: error: division by zero"); + run.stderr_is("expr: division by zero"); } #[test] @@ -54,3 +54,32 @@ fn test_and() { new_ucmd!().args(&["", "&", "1"]).run().stdout_is("0\n"); } + +#[test] +fn test_substr() { + new_ucmd!() + .args(&["substr", "abc", "1", "1"]) + .succeeds() + .stdout_only("a\n"); +} + +#[test] +fn test_invalid_substr() { + new_ucmd!() + .args(&["substr", "abc", "0", "1"]) + .fails() + .status_code(1) + .stdout_only("\n"); + + new_ucmd!() + .args(&["substr", "abc", &(std::usize::MAX.to_string() + "0"), "1"]) + .fails() + .status_code(1) + .stdout_only("\n"); + + new_ucmd!() + .args(&["substr", "abc", "0", &(std::usize::MAX.to_string() + "0")]) + .fails() + .status_code(1) + .stdout_only("\n"); +} diff --git a/tests/by-util/test_fmt.rs b/tests/by-util/test_fmt.rs index 21a5f3396..0d6d9bb24 100644 --- a/tests/by-util/test_fmt.rs +++ b/tests/by-util/test_fmt.rs @@ -30,21 +30,19 @@ fn test_fmt_w_too_big() { //.stdout_is_fixture("call_graph.expected"); assert_eq!( result.stderr_str().trim(), - "fmt: error: invalid width: '2501': Numerical result out of range" + "fmt: invalid width: '2501': Numerical result out of range" ); } -/* #[test] - Fails for now, see https://github.com/uutils/coreutils/issues/1501 +#[test] fn test_fmt_w() { let result = new_ucmd!() .arg("-w") .arg("10") .arg("one-word-per-line.txt") .run(); - //.stdout_is_fixture("call_graph.expected"); - assert_eq!(result.stdout_str().trim(), "this is a file with one word per line"); + //.stdout_is_fixture("call_graph.expected"); + assert_eq!( + result.stdout_str().trim(), + "this is\na file\nwith one\nword per\nline" + ); } - - -fmt is pretty broken in general, needs more works to have more tests - */ diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 4f009c800..b2a3cf0cb 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -129,6 +129,15 @@ fn 
test_zero_terminated_syntax_2() { .stdout_is("x\0y"); } +#[test] +fn test_zero_terminated_negative_lines() { + new_ucmd!() + .args(&["-z", "-n", "-1"]) + .pipe_in("x\0y\0z\0") + .run() + .stdout_is("x\0y\0"); +} + #[test] fn test_negative_byte_syntax() { new_ucmd!() @@ -162,6 +171,18 @@ fn test_no_such_file_or_directory() { .stderr_contains("cannot open 'no_such_file.toml' for reading: No such file or directory"); } +/// Test that each non-existent files gets its own error message printed. +#[test] +fn test_multiple_nonexistent_files() { + new_ucmd!() + .args(&["bogusfile1", "bogusfile2"]) + .fails() + .stdout_does_not_contain("==> bogusfile1 <==") + .stderr_contains("cannot open 'bogusfile1' for reading: No such file or directory") + .stdout_does_not_contain("==> bogusfile2 <==") + .stderr_contains("cannot open 'bogusfile2' for reading: No such file or directory"); +} + // there was a bug not caught by previous tests // where for negative n > 3, the total amount of lines // was correct, but it would eat from the second line @@ -196,3 +217,28 @@ fn test_obsolete_extras() { .succeeds() .stdout_is("==> standard input <==\n1\02\03\04\05\0"); } + +#[test] +fn test_multiple_files() { + new_ucmd!() + .args(&["emptyfile.txt", "emptyfile.txt"]) + .succeeds() + .stdout_is("==> emptyfile.txt <==\n\n==> emptyfile.txt <==\n"); +} + +#[test] +fn test_multiple_files_with_stdin() { + new_ucmd!() + .args(&["emptyfile.txt", "-", "emptyfile.txt"]) + .pipe_in("hello\n") + .succeeds() + .stdout_is( + "==> emptyfile.txt <== + +==> standard input <== +hello + +==> emptyfile.txt <== +", + ); +} diff --git a/tests/by-util/test_id.rs b/tests/by-util/test_id.rs index 534736a32..1f8249aab 100644 --- a/tests/by-util/test_id.rs +++ b/tests/by-util/test_id.rs @@ -7,7 +7,7 @@ use crate::common::util::*; // From the Logs: "Build (ubuntu-18.04, x86_64-unknown-linux-gnu, feat_os_unix, use-cross)" // stderr: "whoami: cannot find name for user ID 1001" // Maybe: "adduser --uid 1001 username" can put 
things right? -// stderr = id: error: Could not find uid 1001: No such id: 1001 +// stderr = id: Could not find uid 1001: No such id: 1001 fn skipping_test_is_okay(result: &CmdResult, needle: &str) -> bool { if !result.succeeded() { println!("result.stdout = {}", result.stdout_str()); diff --git a/tests/by-util/test_install.rs b/tests/by-util/test_install.rs index fc4459072..fb79454c1 100644 --- a/tests/by-util/test_install.rs +++ b/tests/by-util/test_install.rs @@ -301,7 +301,7 @@ fn test_install_target_new_file_with_group() { .arg(format!("{}/{}", dir, file)) .run(); - if is_ci() && result.stderr_str().contains("error: no such group:") { + if is_ci() && result.stderr_str().contains("no such group:") { // In the CI, some server are failing to return the group. // As seems to be a configuration issue, ignoring it return; @@ -328,7 +328,7 @@ fn test_install_target_new_file_with_owner() { .arg(format!("{}/{}", dir, file)) .run(); - if is_ci() && result.stderr_str().contains("error: no such user:") { + if is_ci() && result.stderr_str().contains("no such user:") { // In the CI, some server are failing to return the user id. 
// As seems to be a configuration issue, ignoring it return; diff --git a/tests/by-util/test_join.rs b/tests/by-util/test_join.rs index b0311df84..a8f046851 100644 --- a/tests/by-util/test_join.rs +++ b/tests/by-util/test_join.rs @@ -148,7 +148,7 @@ fn multitab_character() { .arg("-t") .arg("э") .fails() - .stderr_is("join: error: multi-character tab э"); + .stderr_is("join: multi-character tab э"); } #[test] @@ -211,7 +211,7 @@ fn empty_format() { .arg("-o") .arg("") .fails() - .stderr_is("join: error: invalid file number in field spec: ''"); + .stderr_is("join: invalid file number in field spec: ''"); } #[test] diff --git a/tests/by-util/test_link.rs b/tests/by-util/test_link.rs index 99559a7fe..6ac3f35cc 100644 --- a/tests/by-util/test_link.rs +++ b/tests/by-util/test_link.rs @@ -23,7 +23,7 @@ fn test_link_no_circular() { ucmd.args(&[link, link]) .fails() - .stderr_is("link: error: No such file or directory (os error 2)\n"); + .stderr_is("link: No such file or directory (os error 2)\n"); assert!(!at.file_exists(link)); } @@ -35,7 +35,7 @@ fn test_link_nonexistent_file() { ucmd.args(&[file, link]) .fails() - .stderr_is("link: error: No such file or directory (os error 2)\n"); + .stderr_is("link: No such file or directory (os error 2)\n"); assert!(!at.file_exists(file)); assert!(!at.file_exists(link)); } diff --git a/tests/by-util/test_ln.rs b/tests/by-util/test_ln.rs index 646091b09..f2508ecbf 100644 --- a/tests/by-util/test_ln.rs +++ b/tests/by-util/test_ln.rs @@ -409,7 +409,7 @@ fn test_symlink_missing_destination() { at.touch(file); ucmd.args(&["-s", "-T", file]).fails().stderr_is(format!( - "ln: error: missing destination file operand after '{}'", + "ln: missing destination file operand after '{}'", file )); } diff --git a/tests/by-util/test_logname.rs b/tests/by-util/test_logname.rs index bd9d04a50..0e8125191 100644 --- a/tests/by-util/test_logname.rs +++ b/tests/by-util/test_logname.rs @@ -9,7 +9,7 @@ fn test_normal() { for (key, value) in env::vars() { 
println!("{}: {}", key, value); } - if (is_ci() || uucore::os::is_wsl_1()) && result.stderr_str().contains("error: no login name") { + if (is_ci() || uucore::os::is_wsl_1()) && result.stderr_str().contains("no login name") { // ToDO: investigate WSL failure // In the CI, some server are failing to return logname. // As seems to be a configuration issue, ignoring it diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 110764aa5..01c5ab5c4 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -5,6 +5,7 @@ use crate::common::util::*; extern crate regex; use self::regex::Regex; +use std::collections::HashMap; use std::path::Path; use std::thread::sleep; use std::time::Duration; @@ -18,9 +19,7 @@ use std::path::PathBuf; #[cfg(not(windows))] use std::sync::Mutex; #[cfg(not(windows))] -extern crate tempdir; -#[cfg(not(windows))] -use self::tempdir::TempDir; +extern crate tempfile; #[cfg(not(windows))] lazy_static! { @@ -166,7 +165,7 @@ fn test_ls_width() { .ucmd() .args(&option.split(" ").collect::>()) .fails() - .stderr_only("ls: error: invalid line width: ‘1a’"); + .stderr_only("ls: invalid line width: ‘1a’"); } } @@ -308,6 +307,50 @@ fn test_ls_long() { } } +#[test] +fn test_ls_long_total_size() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch(&at.plus_as_string("test-long")); + at.append("test-long", "1"); + at.touch(&at.plus_as_string("test-long2")); + at.append("test-long2", "2"); + + let expected_prints: HashMap<_, _> = if cfg!(unix) { + [ + ("long_vanilla", "total 8"), + ("long_human_readable", "total 8.0K"), + ("long_si", "total 8.2k"), + ] + .iter() + .cloned() + .collect() + } else { + [ + ("long_vanilla", "total 2"), + ("long_human_readable", "total 2"), + ("long_si", "total 2"), + ] + .iter() + .cloned() + .collect() + }; + + for arg in &["-l", "--long", "--format=long", "--format=verbose"] { + let result = scene.ucmd().arg(arg).succeeds(); + 
result.stdout_contains(expected_prints["long_vanilla"]); + + for arg2 in &["-h", "--human-readable", "--si"] { + let result = scene.ucmd().arg(arg).arg(arg2).succeeds(); + result.stdout_contains(if *arg2 == "--si" { + expected_prints["long_si"] + } else { + expected_prints["long_human_readable"] + }); + } + } +} + #[test] fn test_ls_long_formats() { let scene = TestScenario::new(util_name!()); @@ -640,7 +683,7 @@ fn test_ls_styles() { at.touch("test"); let re_full = Regex::new( - r"[a-z-]* \d* \w* \w* \d* \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d* \+\d{4} test\n", + r"[a-z-]* \d* \w* \w* \d* \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d* (\+|\-)\d{4} test\n", ) .unwrap(); let re_long = @@ -830,7 +873,7 @@ fn test_ls_files_dirs() { .ucmd() .arg("doesntexist") .fails() - .stderr_contains(&"error: 'doesntexist': No such file or directory"); + .stderr_contains(&"'doesntexist': No such file or directory"); // One exists, the other doesn't scene @@ -838,7 +881,7 @@ fn test_ls_files_dirs() { .arg("a") .arg("doesntexist") .fails() - .stderr_contains(&"error: 'doesntexist': No such file or directory") + .stderr_contains(&"'doesntexist': No such file or directory") .stdout_contains(&"a:"); } @@ -1042,7 +1085,7 @@ fn test_ls_indicator_style() { { use self::unix_socket::UnixListener; - let dir = TempDir::new("unix_socket").expect("failed to create dir"); + let dir = tempfile::Builder::new().prefix("unix_socket").tempdir().expect("failed to create dir"); let socket_path = dir.path().join("sock"); let _listener = UnixListener::bind(&socket_path).expect("failed to create socket"); @@ -1921,3 +1964,48 @@ fn test_ls_sort_extension() { expected, ); } + +#[test] +fn test_ls_path() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let file1 = "file1"; + let file2 = "file2"; + let dir = "dir"; + let path = &format!("{}/{}", dir, file2); + + at.mkdir(dir); + at.touch(file1); + at.touch(path); + + let expected_stdout = &format!("{}\n", path); + 
scene.ucmd().arg(path).run().stdout_is(expected_stdout); + + let expected_stdout = &format!("./{}\n", path); + scene + .ucmd() + .arg(format!("./{}", path)) + .run() + .stdout_is(expected_stdout); + + let abs_path = format!("{}/{}", at.as_string(), path); + let expected_stdout = if cfg!(windows) { + format!("\'{}\'\n", abs_path) + } else { + format!("{}\n", abs_path) + }; + scene.ucmd().arg(&abs_path).run().stdout_is(expected_stdout); + + let expected_stdout = if cfg!(windows) { + format!("{} {}\n", path, file1) + } else { + format!("{}\n{}\n", path, file1) + }; + scene + .ucmd() + .arg(file1) + .arg(path) + .run() + .stdout_is(expected_stdout); +} diff --git a/tests/by-util/test_mkfifo.rs b/tests/by-util/test_mkfifo.rs index 23108d976..318a2ea5d 100644 --- a/tests/by-util/test_mkfifo.rs +++ b/tests/by-util/test_mkfifo.rs @@ -2,9 +2,7 @@ use crate::common::util::*; #[test] fn test_create_fifo_missing_operand() { - new_ucmd!() - .fails() - .stderr_is("mkfifo: error: missing operand"); + new_ucmd!().fails().stderr_is("mkfifo: missing operand"); } #[test] @@ -43,5 +41,5 @@ fn test_create_one_fifo_already_exists() { .arg("abcdef") .arg("abcdef") .fails() - .stderr_is("mkfifo: error: cannot create fifo 'abcdef': File exists"); + .stderr_is("mkfifo: cannot create fifo 'abcdef': File exists"); } diff --git a/tests/by-util/test_mknod.rs b/tests/by-util/test_mknod.rs index 651491045..1d39372ac 100644 --- a/tests/by-util/test_mknod.rs +++ b/tests/by-util/test_mknod.rs @@ -1 +1,124 @@ -// ToDO: add tests +use crate::common::util::*; + +#[cfg(not(windows))] +#[test] +fn test_mknod_help() { + new_ucmd!() + .arg("--help") + .succeeds() + .no_stderr() + .stdout_contains("USAGE:"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_version() { + assert!(new_ucmd!() + .arg("--version") + .succeeds() + .no_stderr() + .stdout_str() + .starts_with("mknod")); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_default_writable() { + let ts = TestScenario::new(util_name!()); + 
ts.ucmd().arg("test_file").arg("p").succeeds(); + assert!(ts.fixtures.is_fifo("test_file")); + assert!(!ts.fixtures.metadata("test_file").permissions().readonly()); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_mnemonic_usage() { + let ts = TestScenario::new(util_name!()); + ts.ucmd().arg("test_file").arg("pipe").succeeds(); + assert!(ts.fixtures.is_fifo("test_file")); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_read_only() { + let ts = TestScenario::new(util_name!()); + ts.ucmd() + .arg("-m") + .arg("a=r") + .arg("test_file") + .arg("p") + .succeeds(); + assert!(ts.fixtures.is_fifo("test_file")); + assert!(ts.fixtures.metadata("test_file").permissions().readonly()); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_invalid_extra_operand() { + new_ucmd!() + .arg("test_file") + .arg("p") + .arg("1") + .arg("2") + .fails() + .stderr_contains(&"Fifos do not have major and minor device numbers"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_character_device_requires_major_and_minor() { + new_ucmd!() + .arg("test_file") + .arg("c") + .fails() + .status_code(1) + .stderr_contains(&"Special files require major and minor device numbers."); + new_ucmd!() + .arg("test_file") + .arg("c") + .arg("1") + .fails() + .status_code(1) + .stderr_contains(&"Special files require major and minor device numbers."); + new_ucmd!() + .arg("test_file") + .arg("c") + .arg("1") + .arg("c") + .fails() + .status_code(1) + .stderr_contains(&"Invalid value for ''"); + new_ucmd!() + .arg("test_file") + .arg("c") + .arg("c") + .arg("1") + .fails() + .status_code(1) + .stderr_contains(&"Invalid value for ''"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_invalid_arg() { + new_ucmd!() + .arg("--foo") + .fails() + .status_code(1) + .no_stdout() + .stderr_contains(&"Found argument '--foo' which wasn't expected"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_invalid_mode() { + new_ucmd!() + .arg("--mode") + .arg("rw") + .arg("test_file") + .arg("p") + 
.fails() + .no_stdout() + .status_code(1) + .stderr_contains(&"invalid mode"); +} diff --git a/tests/by-util/test_mktemp.rs b/tests/by-util/test_mktemp.rs index c273c407c..617f0fd06 100644 --- a/tests/by-util/test_mktemp.rs +++ b/tests/by-util/test_mktemp.rs @@ -120,7 +120,7 @@ fn test_mktemp_mktemp_t() { .arg(TEST_TEMPLATE8) .fails() .no_stdout() - .stderr_contains("error: suffix cannot contain any path separators"); + .stderr_contains("suffix cannot contain any path separators"); } #[test] diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index e8ba43282..e0bdd9ef3 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -251,6 +251,40 @@ fn test_mv_simple_backup() { assert!(at.file_exists(&format!("{}~", file_b))); } +#[test] +fn test_mv_simple_backup_with_file_extension() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_simple_backup_file_a.txt"; + let file_b = "test_mv_simple_backup_file_b.txt"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("-b") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + +#[test] +fn test_mv_arg_backup_arg_first() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_simple_backup_file_a"; + let file_b = "test_mv_simple_backup_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup").arg(file_a).arg(file_b).succeeds(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + #[test] fn test_mv_custom_backup_suffix() { let (at, mut ucmd) = at_and_ucmd!(); @@ -293,7 +327,7 @@ fn test_mv_custom_backup_suffix_via_env() { } #[test] -fn test_mv_backup_numbering() { +fn test_mv_backup_numbered_with_t() { let (at, mut ucmd) = at_and_ucmd!(); let file_a = "test_mv_backup_numbering_file_a"; let file_b = "test_mv_backup_numbering_file_b"; @@ -311,6 +345,25 @@ 
fn test_mv_backup_numbering() { assert!(at.file_exists(&format!("{}.~1~", file_b))); } +#[test] +fn test_mv_backup_numbered() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup=numbered") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}.~1~", file_b))); +} + #[test] fn test_mv_backup_existing() { let (at, mut ucmd) = at_and_ucmd!(); @@ -330,6 +383,67 @@ fn test_mv_backup_existing() { assert!(at.file_exists(&format!("{}~", file_b))); } +#[test] +fn test_mv_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup=nil") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + +#[test] +fn test_mv_numbered_if_existing_backup_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + let file_b_backup = "test_mv_backup_numbering_file_b.~1~"; + + at.touch(file_a); + at.touch(file_b); + at.touch(file_b_backup); + ucmd.arg("--backup=existing") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(file_b)); + assert!(at.file_exists(file_b_backup)); + assert!(at.file_exists(&*format!("{}.~2~", file_b))); +} + +#[test] +fn test_mv_numbered_if_existing_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + let file_b_backup = "test_mv_backup_numbering_file_b.~1~"; + + at.touch(file_a); + 
at.touch(file_b); + at.touch(file_b_backup); + ucmd.arg("--backup=nil") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(file_b)); + assert!(at.file_exists(file_b_backup)); + assert!(at.file_exists(&*format!("{}.~2~", file_b))); +} + #[test] fn test_mv_backup_simple() { let (at, mut ucmd) = at_and_ucmd!(); @@ -349,6 +463,25 @@ fn test_mv_backup_simple() { assert!(at.file_exists(&format!("{}~", file_b))); } +#[test] +fn test_mv_backup_never() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup=never") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + #[test] fn test_mv_backup_none() { let (at, mut ucmd) = at_and_ucmd!(); @@ -369,17 +502,14 @@ fn test_mv_backup_none() { } #[test] -fn test_mv_existing_backup() { +fn test_mv_backup_off() { let (at, mut ucmd) = at_and_ucmd!(); - let file_a = "test_mv_existing_backup_file_a"; - let file_b = "test_mv_existing_backup_file_b"; - let file_b_backup = "test_mv_existing_backup_file_b.~1~"; - let resulting_backup = "test_mv_existing_backup_file_b.~2~"; + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; at.touch(file_a); at.touch(file_b); - at.touch(file_b_backup); - ucmd.arg("--backup=nil") + ucmd.arg("--backup=off") .arg(file_a) .arg(file_b) .succeeds() @@ -387,8 +517,19 @@ fn test_mv_existing_backup() { assert!(!at.file_exists(file_a)); assert!(at.file_exists(file_b)); - assert!(at.file_exists(file_b_backup)); - assert!(at.file_exists(resulting_backup)); + assert!(!at.file_exists(&format!("{}~", file_b))); +} + +#[test] +fn test_mv_backup_no_clobber_conflicting_options() { + let (_, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup") + 
.arg("--no-clobber") + .arg("file1") + .arg("file2") + .fails() + .stderr_is("mv: options --backup and --no-clobber are mutually exclusive\nTry 'mv --help' for more information."); } #[test] @@ -472,7 +613,7 @@ fn test_mv_overwrite_nonempty_dir() { at.touch(dummy); // Not same error as GNU; the error message is a rust builtin // TODO: test (and implement) correct error message (or at least decide whether to do so) - // Current: "mv: error: couldn't rename path (Directory not empty; from=a; to=b)" + // Current: "mv: couldn't rename path (Directory not empty; from=a; to=b)" // GNU: "mv: cannot move ‘a’ to ‘b’: Directory not empty" // Verbose output for the move should not be shown on failure @@ -539,7 +680,7 @@ fn test_mv_errors() { .arg(dir) .fails() .stderr_is(format!( - "mv: error: cannot overwrite directory ‘{}’ with non-directory\n", + "mv: cannot overwrite directory ‘{}’ with non-directory\n", dir )); @@ -587,6 +728,24 @@ fn test_mv_verbose() { )); } +#[test] +fn test_mv_permission_error() { + let scene = TestScenario::new("mkdir"); + let folder1 = "bar"; + let folder2 = "foo"; + let folder_to_move = "bar/foo"; + scene.ucmd().arg("-m444").arg(folder1).succeeds(); + scene.ucmd().arg("-m777").arg(folder2).succeeds(); + + scene + .cmd_keepenv(util_name!()) + .arg(folder2) + .arg(folder_to_move) + .run() + .stderr_str() + .ends_with("Permission denied"); +} + // Todo: // $ at.touch a b diff --git a/tests/by-util/test_nice.rs b/tests/by-util/test_nice.rs index d3457c686..9e004b98b 100644 --- a/tests/by-util/test_nice.rs +++ b/tests/by-util/test_nice.rs @@ -25,7 +25,7 @@ fn test_adjustment_with_no_command_should_error() { new_ucmd!() .args(&["-n", "19"]) .run() - .stderr_is("nice: error: A command must be given with an adjustment.\nTry \"nice --help\" for more information.\n"); + .stderr_is("nice: A command must be given with an adjustment.\nTry \"nice --help\" for more information.\n"); } #[test] diff --git a/tests/by-util/test_numfmt.rs 
b/tests/by-util/test_numfmt.rs index 64fc5360d..b52dbc359 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -281,6 +281,7 @@ fn test_leading_whitespace_in_free_argument_should_imply_padding() { } #[test] +#[ignore] fn test_should_calculate_implicit_padding_per_free_argument() { new_ucmd!() .args(&["--from=auto", " 1Ki", " 2K"]) diff --git a/tests/by-util/test_pinky.rs b/tests/by-util/test_pinky.rs index 1a7ef8b61..ccabb7345 100644 --- a/tests/by-util/test_pinky.rs +++ b/tests/by-util/test_pinky.rs @@ -20,42 +20,37 @@ fn test_long_format() { let ulogin = "root"; let pw: Passwd = Passwd::locate(ulogin).unwrap(); let real_name = pw.user_info().replace("&", &pw.name().capitalize()); - new_ucmd!().arg("-l").arg(ulogin).run().stdout_is(format!( - "Login name: {:<28}In real life: {}\nDirectory: {:<29}Shell: {}\n\n", - ulogin, - real_name, - pw.user_dir(), - pw.user_shell() - )); + new_ucmd!() + .arg("-l") + .arg(ulogin) + .succeeds() + .stdout_is(format!( + "Login name: {:<28}In real life: {}\nDirectory: {:<29}Shell: {}\n\n", + ulogin, + real_name, + pw.user_dir(), + pw.user_shell() + )); - new_ucmd!().arg("-lb").arg(ulogin).run().stdout_is(format!( - "Login name: {:<28}In real life: {1}\n\n", - ulogin, real_name - )); + new_ucmd!() + .arg("-lb") + .arg(ulogin) + .succeeds() + .stdout_is(format!( + "Login name: {:<28}In real life: {1}\n\n", + ulogin, real_name + )); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_long_format_multiple_users() { - let scene = TestScenario::new(util_name!()); + let args = ["-l", "root", "root", "root"]; - let expected = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .arg("-l") - .arg("root") - .arg("root") - .arg("root") - .succeeds(); - - scene - .ucmd() - .arg("-l") - .arg("root") - .arg("root") - .arg("root") + new_ucmd!() + .args(&args) .succeeds() - .stdout_is(expected.stdout_str()); + .stdout_is(expected_result(&args)); } #[test] @@ 
-64,46 +59,53 @@ fn test_long_format_wo_user() { new_ucmd!().arg("-l").fails().code_is(1); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_short_format_i() { // allow whitespace variation // * minor whitespace differences occur between platform built-in outputs; specifically, the number of trailing TABs may be variant let args = ["-i"]; - let actual = TestScenario::new(util_name!()) - .ucmd() - .args(&args) - .succeeds() - .stdout_move_str(); + let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); let v_actual: Vec<&str> = actual.split_whitespace().collect(); let v_expect: Vec<&str> = expect.split_whitespace().collect(); assert_eq!(v_actual, v_expect); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_short_format_q() { // allow whitespace variation // * minor whitespace differences occur between platform built-in outputs; specifically, the number of trailing TABs may be variant let args = ["-q"]; - let actual = TestScenario::new(util_name!()) - .ucmd() - .args(&args) - .succeeds() - .stdout_move_str(); + let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); let v_actual: Vec<&str> = actual.split_whitespace().collect(); let v_expect: Vec<&str> = expect.split_whitespace().collect(); assert_eq!(v_actual, v_expect); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] +#[test] +fn test_no_flag() { + let actual = new_ucmd!().succeeds().stdout_move_str(); + let expect = expected_result(&[]); + let v_actual: Vec<&str> = actual.split_whitespace().collect(); + let v_expect: Vec<&str> = expect.split_whitespace().collect(); + assert_eq!(v_actual, v_expect); +} + +#[cfg(any(target_vendor = "apple", target_os = "linux"))] fn expected_result(args: &[&str]) -> String { - TestScenario::new(util_name!()) - .cmd_keepenv(util_name!()) + 
#[cfg(target_os = "linux")] + let util_name = util_name!(); + #[cfg(target_vendor = "apple")] + let util_name = format!("g{}", util_name!()); + + TestScenario::new(&util_name) + .cmd_keepenv(util_name) .env("LANGUAGE", "C") .args(args) - .run() + .succeeds() .stdout_move_str() } diff --git a/tests/by-util/test_relpath.rs b/tests/by-util/test_relpath.rs index 5094d25a8..70d9f2a5d 100644 --- a/tests/by-util/test_relpath.rs +++ b/tests/by-util/test_relpath.rs @@ -155,7 +155,7 @@ fn test_relpath_no_from_with_d() { at.mkdir_all(to); // d is part of subpath -> expect relative path - let mut result_stdout = scene + let _result_stdout = scene .ucmd() .arg(to) .arg(&format!("-d{}", pwd)) @@ -163,10 +163,10 @@ fn test_relpath_no_from_with_d() { .stdout_move_str(); // relax rules for windows test environment #[cfg(not(windows))] - assert!(Path::new(&result_stdout).is_relative()); + assert!(Path::new(&_result_stdout).is_relative()); // d is not part of subpath -> expect absolut path - result_stdout = scene + let result_stdout = scene .ucmd() .arg(to) .arg("-dnon_existing") diff --git a/tests/by-util/test_rm.rs b/tests/by-util/test_rm.rs index 9a068887c..2a87038d5 100644 --- a/tests/by-util/test_rm.rs +++ b/tests/by-util/test_rm.rs @@ -258,7 +258,7 @@ fn test_rm_no_operand() { let mut ucmd = new_ucmd!(); ucmd.fails() - .stderr_is("rm: error: missing an argument\nrm: error: for help, try 'rm --help'\n"); + .stderr_is("rm: missing an argument\nrm: for help, try 'rm --help'\n"); } #[test] diff --git a/tests/by-util/test_rmdir.rs b/tests/by-util/test_rmdir.rs index 34531cf22..eef2d50f5 100644 --- a/tests/by-util/test_rmdir.rs +++ b/tests/by-util/test_rmdir.rs @@ -39,7 +39,7 @@ fn test_rmdir_nonempty_directory_no_parents() { assert!(at.file_exists(file)); ucmd.arg(dir).fails().stderr_is( - "rmdir: error: failed to remove 'test_rmdir_nonempty_no_parents': Directory not \ + "rmdir: failed to remove 'test_rmdir_nonempty_no_parents': Directory not \ empty\n", ); @@ -59,9 +59,9 @@ fn 
test_rmdir_nonempty_directory_with_parents() { assert!(at.file_exists(file)); ucmd.arg("-p").arg(dir).fails().stderr_is( - "rmdir: error: failed to remove 'test_rmdir_nonempty/with/parents': Directory not \ - empty\nrmdir: error: failed to remove 'test_rmdir_nonempty/with': Directory not \ - empty\nrmdir: error: failed to remove 'test_rmdir_nonempty': Directory not \ + "rmdir: failed to remove 'test_rmdir_nonempty/with/parents': Directory not \ + empty\nrmdir: failed to remove 'test_rmdir_nonempty/with': Directory not \ + empty\nrmdir: failed to remove 'test_rmdir_nonempty': Directory not \ empty\n", ); diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index eac9490a5..133dc0028 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1,48 +1,79 @@ use crate::common::util::*; -fn test_helper(file_name: &str, args: &str) { - new_ucmd!() - .arg(format!("{}.txt", file_name)) - .args(&args.split(' ').collect::>()) - .succeeds() - .stdout_is_fixture(format!("{}.expected", file_name)); +fn test_helper(file_name: &str, possible_args: &[&str]) { + for args in possible_args { + new_ucmd!() + .arg(format!("{}.txt", file_name)) + .args(&args.split(' ').collect::>()) + .succeeds() + .stdout_is_fixture(format!("{}.expected", file_name)); - new_ucmd!() - .arg(format!("{}.txt", file_name)) - .arg("--debug") - .args(&args.split(' ').collect::>()) - .succeeds() - .stdout_is_fixture(format!("{}.expected.debug", file_name)); + new_ucmd!() + .arg(format!("{}.txt", file_name)) + .arg("--debug") + .args(&args.split(' ').collect::>()) + .succeeds() + .stdout_is_fixture(format!("{}.expected.debug", file_name)); + } } -// FYI, the initialization size of our Line struct is 96 bytes. -// -// At very small buffer sizes, with that overhead we are certainly going -// to overrun our buffer way, way, way too quickly because of these excess -// bytes for the struct. 
-// -// For instance, seq 0..20000 > ...text = 108894 bytes -// But overhead is 1920000 + 108894 = 2028894 bytes -// -// Or kjvbible-random.txt = 4332506 bytes, but minimum size of its -// 99817 lines in memory * 96 bytes = 9582432 bytes -// -// Here, we test 108894 bytes with a 50K buffer -// #[test] -fn test_larger_than_specified_segment() { +fn test_buffer_sizes() { + let buffer_sizes = [ + "0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y", + ]; + for buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg(buffer_size) + .arg("ext_sort.txt") + .succeeds() + .stdout_is_fixture("ext_sort.expected"); + } +} + +#[test] +fn test_invalid_buffer_size() { + let buffer_sizes = ["asd", "100f"]; + for invalid_buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-S") + .arg(invalid_buffer_size) + .fails() + .stderr_only(format!( + "sort: failed to parse buffer size `{}`: invalid digit found in string", + invalid_buffer_size + )); + } +} + +#[test] +fn test_ext_sort_stable() { new_ucmd!() .arg("-n") + .arg("--stable") .arg("-S") - .arg("50K") - .arg("ext_sort.txt") + .arg("0M") + .arg("ext_stable.txt") .succeeds() - .stdout_is_fixture(format!("{}", "ext_sort.expected")); + .stdout_only_fixture("ext_stable.expected"); +} + +#[test] +fn test_extsort_zero_terminated() { + new_ucmd!() + .arg("-z") + .arg("-S") + .arg("10K") + .arg("zero-terminated.txt") + .succeeds() + .stdout_is_fixture("zero-terminated.expected"); } #[test] fn test_months_whitespace() { - test_helper("months-whitespace", "-M"); + test_helper("months-whitespace", &["-M", "--month-sort", "--sort=month"]); } #[test] @@ -56,7 +87,10 @@ fn test_version_empty_lines() { #[test] fn test_human_numeric_whitespace() { - test_helper("human-numeric-whitespace", "-h"); + test_helper( + "human-numeric-whitespace", + &["-h", "--human-numeric-sort", "--sort=human-numeric"], + ); } // This tests where serde often fails when reading back JSON @@ -73,12 +107,18 @@ fn test_extsort_as64_bailout() { 
#[test] fn test_multiple_decimals_general() { - test_helper("multiple_decimals_general", "-g") + test_helper( + "multiple_decimals_general", + &["-g", "--general-numeric-sort", "--sort=general-numeric"], + ) } #[test] fn test_multiple_decimals_numeric() { - test_helper("multiple_decimals_numeric", "-n") + test_helper( + "multiple_decimals_numeric", + &["-n", "--numeric-sort", "--sort=numeric"], + ) } #[test] @@ -88,7 +128,7 @@ fn test_check_zero_terminated_failure() { .arg("-c") .arg("zero-terminated.txt") .fails() - .stdout_is("sort: disorder in line 0\n"); + .stdout_is("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n"); } #[test] @@ -157,72 +197,93 @@ fn test_random_shuffle_contains_two_runs_not_the_same() { #[test] fn test_numeric_floats_and_ints() { - test_helper("numeric_floats_and_ints", "-n"); + test_helper( + "numeric_floats_and_ints", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_floats() { - test_helper("numeric_floats", "-n"); + test_helper( + "numeric_floats", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_floats_with_nan() { - test_helper("numeric_floats_with_nan", "-n"); + test_helper( + "numeric_floats_with_nan", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_unfixed_floats() { - test_helper("numeric_unfixed_floats", "-n"); + test_helper( + "numeric_unfixed_floats", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_fixed_floats() { - test_helper("numeric_fixed_floats", "-n"); + test_helper( + "numeric_fixed_floats", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_unsorted_ints() { - test_helper("numeric_unsorted_ints", "-n"); + test_helper( + "numeric_unsorted_ints", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_human_block_sizes() { - test_helper("human_block_sizes", "-h"); + test_helper( + "human_block_sizes", + &["-h", "--human-numeric-sort", 
"--sort=human-numeric"], + ); } #[test] fn test_month_default() { - test_helper("month_default", "-M"); + test_helper("month_default", &["-M", "--month-sort", "--sort=month"]); } #[test] fn test_month_stable() { - test_helper("month_stable", "-Ms"); + test_helper("month_stable", &["-Ms"]); } #[test] fn test_default_unsorted_ints() { - test_helper("default_unsorted_ints", ""); + test_helper("default_unsorted_ints", &[""]); } #[test] fn test_numeric_unique_ints() { - test_helper("numeric_unsorted_ints_unique", "-nu"); + test_helper("numeric_unsorted_ints_unique", &["-nu"]); } #[test] fn test_version() { - test_helper("version", "-V"); + test_helper("version", &["-V"]); } #[test] fn test_ignore_case() { - test_helper("ignore_case", "-f"); + test_helper("ignore_case", &["-f"]); } #[test] fn test_dictionary_order() { - test_helper("dictionary_order", "-d"); + test_helper("dictionary_order", &["-d"]); } #[test] @@ -249,47 +310,53 @@ fn test_non_printing_chars() { #[test] fn test_exponents_positive_general_fixed() { - test_helper("exponents_general", "-g"); + test_helper("exponents_general", &["-g"]); } #[test] fn test_exponents_positive_numeric() { - test_helper("exponents-positive-numeric", "-n"); + test_helper( + "exponents-positive-numeric", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_months_dedup() { - test_helper("months-dedup", "-Mu"); + test_helper("months-dedup", &["-Mu"]); } #[test] fn test_mixed_floats_ints_chars_numeric() { - test_helper("mixed_floats_ints_chars_numeric", "-n"); + test_helper( + "mixed_floats_ints_chars_numeric", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_mixed_floats_ints_chars_numeric_unique() { - test_helper("mixed_floats_ints_chars_numeric_unique", "-nu"); + test_helper("mixed_floats_ints_chars_numeric_unique", &["-nu"]); } #[test] fn test_words_unique() { - test_helper("words_unique", "-u"); + test_helper("words_unique", &["-u"]); } #[test] fn test_numeric_unique() { - 
test_helper("numeric_unique", "-nu"); + test_helper("numeric_unique", &["-nu"]); } #[test] fn test_mixed_floats_ints_chars_numeric_reverse() { - test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur"); + test_helper("mixed_floats_ints_chars_numeric_unique_reverse", &["-nur"]); } #[test] fn test_mixed_floats_ints_chars_numeric_stable() { - test_helper("mixed_floats_ints_chars_numeric_stable", "-ns"); + test_helper("mixed_floats_ints_chars_numeric_stable", &["-ns"]); } #[test] @@ -318,12 +385,15 @@ fn test_numeric_floats2() { #[test] fn test_numeric_floats_with_nan2() { - test_helper("numeric-floats-with-nan2", "-n"); + test_helper( + "numeric-floats-with-nan2", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_human_block_sizes2() { - for human_numeric_sort_param in vec!["-h", "--human-numeric-sort"] { + for human_numeric_sort_param in &["-h", "--human-numeric-sort", "--sort=human-numeric"] { let input = "8981K\n909991M\n-8T\n21G\n0.8M"; new_ucmd!() .arg(human_numeric_sort_param) @@ -335,7 +405,7 @@ fn test_human_block_sizes2() { #[test] fn test_month_default2() { - for month_sort_param in vec!["-M", "--month-sort"] { + for month_sort_param in &["-M", "--month-sort", "--sort=month"] { let input = "JAn\nMAY\n000may\nJun\nFeb"; new_ucmd!() .arg(month_sort_param) @@ -368,32 +438,32 @@ fn test_numeric_unique_ints2() { #[test] fn test_keys_open_ended() { - test_helper("keys_open_ended", "-k 2.3"); + test_helper("keys_open_ended", &["-k 2.3"]); } #[test] fn test_keys_closed_range() { - test_helper("keys_closed_range", "-k 2.2,2.2"); + test_helper("keys_closed_range", &["-k 2.2,2.2"]); } #[test] fn test_keys_multiple_ranges() { - test_helper("keys_multiple_ranges", "-k 2,2 -k 3,3"); + test_helper("keys_multiple_ranges", &["-k 2,2 -k 3,3"]); } #[test] fn test_keys_no_field_match() { - test_helper("keys_no_field_match", "-k 4,4"); + test_helper("keys_no_field_match", &["-k 4,4"]); } #[test] fn test_keys_no_char_match() { - 
test_helper("keys_no_char_match", "-k 1.2"); + test_helper("keys_no_char_match", &["-k 1.2"]); } #[test] fn test_keys_custom_separator() { - test_helper("keys_custom_separator", "-k 2.2,2.2 -t x"); + test_helper("keys_custom_separator", &["-k 2.2,2.2 -t x"]); } #[test] @@ -401,7 +471,7 @@ fn test_keys_invalid_field() { new_ucmd!() .args(&["-k", "1."]) .fails() - .stderr_only("sort: error: failed to parse character index for key `1.`: cannot parse integer from empty string"); + .stderr_only("sort: failed to parse character index for key `1.`: cannot parse integer from empty string"); } #[test] @@ -409,7 +479,7 @@ fn test_keys_invalid_field_option() { new_ucmd!() .args(&["-k", "1.1x"]) .fails() - .stderr_only("sort: error: invalid option for key: `x`"); + .stderr_only("sort: invalid option for key: `x`"); } #[test] @@ -417,14 +487,15 @@ fn test_keys_invalid_field_zero() { new_ucmd!() .args(&["-k", "0.1"]) .fails() - .stderr_only("sort: error: field index was 0"); + .stderr_only("sort: field index was 0"); } #[test] fn test_keys_invalid_char_zero() { - new_ucmd!().args(&["-k", "1.0"]).fails().stderr_only( - "sort: error: invalid character index 0 in `1.0` for the start position of a field", - ); + new_ucmd!() + .args(&["-k", "1.0"]) + .fails() + .stderr_only("sort: invalid character index 0 in `1.0` for the start position of a field"); } #[test] @@ -505,7 +576,7 @@ aaaa #[test] fn test_zero_terminated() { - test_helper("zero-terminated", "-z"); + test_helper("zero-terminated", &["-z"]); } #[test] @@ -544,6 +615,18 @@ fn test_merge_unique() { .stdout_only_fixture("merge_ints_interleaved.expected"); } +#[test] +fn test_merge_stable() { + new_ucmd!() + .arg("-m") + .arg("--stable") + .arg("-n") + .arg("merge_stable_1.txt") + .arg("merge_stable_2.txt") + .succeeds() + .stdout_only_fixture("merge_stable.expected"); +} + #[test] fn test_merge_reversed() { new_ucmd!() @@ -575,7 +658,7 @@ fn test_check() { .arg("-c") .arg("check_fail.txt") .fails() - .stdout_is("sort: 
disorder in line 4\n"); + .stdout_is("sort: check_fail.txt:6: disorder: 5\n"); new_ucmd!() .arg("-c") diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 521cbbe9a..d83de4323 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -4,11 +4,15 @@ extern crate regex; use self::rand::{thread_rng, Rng}; use self::regex::Regex; use crate::common::util::*; +use rand::SeedableRng; #[cfg(not(windows))] use std::env; -use std::fs::{read_dir, File}; use std::io::Write; use std::path::Path; +use std::{ + fs::{read_dir, File}, + io::BufWriter, +}; fn random_chars(n: usize) -> String { thread_rng() @@ -58,7 +62,7 @@ impl Glob { files.sort(); let mut data: Vec = vec![]; for name in &files { - data.extend(self.directory.read(name).into_bytes()); + data.extend(self.directory.read_bytes(name)); } data } @@ -81,20 +85,30 @@ impl RandomFile { } fn add_bytes(&mut self, bytes: usize) { - let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes }; - let mut n = bytes; - while n > chunk_size { - let _ = write!(self.inner, "{}", random_chars(chunk_size)); - n -= chunk_size; + // Note that just writing random characters isn't enough to cover all + // cases. We need truly random bytes. + let mut writer = BufWriter::new(&self.inner); + + // Seed the rng so as to avoid spurious test failures. 
+ let mut rng = rand::rngs::StdRng::seed_from_u64(123); + let mut buffer = [0; 1024]; + let mut remaining_size = bytes; + + while remaining_size > 0 { + let to_write = std::cmp::min(remaining_size, buffer.len()); + let buf = &mut buffer[..to_write]; + rng.fill(buf); + writer.write(buf).unwrap(); + + remaining_size -= to_write; } - let _ = write!(self.inner, "{}", random_chars(n)); } /// Add n lines each of size `RandomFile::LINESIZE` fn add_lines(&mut self, lines: usize) { let mut n = lines; while n > 0 { - let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)); + writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap(); n -= 1; } } @@ -104,18 +118,18 @@ impl RandomFile { fn test_split_default() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_default"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_lines(2000); ucmd.args(&[name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 2); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_numeric_prefixed_chunks_by_bytes() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_num_prefixed_chunks_by_bytes"; - let glob = Glob::new(&at, ".", r"a\d\d$"); RandomFile::new(&at, name).add_bytes(10000); ucmd.args(&[ "-d", // --numeric-suffixes @@ -123,52 +137,89 @@ fn test_split_numeric_prefixed_chunks_by_bytes() { "1000", name, "a", ]) .succeeds(); + + let glob = Glob::new(&at, ".", r"a\d\d$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_str_prefixed_chunks_by_bytes() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_str_prefixed_chunks_by_bytes"; - let glob = Glob::new(&at, ".", 
r"b[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_bytes(10000); + // Important that this is less than 1024 since that's our internal buffer + // size. Good to test that we don't overshoot. ucmd.args(&["-b", "1000", name, "b"]).succeeds(); + + let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); +} + +// This is designed to test what happens when the desired part size is not a +// multiple of the buffer size and we hopefully don't overshoot the desired part +// size. +#[test] +fn test_split_bytes_prime_part_size() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "test_split_bytes_prime_part_size"; + RandomFile::new(&at, name).add_bytes(10000); + // 1753 is prime and greater than the buffer size, 1024. + ucmd.args(&["-b", "1753", name, "b"]).succeeds(); + + let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 6); + let mut fns = glob.collect(); + // glob.collect() is not guaranteed to return in sorted order, so we sort. 
+ fns.sort(); + for i in 0..5 { + assert_eq!(glob.directory.metadata(&fns[i]).len(), 1753); + } + assert_eq!(glob.directory.metadata(&fns[5]).len(), 1235); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_num_prefixed_chunks_by_lines() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_num_prefixed_chunks_by_lines"; - let glob = Glob::new(&at, ".", r"c\d\d$"); RandomFile::new(&at, name).add_lines(10000); ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds(); + + let glob = Glob::new(&at, ".", r"c\d\d$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_str_prefixed_chunks_by_lines() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_str_prefixed_chunks_by_lines"; - let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_lines(10000); ucmd.args(&["-l", "1000", name, "d"]).succeeds(); + + let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_additional_suffix() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_additional_suffix"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$"); RandomFile::new(&at, name).add_lines(2000); ucmd.args(&["--additional-suffix", ".txt", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$"); assert_eq!(glob.count(), 2); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } // note: the test_filter* tests below are unix-only @@ -182,15 +233,16 @@ fn test_filter() { // like `test_split_default()` but run a command before writing let (at, mut ucmd) = at_and_ucmd!(); let name = "filtered"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); let n_lines = 3; 
RandomFile::new(&at, name).add_lines(n_lines); // change all characters to 'i' ucmd.args(&["--filter=sed s/./i/g > $FILE", name]) .succeeds(); + // assert all characters are 'i' / no character is not 'i' // (assert that command succeded) + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); assert!( glob.collate().iter().find(|&&c| { // is not i @@ -209,7 +261,6 @@ fn test_filter_with_env_var_set() { // implemented like `test_split_default()` but run a command before writing let (at, mut ucmd) = at_and_ucmd!(); let name = "filtered"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); let n_lines = 3; RandomFile::new(&at, name).add_lines(n_lines); @@ -217,7 +268,9 @@ fn test_filter_with_env_var_set() { env::set_var("FILE", &env_var_value); ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name]) .succeeds(); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.collate(), at.read_bytes(name)); assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value); } diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 60d735c51..44bce9cd8 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -5,69 +5,6 @@ use crate::common::util::*; extern crate stat; pub use self::stat::*; -#[cfg(test)] -mod test_fsext { - use super::*; - - #[test] - fn test_access() { - assert_eq!("drwxr-xr-x", pretty_access(S_IFDIR | 0o755)); - assert_eq!("-rw-r--r--", pretty_access(S_IFREG | 0o644)); - assert_eq!("srw-r-----", pretty_access(S_IFSOCK | 0o640)); - assert_eq!("lrw-r-xr-x", pretty_access(S_IFLNK | 0o655)); - assert_eq!("?rw-r-xr-x", pretty_access(0o655)); - - assert_eq!( - "brwSr-xr-x", - pretty_access(S_IFBLK | S_ISUID as mode_t | 0o655) - ); - assert_eq!( - "brwsr-xr-x", - pretty_access(S_IFBLK | S_ISUID as mode_t | 0o755) - ); - - assert_eq!( - "prw---sr--", - pretty_access(S_IFIFO | S_ISGID as mode_t | 0o614) 
- ); - assert_eq!( - "prw---Sr--", - pretty_access(S_IFIFO | S_ISGID as mode_t | 0o604) - ); - - assert_eq!( - "c---r-xr-t", - pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o055) - ); - assert_eq!( - "c---r-xr-T", - pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o054) - ); - } - - #[test] - fn test_file_type() { - assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); - assert_eq!("character special file", pretty_filetype(S_IFCHR, 0)); - assert_eq!("regular file", pretty_filetype(S_IFREG, 1)); - assert_eq!("regular empty file", pretty_filetype(S_IFREG, 0)); - assert_eq!("weird file", pretty_filetype(0, 0)); - } - - #[test] - fn test_fs_type() { - assert_eq!("ext2/ext3", pretty_fstype(0xEF53)); - assert_eq!("tmpfs", pretty_fstype(0x01021994)); - assert_eq!("nfs", pretty_fstype(0x6969)); - assert_eq!("btrfs", pretty_fstype(0x9123683e)); - assert_eq!("xfs", pretty_fstype(0x58465342)); - assert_eq!("zfs", pretty_fstype(0x2FC12FC1)); - assert_eq!("ntfs", pretty_fstype(0x5346544e)); - assert_eq!("fat", pretty_fstype(0x4006)); - assert_eq!("UNKNOWN (0x1234)", pretty_fstype(0x1234)); - } -} - #[test] fn test_scanutil() { assert_eq!(Some((-5, 2)), "-5zxc".scan_num::()); @@ -159,10 +96,10 @@ fn test_invalid_option() { new_ucmd!().arg("-w").arg("-q").arg("/").fails(); } -#[cfg(target_os = "linux")] +#[cfg(any(target_os = "linux", target_vendor = "apple"))] const NORMAL_FMTSTR: &'static str = "%a %A %b %B %d %D %f %F %g %G %h %i %m %n %o %s %u %U %x %X %y %Y %z %Z"; // avoid "%w %W" (birth/creation) due to `stat` limitations and linux kernel & rust version capability variations -#[cfg(target_os = "linux")] +#[cfg(any(target_os = "linux"))] const DEV_FMTSTR: &'static str = "%a %A %b %B %d %D %f %F %g %G %h %i %m %n %o %s (%t/%T) %u %U %w %W %x %X %y %Y %z %Z"; #[cfg(target_os = "linux")] @@ -188,8 +125,8 @@ fn test_fs_format() { .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn 
test_terse_normal_format() { // note: contains birth/creation date which increases test fragility // * results may vary due to built-in `stat` limitations as well as linux kernel and rust version capability variations @@ -198,9 +135,16 @@ fn test_terse_normal_format() { let expect = expected_result(&args); println!("actual: {:?}", actual); println!("expect: {:?}", expect); - let v_actual: Vec<&str> = actual.split(' ').collect(); - let v_expect: Vec<&str> = expect.split(' ').collect(); + let v_actual: Vec<&str> = actual.trim().split(' ').collect(); + let mut v_expect: Vec<&str> = expect.trim().split(' ').collect(); assert!(!v_expect.is_empty()); + + // uu_stat does not support selinux + if v_actual.len() == v_expect.len() - 1 && v_expect[v_expect.len() - 1].contains(":") { + // assume last element contains: `SELinux security context string` + v_expect.pop(); + } + // * allow for inequality if `stat` (aka, expect) returns "0" (unknown value) assert!( expect == "0" @@ -212,10 +156,10 @@ fn test_terse_normal_format() { ); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_format_created_time() { - let args = ["-c", "%w", "/boot"]; + let args = ["-c", "%w", "/bin"]; let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); println!("actual: {:?}", actual); @@ -236,10 +180,10 @@ fn test_format_created_time() { ); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_format_created_seconds() { - let args = ["-c", "%W", "/boot"]; + let args = ["-c", "%W", "/bin"]; let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); println!("actual: {:?}", actual); @@ -260,65 +204,97 @@ fn test_format_created_seconds() { ); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_normal_format() { - let args = ["-c", NORMAL_FMTSTR, 
"/boot"]; + let args = ["-c", NORMAL_FMTSTR, "/bin"]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] -fn test_follow_symlink() { - let args = ["-L", "-c", DEV_FMTSTR, "/dev/cdrom"]; - new_ucmd!() - .args(&args) - .run() - .stdout_is(expected_result(&args)); +fn test_symlinks() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let mut tested: bool = false; + // arbitrarily chosen symlinks with hope that the CI environment provides at least one of them + for file in vec![ + "/bin/sh", + "/bin/sudoedit", + "/usr/bin/ex", + "/etc/localtime", + "/etc/aliases", + ] { + if at.file_exists(file) && at.is_symlink(file) { + tested = true; + let args = ["-c", NORMAL_FMTSTR, file]; + scene + .ucmd() + .args(&args) + .succeeds() + .stdout_is(expected_result(&args)); + // -L, --dereference follow links + let args = ["-L", "-c", NORMAL_FMTSTR, file]; + scene + .ucmd() + .args(&args) + .succeeds() + .stdout_is(expected_result(&args)); + } + } + if !tested { + panic!("No symlink found to test in this environment"); + } } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] -fn test_symlink() { - let args = ["-c", DEV_FMTSTR, "/dev/cdrom"]; - new_ucmd!() - .args(&args) - .run() - .stdout_is(expected_result(&args)); -} - -#[test] -#[cfg(target_os = "linux")] fn test_char() { - let args = ["-c", DEV_FMTSTR, "/dev/pts/ptmx"]; + // TODO: "(%t) (%x) (%w)" deviate from GNU stat for `character special file` on macOS + // Diff < left / right > : + // <"(f0000) (2021-05-20 23:08:03.442555000 +0200) (1970-01-01 01:00:00.000000000 +0100)\n" + // >"(f) (2021-05-20 23:08:03.455598000 +0200) (-)\n" + let args = [ + "-c", + #[cfg(target_os = "linux")] + DEV_FMTSTR, + #[cfg(target_os = "linux")] + "/dev/pts/ptmx", + #[cfg(any(target_vendor = "apple"))] + "%a %A %b %B %d %D %f %F %g %G %h %i %m 
%n %o %s (/%T) %u %U %W %X %y %Y %z %Z", + #[cfg(any(target_vendor = "apple"))] + "/dev/ptmx", + ]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_multi_files() { let args = [ "-c", NORMAL_FMTSTR, "/dev", "/usr/lib", + #[cfg(target_os = "linux")] "/etc/fstab", "/var", ]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] -#[cfg(target_os = "linux")] fn test_printf() { let args = [ "--printf=123%-# 15q\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.23m\\x12\\167\\132\\112\\n", @@ -326,16 +302,21 @@ fn test_printf() { ]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] fn expected_result(args: &[&str]) -> String { - TestScenario::new(util_name!()) - .cmd_keepenv(util_name!()) + #[cfg(target_os = "linux")] + let util_name = util_name!(); + #[cfg(target_vendor = "apple")] + let util_name = format!("g{}", util_name!()); + + TestScenario::new(&util_name) + .cmd_keepenv(util_name) .env("LANGUAGE", "C") .args(args) - .run() + .succeeds() .stdout_move_str() } diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index 808b7382a..2e09601ce 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -27,12 +27,12 @@ fn test_stdbuf_line_buffered_stdout() { fn test_stdbuf_no_buffer_option_fails() { new_ucmd!().args(&["head"]).fails().stderr_is( "error: The following required arguments were not provided:\n \ - --error \n \ - --input \n \ - --output \n\n\ - USAGE:\n \ - stdbuf OPTION... COMMAND\n\n\ - For more information try --help", + --error \n \ + --input \n \ + --output \n\n\ + USAGE:\n \ + stdbuf OPTION... 
COMMAND\n\n\ + For more information try --help", ); } @@ -49,10 +49,9 @@ fn test_stdbuf_trailing_var_arg() { #[cfg(not(target_os = "windows"))] #[test] fn test_stdbuf_line_buffering_stdin_fails() { - new_ucmd!() - .args(&["-i", "L", "head"]) - .fails() - .stderr_is("stdbuf: error: line buffering stdin is meaningless\nTry 'stdbuf --help' for more information."); + new_ucmd!().args(&["-i", "L", "head"]).fails().stderr_is( + "stdbuf: line buffering stdin is meaningless\nTry 'stdbuf --help' for more information.", + ); } #[cfg(not(target_os = "windows"))] @@ -61,5 +60,5 @@ fn test_stdbuf_invalid_mode_fails() { new_ucmd!() .args(&["-i", "1024R", "head"]) .fails() - .stderr_is("stdbuf: error: invalid mode 1024R\nTry 'stdbuf --help' for more information."); + .stderr_is("stdbuf: invalid mode 1024R\nTry 'stdbuf --help' for more information."); } diff --git a/tests/by-util/test_sum.rs b/tests/by-util/test_sum.rs index d12455749..f09ba9d00 100644 --- a/tests/by-util/test_sum.rs +++ b/tests/by-util/test_sum.rs @@ -59,9 +59,7 @@ fn test_invalid_file() { at.mkdir("a"); - ucmd.arg("a") - .fails() - .stderr_is("sum: error: 'a' Is a directory"); + ucmd.arg("a").fails().stderr_is("sum: 'a' Is a directory"); } #[test] @@ -70,5 +68,5 @@ fn test_invalid_metadata() { ucmd.arg("b") .fails() - .stderr_is("sum: error: 'b' No such file or directory"); + .stderr_is("sum: 'b' No such file or directory"); } diff --git a/tests/by-util/test_sync.rs b/tests/by-util/test_sync.rs index 436bfdef3..033651910 100644 --- a/tests/by-util/test_sync.rs +++ b/tests/by-util/test_sync.rs @@ -37,5 +37,5 @@ fn test_sync_no_existing_files() { .arg("--data") .arg("do-no-exist") .fails() - .stderr_contains("error: cannot stat"); + .stderr_contains("cannot stat"); } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 1c025cf4c..f3c9a7b11 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -348,3 +348,43 @@ fn test_negative_indexing() { fn test_sleep_interval() { 
new_ucmd!().arg("-s").arg("10").arg(FOOBAR_TXT).succeeds(); } + +/// Test for reading all but the first NUM bytes: `tail -c +3`. +#[test] +fn test_positive_bytes() { + new_ucmd!() + .args(&["-c", "+3"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("cde"); +} + +/// Test for reading all bytes, specified by `tail -c +0`. +#[test] +fn test_positive_zero_bytes() { + new_ucmd!() + .args(&["-c", "+0"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("abcde"); +} + +/// Test for reading all but the first NUM lines: `tail -n +3`. +#[test] +fn test_positive_lines() { + new_ucmd!() + .args(&["-n", "+3"]) + .pipe_in("a\nb\nc\nd\ne\n") + .succeeds() + .stdout_is("c\nd\ne\n"); +} + +/// Test for reading all lines, specified by `tail -n +0`. +#[test] +fn test_positive_zero_lines() { + new_ucmd!() + .args(&["-n", "+0"]) + .pipe_in("a\nb\nc\nd\ne\n") + .succeeds() + .stdout_is("a\nb\nc\nd\ne\n"); +} diff --git a/tests/by-util/test_test.rs b/tests/by-util/test_test.rs index 000013d9c..3a55f772a 100644 --- a/tests/by-util/test_test.rs +++ b/tests/by-util/test_test.rs @@ -122,6 +122,13 @@ fn test_zero_len_not_equals_zero_len_is_false() { new_ucmd!().args(&["", "!=", ""]).run().status_code(1); } +#[test] +fn test_double_equal_is_string_comparison_op() { + // undocumented but part of the GNU test suite + new_ucmd!().args(&["t", "==", "t"]).succeeds(); + new_ucmd!().args(&["t", "==", "f"]).run().status_code(1); +} + #[test] fn test_string_comparison() { let scenario = TestScenario::new(util_name!()); @@ -131,11 +138,22 @@ fn test_string_comparison() { ["(", "=", "("], ["(", "!=", ")"], ["!", "=", "!"], + ["=", "=", "="], ]; for test in &tests { scenario.ucmd().args(&test[..]).succeeds(); } + + // run the inverse of all these tests + for test in &tests { + scenario + .ucmd() + .arg("!") + .args(&test[..]) + .run() + .status_code(1); + } } #[test] @@ -419,10 +437,9 @@ fn test_not_is_not_empty() { #[cfg(not(windows))] fn test_symlink_is_symlink() { let scenario = 
TestScenario::new(util_name!()); - let mut ln = scenario.cmd("ln"); + let at = &scenario.fixtures; - // creating symlinks requires admin on Windows - ln.args(&["-s", "regular_file", "symlink"]).succeeds(); + at.symlink_file("regular_file", "symlink"); // FIXME: implement on Windows scenario.ucmd().args(&["-h", "symlink"]).succeeds(); @@ -485,6 +502,81 @@ fn test_op_prec_and_or_2_overridden_by_parentheses() { .status_code(1); } +#[test] +fn test_negated_boolean_precedence() { + let scenario = TestScenario::new(util_name!()); + + let tests = [ + vec!["!", "(", "foo", ")", "-o", "bar"], + vec!["!", "", "-o", "", "-a", ""], + vec!["!", "(", "", "-a", "", ")", "-o", ""], + ]; + + for test in &tests { + scenario.ucmd().args(&test[..]).succeeds(); + } + + let negative_tests = [ + vec!["!", "-n", "", "-a", ""], + vec!["", "-a", "", "-o", ""], + vec!["!", "", "-a", "", "-o", ""], + vec!["!", "(", "", "-a", "", ")", "-a", ""], + ]; + + for test in &negative_tests { + scenario.ucmd().args(&test[..]).run().status_code(1); + } +} + +#[test] +fn test_bang_boolop_precedence() { + // For a Boolean combination of two literals, bang inverts the entire expression + new_ucmd!().args(&["!", "", "-a", ""]).succeeds(); + new_ucmd!().args(&["!", "", "-o", ""]).succeeds(); + + new_ucmd!() + .args(&["!", "a value", "-o", "another value"]) + .run() + .status_code(1); + + // Introducing a UOP — even one that is equivalent to a bare string — causes + // bang to invert only the first term + new_ucmd!() + .args(&["!", "-n", "", "-a", ""]) + .run() + .status_code(1); + new_ucmd!() + .args(&["!", "", "-a", "-n", ""]) + .run() + .status_code(1); + + // for compound Boolean expressions, bang inverts the _next_ expression + // only, not the entire compound expression + new_ucmd!() + .args(&["!", "", "-a", "", "-a", ""]) + .run() + .status_code(1); + + // parentheses can override this + new_ucmd!() + .args(&["!", "(", "", "-a", "", "-a", "", ")"]) + .succeeds(); +} + +#[test] +fn 
test_inverted_parenthetical_boolop_precedence() { + // For a Boolean combination of two literals, bang inverts the entire expression + new_ucmd!() + .args(&["!", "a value", "-o", "another value"]) + .run() + .status_code(1); + + // only the parenthetical is inverted, not the entire expression + new_ucmd!() + .args(&["!", "(", "a value", ")", "-o", "another value"]) + .succeeds(); +} + #[test] #[ignore = "fixme: error reporting"] fn test_dangling_parenthesis() { diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 8f88f4c74..120982e3c 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -206,7 +206,7 @@ fn test_round_up() { let (at, mut ucmd) = at_and_ucmd!(); let mut file = at.make_file(TFILE2); file.write_all(b"1234567890").unwrap(); - ucmd.args(&["--size", "*4", TFILE2]).succeeds(); + ucmd.args(&["--size", "%4", TFILE2]).succeeds(); file.seek(SeekFrom::End(0)).unwrap(); let actual = file.seek(SeekFrom::Current(0)).unwrap(); assert!( @@ -235,3 +235,30 @@ fn test_size_and_reference() { actual ); } + +#[test] +fn test_invalid_numbers() { + // TODO For compatibility with GNU, `truncate -s 0X` should cause + // the same error as `truncate -s 0X file`, but currently it returns + // a different error. 
+ new_ucmd!() + .args(&["-s", "0X", "file"]) + .fails() + .stderr_contains("Invalid number: ‘0X’"); + new_ucmd!() + .args(&["-s", "0XB", "file"]) + .fails() + .stderr_contains("Invalid number: ‘0XB’"); + new_ucmd!() + .args(&["-s", "0B", "file"]) + .fails() + .stderr_contains("Invalid number: ‘0B’"); +} + +#[test] +fn test_reference_file_not_found() { + new_ucmd!() + .args(&["-r", "a", "b"]) + .fails() + .stderr_contains("cannot stat 'a': No such file or directory"); +} diff --git a/tests/by-util/test_uname.rs b/tests/by-util/test_uname.rs index da901d985..d878ed7ac 100644 --- a/tests/by-util/test_uname.rs +++ b/tests/by-util/test_uname.rs @@ -43,5 +43,5 @@ fn test_uname_kernel() { } #[cfg(not(target_os = "linux"))] - let result = ucmd.arg("-o").succeeds(); + ucmd.arg("-o").succeeds(); } diff --git a/tests/by-util/test_uniq.rs b/tests/by-util/test_uniq.rs index c1e53faf3..2645c38ca 100644 --- a/tests/by-util/test_uniq.rs +++ b/tests/by-util/test_uniq.rs @@ -145,7 +145,7 @@ fn test_invalid_utf8() { .arg("not-utf8-sequence.txt") .run() .failure() - .stderr_only("uniq: error: invalid utf-8 sequence of 1 bytes from index 0"); + .stderr_only("uniq: invalid utf-8 sequence of 1 bytes from index 0"); } #[test] diff --git a/tests/by-util/test_unlink.rs b/tests/by-util/test_unlink.rs index fa8f962c4..1999e965c 100644 --- a/tests/by-util/test_unlink.rs +++ b/tests/by-util/test_unlink.rs @@ -22,7 +22,7 @@ fn test_unlink_multiple_files() { at.touch(file_b); ucmd.arg(file_a).arg(file_b).fails().stderr_is( - "unlink: error: extra operand: 'test_unlink_multiple_file_b'\nTry 'unlink --help' \ + "unlink: extra operand: 'test_unlink_multiple_file_b'\nTry 'unlink --help' \ for more information.\n", ); } @@ -35,7 +35,7 @@ fn test_unlink_directory() { at.mkdir(dir); ucmd.arg(dir).fails().stderr_is( - "unlink: error: cannot unlink 'test_unlink_empty_directory': Not a regular file \ + "unlink: cannot unlink 'test_unlink_empty_directory': Not a regular file \ or symlink\n", ); } @@ -45,7 
+45,7 @@ fn test_unlink_nonexistent() { let file = "test_unlink_nonexistent"; new_ucmd!().arg(file).fails().stderr_is( - "unlink: error: Cannot stat 'test_unlink_nonexistent': No such file or directory \ + "unlink: Cannot stat 'test_unlink_nonexistent': No such file or directory \ (os error 2)\n", ); } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index a16f1854e..1203c0b1d 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -33,7 +33,16 @@ fn test_stdin_default() { new_ucmd!() .pipe_in_fixture("lorem_ipsum.txt") .run() - .stdout_is(" 13 109 772\n"); + .stdout_is(" 13 109 772\n"); +} + +#[test] +fn test_stdin_explicit() { + new_ucmd!() + .pipe_in_fixture("lorem_ipsum.txt") + .arg("-") + .run() + .stdout_is(" 13 109 772 -\n"); } #[test] @@ -42,9 +51,11 @@ fn test_utf8() { .args(&["-lwmcL"]) .pipe_in_fixture("UTF_8_test.txt") .run() - .stdout_is(" 300 4969 22781 22213 79\n"); - // GNU returns " 300 2086 22219 22781 79" - // TODO: we should fix that to match GNU's behavior + .stdout_is(" 300 4969 22781 22213 79\n"); + // GNU returns " 300 2086 22219 22781 79" + // + // TODO: we should fix the word, character, and byte count to + // match the behavior of GNU wc } #[test] @@ -71,7 +82,7 @@ fn test_stdin_all_counts() { .args(&["-c", "-m", "-l", "-L", "-w"]) .pipe_in_fixture("alice_in_wonderland.txt") .run() - .stdout_is(" 5 57 302 302 66\n"); + .stdout_is(" 5 57 302 302 66\n"); } #[test] @@ -79,7 +90,7 @@ fn test_single_default() { new_ucmd!() .arg("moby_dick.txt") .run() - .stdout_is(" 18 204 1115 moby_dick.txt\n"); + .stdout_is(" 18 204 1115 moby_dick.txt\n"); } #[test] @@ -95,7 +106,7 @@ fn test_single_all_counts() { new_ucmd!() .args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"]) .run() - .stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n"); + .stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n"); } #[test] @@ -108,64 +119,101 @@ fn test_multiple_default() { ]) .run() .stdout_is( - " 13 109 772 
lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ - alice_in_wonderland.txt\n 36 370 2189 total\n", + " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ + alice_in_wonderland.txt\n 36 370 2189 total\n", ); } /// Test for an empty file. #[test] fn test_file_empty() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "emptyfile.txt"]) .run() - .stdout_is(" 0 0 0 0 0 emptyfile.txt\n"); + .stdout_is("0 0 0 0 0 emptyfile.txt\n"); } /// Test for an file containing a single non-whitespace character /// *without* a trailing newline. #[test] fn test_file_single_line_no_trailing_newline() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "notrailingnewline.txt"]) .run() - .stdout_is(" 1 1 2 2 1 notrailingnewline.txt\n"); + .stdout_is("1 1 2 2 1 notrailingnewline.txt\n"); } /// Test for a file that has 100 empty lines (that is, the contents of /// the file are the newline character repeated one hundred times). #[test] fn test_file_many_empty_lines() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "manyemptylines.txt"]) .run() - .stdout_is(" 100 0 100 100 0 manyemptylines.txt\n"); + .stdout_is("100 0 100 100 0 manyemptylines.txt\n"); } /// Test for a file that has one long line comprising only spaces. #[test] fn test_file_one_long_line_only_spaces() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "onelongemptyline.txt"]) .run() - .stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n"); + .stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n"); } /// Test for a file that has one long line comprising a single "word". 
#[test] fn test_file_one_long_word() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "onelongword.txt"]) .run() - .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); + .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); +} + +/// Test that the number of bytes in the file dictate the display width. +/// +/// The width in digits of any count is the width in digits of the +/// number of bytes in the file, regardless of whether the number of +/// bytes are displayed. +#[test] +fn test_file_bytes_dictate_width() { + // This file has 10,001 bytes. Five digits are required to + // represent that. Even though the number of lines is 1 and the + // number of words is 0, each of those counts is formatted with + // five characters, filled with whitespace. + new_ucmd!() + .args(&["-lw", "onelongemptyline.txt"]) + .run() + .stdout_is(" 1 0 onelongemptyline.txt\n"); + + // This file has zero bytes. Only one digit is required to + // represent that. + new_ucmd!() + .args(&["-lw", "emptyfile.txt"]) + .run() + .stdout_is("0 0 emptyfile.txt\n"); +} + +/// Test that getting counts from a directory is an error. +#[test] +fn test_read_from_directory_error() { + // TODO To match GNU `wc`, the `stdout` should be: + // + // " 0 0 0 .\n" + // + new_ucmd!() + .args(&["."]) + .fails() + .stderr_contains(".: Is a directory\n") + .stdout_is("0 0 0 .\n"); +} + +/// Test that getting counts from nonexistent file is an error. 
+#[test] +fn test_read_from_nonexistent_file() { + new_ucmd!() + .args(&["bogusfile"]) + .fails() + .stderr_contains("bogusfile: No such file or directory\n"); } diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index a5637f23a..21b5eb93e 100644 --- a/tests/by-util/test_who.rs +++ b/tests/by-util/test_who.rs @@ -1,28 +1,28 @@ use crate::common::util::*; -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_count() { for opt in vec!["-q", "--count"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_boot() { for opt in vec!["-b", "--boot"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_heading() { for opt in vec!["-H", "--heading"] { @@ -30,7 +30,7 @@ fn test_heading() { // * minor whitespace differences occur between platform built-in outputs; // specifically number of TABs between "TIME" and "COMMENT" may be variant let actual = new_ucmd!().arg(opt).succeeds().stdout_move_str(); - let expect = expected_result(opt); + let expect = expected_result(&[opt]); println!("actual: {:?}", actual); println!("expect: {:?}", expect); let v_actual: Vec<&str> = actual.split_whitespace().collect(); @@ -39,205 +39,208 @@ fn test_heading() { } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_short() { for opt in vec!["-s", "--short"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_login() { for opt in vec!["-l", "--login"] { new_ucmd!() .arg(opt) .succeeds() 
- .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_m() { for opt in vec!["-m"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_process() { for opt in vec!["-p", "--process"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] #[test] fn test_runlevel() { for opt in vec!["-r", "--runlevel"] { + #[cfg(any(target_vendor = "apple", target_os = "linux"))] new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); + + #[cfg(not(target_os = "linux"))] + new_ucmd!().arg(opt).succeeds().stdout_is(""); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_time() { for opt in vec!["-t", "--time"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_mesg() { - for opt in vec!["-w", "-T", "--users", "--message", "--writable"] { + // -T, -w, --mesg + // add user's message status as +, - or ? 
+ // --message + // same as -T + // --writable + // same as -T + for opt in vec!["-T", "-w", "--mesg", "--message", "--writable"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] #[test] fn test_arg1_arg2() { - let scene = TestScenario::new(util_name!()); + let args = ["am", "i"]; - let expected = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .arg("am") - .arg("i") - .succeeds(); - - scene - .ucmd() - .arg("am") - .arg("i") + new_ucmd!() + .args(&args) .succeeds() - .stdout_is(expected.stdout_str()); + .stdout_is(expected_result(&args)); } #[test] fn test_too_many_args() { - let expected = + const EXPECTED: &str = "error: The value 'u' was provided to '...', but it wasn't expecting any more values"; - new_ucmd!() - .arg("am") - .arg("i") - .arg("u") - .fails() - .stderr_contains(expected); + let args = ["am", "i", "u"]; + new_ucmd!().args(&args).fails().stderr_contains(EXPECTED); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_users() { for opt in vec!["-u", "--users"] { - new_ucmd!() - .arg(opt) - .succeeds() - .stdout_is(expected_result(opt)); + let actual = new_ucmd!().arg(opt).succeeds().stdout_move_str(); + let expect = expected_result(&[opt]); + println!("actual: {:?}", actual); + println!("expect: {:?}", expect); + + let mut v_actual: Vec<&str> = actual.split_whitespace().collect(); + let mut v_expect: Vec<&str> = expect.split_whitespace().collect(); + + // TODO: `--users` differs from GNU's output on macOS + // Diff < left / right > : + // <"runner console 2021-05-20 22:03 00:08 196\n" + // >"runner console 2021-05-20 22:03 old 196\n" + if cfg!(target_os = "macos") { + v_actual.remove(4); + v_expect.remove(4); + } + + assert_eq!(v_actual, v_expect); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_lookup() { for opt in 
vec!["--lookup"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_dead() { for opt in vec!["-d", "--dead"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_all_separately() { + if cfg!(target_os = "macos") { + // TODO: fix `-u`, see: test_users + return; + } + // -a, --all same as -b -d --login -p -r -t -T -u + let args = ["-b", "-d", "--login", "-p", "-r", "-t", "-T", "-u"]; let scene = TestScenario::new(util_name!()); - - let expected = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .arg("-b") - .arg("-d") - .arg("--login") - .arg("-p") - .arg("-r") - .arg("-t") - .arg("-T") - .arg("-u") - .succeeds(); - scene .ucmd() - .arg("-b") - .arg("-d") - .arg("--login") - .arg("-p") - .arg("-r") - .arg("-t") - .arg("-T") - .arg("-u") + .args(&args) .succeeds() - .stdout_is(expected.stdout_str()); - + .stdout_is(expected_result(&args)); scene .ucmd() .arg("--all") .succeeds() - .stdout_is(expected.stdout_str()); + .stdout_is(expected_result(&args)); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_all() { + if cfg!(target_os = "macos") { + // TODO: fix `-u`, see: test_users + return; + } + for opt in vec!["-a", "--all"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] -fn expected_result(arg: &str) -> String { - TestScenario::new(util_name!()) - .cmd_keepenv(util_name!()) +#[cfg(any(target_vendor = "apple", target_os = "linux"))] +fn expected_result(args: &[&str]) -> String { + #[cfg(target_os = "linux")] + let util_name = util_name!(); + #[cfg(target_vendor = "apple")] + let 
util_name = format!("g{}", util_name!()); + + TestScenario::new(&util_name) + .cmd_keepenv(util_name) .env("LANGUAGE", "C") - .args(&[arg]) + .args(args) .succeeds() .stdout_move_str() } diff --git a/tests/by-util/test_whoami.rs b/tests/by-util/test_whoami.rs index dc6a1ceed..a98541b2d 100644 --- a/tests/by-util/test_whoami.rs +++ b/tests/by-util/test_whoami.rs @@ -5,7 +5,7 @@ use crate::common::util::*; // considered okay. If we are not inside the CI this calls assert!(result.success). // // From the Logs: "Build (ubuntu-18.04, x86_64-unknown-linux-gnu, feat_os_unix, use-cross)" -// stderr: "whoami: error: failed to get username" +// stderr: "whoami: failed to get username" // Maybe: "adduser --uid 1001 username" can put things right? fn skipping_test_is_okay(result: &CmdResult, needle: &str) -> bool { if !result.succeeded() { diff --git a/tests/common/util.rs b/tests/common/util.rs index 1ade70127..6f9f779ef 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -7,7 +7,7 @@ use std::env; #[cfg(not(windows))] use std::ffi::CString; use std::ffi::OsStr; -use std::fs::{self, File, OpenOptions}; +use std::fs::{self, hard_link, File, OpenOptions}; use std::io::{Read, Result, Write}; #[cfg(unix)] use std::os::unix::fs::{symlink as symlink_dir, symlink as symlink_file}; @@ -163,7 +163,7 @@ impl CmdResult { /// asserts that the command's exit code is the same as the given one pub fn status_code(&self, code: i32) -> &CmdResult { - assert!(self.code == Some(code)); + assert_eq!(self.code, Some(code)); self } @@ -295,17 +295,32 @@ impl CmdResult { } pub fn stdout_contains>(&self, cmp: T) -> &CmdResult { - assert!(self.stdout_str().contains(cmp.as_ref())); + assert!( + self.stdout_str().contains(cmp.as_ref()), + "'{}' does not contain '{}'", + self.stdout_str(), + cmp.as_ref() + ); self } pub fn stderr_contains>(&self, cmp: T) -> &CmdResult { - assert!(self.stderr_str().contains(cmp.as_ref())); + assert!( + self.stderr_str().contains(cmp.as_ref()), + "'{}' does not 
contain '{}'", + self.stderr_str(), + cmp.as_ref() + ); self } pub fn stdout_does_not_contain>(&self, cmp: T) -> &CmdResult { - assert!(!self.stdout_str().contains(cmp.as_ref())); + assert!( + !self.stdout_str().contains(cmp.as_ref()), + "'{}' contains '{}' but should not", + self.stdout_str(), + cmp.as_ref(), + ); self } @@ -509,6 +524,14 @@ impl AtPath { } } + pub fn hard_link(&self, src: &str, dst: &str) { + log_info( + "hard_link", + &format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)), + ); + hard_link(&self.plus(src), &self.plus(dst)).unwrap(); + } + pub fn symlink_file(&self, src: &str, dst: &str) { log_info( "symlink", @@ -665,6 +688,10 @@ impl TestScenario { cmd } + /// Returns builder for invoking any system command. Paths given are treated + /// relative to the environment's unique temporary test directory. + /// Differs from the builder returned by `cmd` in that `cmd_keepenv` does not call + /// `Command::env_clear` (Clears the entire environment map for the child process.) 
pub fn cmd_keepenv>(&self, bin: S) -> UCommand { UCommand::new_from_tmp(bin, self.tmpd.clone(), false) } diff --git a/tests/fixtures/head/emptyfile.txt b/tests/fixtures/head/emptyfile.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/fixtures/sort/ext_stable.expected b/tests/fixtures/sort/ext_stable.expected new file mode 100644 index 000000000..11ca4deb7 --- /dev/null +++ b/tests/fixtures/sort/ext_stable.expected @@ -0,0 +1,4 @@ +0a +0a +0b +0b diff --git a/tests/fixtures/sort/ext_stable.txt b/tests/fixtures/sort/ext_stable.txt new file mode 100644 index 000000000..11ca4deb7 --- /dev/null +++ b/tests/fixtures/sort/ext_stable.txt @@ -0,0 +1,4 @@ +0a +0a +0b +0b diff --git a/tests/fixtures/sort/merge_stable.expected b/tests/fixtures/sort/merge_stable.expected new file mode 100644 index 000000000..49f57888d --- /dev/null +++ b/tests/fixtures/sort/merge_stable.expected @@ -0,0 +1,3 @@ +0a +0c +0b diff --git a/tests/fixtures/sort/merge_stable_1.txt b/tests/fixtures/sort/merge_stable_1.txt new file mode 100644 index 000000000..20528104f --- /dev/null +++ b/tests/fixtures/sort/merge_stable_1.txt @@ -0,0 +1,2 @@ +0a +0c \ No newline at end of file diff --git a/tests/fixtures/sort/merge_stable_2.txt b/tests/fixtures/sort/merge_stable_2.txt new file mode 100644 index 000000000..d3523d976 --- /dev/null +++ b/tests/fixtures/sort/merge_stable_2.txt @@ -0,0 +1 @@ +0b \ No newline at end of file diff --git a/util/build-gnu.sh b/util/build-gnu.sh new file mode 100644 index 000000000..9d73450f6 --- /dev/null +++ b/util/build-gnu.sh @@ -0,0 +1,99 @@ +#!/bin/bash +set -e +if test ! -d ../gnu; then + echo "Could not find ../gnu" + echo "git clone git@github.com:coreutils/coreutils.git ../gnu" + exit 1 +fi +if test ! 
-d ../gnulib; then + echo "Could not find ../gnulib" + echo "git clone git@github.com:coreutils/gnulib.git ../gnulib" + exit 1 +fi + + +pushd $(pwd) +make PROFILE=release +BUILDDIR="$PWD/target/release/" +cp "${BUILDDIR}/install" "${BUILDDIR}/ginstall" # The GNU tests rename this script before running, to avoid confusion with the make target +# Create *sum binaries +for sum in b2sum md5sum sha1sum sha224sum sha256sum sha384sum sha512sum +do + sum_path="${BUILDDIR}/${sum}" + test -f "${sum_path}" || cp "${BUILDDIR}/hashsum" "${sum_path}" +done +test -f "${BUILDDIR}/[" || cp "${BUILDDIR}/test" "${BUILDDIR}/[" +popd +GNULIB_SRCDIR="$PWD/../gnulib" +pushd ../gnu/ + +# Any binaries that aren't built become `false` so their tests fail +for binary in $(./build-aux/gen-lists-of-programs.sh --list-progs) +do + bin_path="${BUILDDIR}/${binary}" + test -f "${bin_path}" || { echo "'${binary}' was not built with uutils, using the 'false' program"; cp "${BUILDDIR}/false" "${bin_path}"; } +done + +./bootstrap --gnulib-srcdir="$GNULIB_SRCDIR" +./configure --quiet --disable-gcc-warnings +#Add timeout to to protect against hangs +sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver +# Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils +sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile +sed -i 's| tr | /usr/bin/tr |' tests/init.sh +make +# Generate the factor tests, so they can be fixed +# Used to be 36. 
Reduced to 20 to decrease the log size +for i in {00..20} +do + make tests/factor/t${i}.sh +done + +# strip the long stuff +for i in {21..36} +do + sed -i -e "s/\$(tf)\/t${i}.sh//g" Makefile +done + + +grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||' +sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh + +# Remove tests checking for --version & --help +# Not really interesting for us and logs are too big +sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \ + -e '/tests\/misc\/help-version.sh/ D' \ + -e '/tests\/misc\/help-version-getopt.sh/ D' \ + Makefile + +# logs are clotted because of this test +sed -i -e '/tests\/misc\/seq-precision.sh/ D' \ + Makefile + +# printf doesn't limit the values used in its arg, so this produced ~2GB of output +sed -i '/INT_OFLOW/ D' tests/misc/printf.sh + +# Use the system coreutils where the test fails due to error in a util that is not the one being tested +sed -i 's|stat|/usr/bin/stat|' tests/chgrp/basic.sh tests/cp/existing-perm-dir.sh tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh +sed -i 's|ls -|/usr/bin/ls -|' tests/chgrp/posix-H.sh tests/chown/deref.sh tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh tests/du/8gb.sh +sed -i 's|mkdir |/usr/bin/mkdir |' tests/cp/existing-perm-dir.sh tests/rm/empty-inacc.sh +sed -i 's|timeout \([[:digit:]]\)| /usr/bin/timeout \1|' tests/tail-2/inotify-rotate.sh tests/tail-2/inotify-dir-recreate.sh tests/tail-2/inotify-rotate-resources.sh tests/cp/parent-perm-race.sh tests/ls/infloop.sh tests/misc/sort-exit-early.sh tests/misc/sort-NaN-infloop.sh tests/misc/uniq-perf.sh tests/tail-2/inotify-only-regular.sh tests/tail-2/pipe-f2.sh tests/tail-2/retry.sh tests/tail-2/symlink.sh tests/tail-2/wait.sh tests/tail-2/pid.sh tests/dd/stats.sh tests/tail-2/follow-name.sh tests/misc/shuf.sh # Don't break the function called 'grep_timeout' +sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/mkdir/p-3.sh 
tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/du/inaccessible-cwd.sh tests/mv/i-2.sh tests/chgrp/basic.sh tests/misc/shuf.sh +sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh +sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh +sed -i 's|truncate |/usr/bin/truncate |' tests/split/fail.sh +sed -i 's|dd |/usr/bin/dd |' tests/du/8gb.sh tests/tail-2/big-4gb.sh init.cfg +sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh +sed -i 's|touch |/usr/bin/touch |' tests/cp/preserve-link.sh tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/ls/abmon-align.sh tests/ls/rt-1.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh +sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh +sed -i 's|printf |/usr/bin/printf |' tests/dd/ascii.sh +sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh +sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh +sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh + +#Add specific timeout to tests that currently hang to limit time spent waiting +sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh +sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh + +test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" + diff --git a/util/run-gnu-test.sh b/util/run-gnu-test.sh new file mode 100644 index 000000000..b9948ccd3 --- /dev/null +++ b/util/run-gnu-test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e +BUILDDIR="${PWD}/uutils/target/release" +GNULIB_DIR="${PWD}/gnulib" +pushd gnu + +timeout -sKILL 2h make -j "$(nproc)" check SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no || : # Kill after 4 hours in case something gets stuck in make