1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge branch 'main' into fuzz-cksum2

This commit is contained in:
Sylvestre Ledru 2024-12-28 12:13:35 +01:00 committed by GitHub
commit 0d83c440e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
132 changed files with 4718 additions and 2380 deletions

View file

@ -1,4 +1,4 @@
msrv = "1.70.0" msrv = "1.79.0"
cognitive-complexity-threshold = 24 cognitive-complexity-threshold = 24
missing-docs-in-crate-items = true missing-docs-in-crate-items = true
check-private-items = true check-private-items = true

View file

@ -11,7 +11,7 @@ env:
PROJECT_NAME: coreutils PROJECT_NAME: coreutils
PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_DESC: "Core universal (cross-platform) utilities"
PROJECT_AUTH: "uutils" PROJECT_AUTH: "uutils"
RUST_MIN_SRV: "1.70.0" RUST_MIN_SRV: "1.79.0"
# * style job configuration # * style job configuration
STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis
@ -37,6 +37,8 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: EmbarkStudios/cargo-deny-action@v2 - uses: EmbarkStudios/cargo-deny-action@v2
style_deps: style_deps:
@ -54,6 +56,8 @@ jobs:
- { os: windows-latest , features: feat_os_windows } - { os: windows-latest , features: feat_os_windows }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@nightly - uses: dtolnay/rust-toolchain@nightly
## note: requires 'nightly' toolchain b/c `cargo-udeps` uses the `rustc` '-Z save-analysis' option ## note: requires 'nightly' toolchain b/c `cargo-udeps` uses the `rustc` '-Z save-analysis' option
## * ... ref: <https://github.com/est31/cargo-udeps/issues/73> ## * ... ref: <https://github.com/est31/cargo-udeps/issues/73>
@ -106,13 +110,15 @@ jobs:
# - { os: windows-latest , features: feat_os_windows } # - { os: windows-latest , features: feat_os_windows }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: stable toolchain: stable
components: clippy components: clippy
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Initialize workflow variables - name: Initialize workflow variables
id: vars id: vars
shell: bash shell: bash
@ -139,7 +145,7 @@ jobs:
shell: bash shell: bash
run: | run: |
RUSTDOCFLAGS="-Dwarnings" cargo doc ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-deps --workspace --document-private-items RUSTDOCFLAGS="-Dwarnings" cargo doc ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-deps --workspace --document-private-items
- uses: DavidAnson/markdownlint-cli2-action@v17 - uses: DavidAnson/markdownlint-cli2-action@v18
with: with:
fix: "true" fix: "true"
globs: | globs: |
@ -159,6 +165,8 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: ${{ env.RUST_MIN_SRV }} toolchain: ${{ env.RUST_MIN_SRV }}
@ -166,7 +174,7 @@ jobs:
- uses: taiki-e/install-action@nextest - uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Initialize workflow variables - name: Initialize workflow variables
id: vars id: vars
shell: bash shell: bash
@ -227,6 +235,8 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: "`cargo update` testing" - name: "`cargo update` testing"
@ -250,11 +260,13 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: taiki-e/install-action@nextest - uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: "`make build`" - name: "`make build`"
shell: bash shell: bash
run: | run: |
@ -304,11 +316,13 @@ jobs:
- { os: windows-latest , features: feat_os_windows } - { os: windows-latest , features: feat_os_windows }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: taiki-e/install-action@nextest - uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Test - name: Test
run: cargo nextest run --hide-progress-bar --profile ci --features ${{ matrix.job.features }} run: cargo nextest run --hide-progress-bar --profile ci --features ${{ matrix.job.features }}
env: env:
@ -331,11 +345,13 @@ jobs:
- { os: windows-latest , features: feat_os_windows } - { os: windows-latest , features: feat_os_windows }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@nightly - uses: dtolnay/rust-toolchain@nightly
- uses: taiki-e/install-action@nextest - uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Test - name: Test
run: cargo nextest run --hide-progress-bar --profile ci --features ${{ matrix.job.features }} run: cargo nextest run --hide-progress-bar --profile ci --features ${{ matrix.job.features }}
env: env:
@ -355,10 +371,12 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Install dependencies - name: Install dependencies
shell: bash shell: bash
run: | run: |
@ -397,14 +415,14 @@ jobs:
--arg multisize "$SIZE_MULTI" \ --arg multisize "$SIZE_MULTI" \
'{($date): { sha: $sha, size: $size, multisize: $multisize, }}' > size-result.json '{($date): { sha: $sha, size: $size, multisize: $multisize, }}' > size-result.json
- name: Download the previous individual size result - name: Download the previous individual size result
uses: dawidd6/action-download-artifact@v6 uses: dawidd6/action-download-artifact@v7
with: with:
workflow: CICD.yml workflow: CICD.yml
name: individual-size-result name: individual-size-result
repo: uutils/coreutils repo: uutils/coreutils
path: dl path: dl
- name: Download the previous size result - name: Download the previous size result
uses: dawidd6/action-download-artifact@v6 uses: dawidd6/action-download-artifact@v7
with: with:
workflow: CICD.yml workflow: CICD.yml
name: size-result name: size-result
@ -485,6 +503,8 @@ jobs:
- { os: windows-latest , target: aarch64-pc-windows-msvc , features: feat_os_windows, use-cross: use-cross , skip-tests: true } - { os: windows-latest , target: aarch64-pc-windows-msvc , features: feat_os_windows, use-cross: use-cross , skip-tests: true }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: ${{ env.RUST_MIN_SRV }} toolchain: ${{ env.RUST_MIN_SRV }}
@ -493,7 +513,7 @@ jobs:
with: with:
key: "${{ matrix.job.os }}_${{ matrix.job.target }}" key: "${{ matrix.job.os }}_${{ matrix.job.target }}"
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Initialize workflow variables - name: Initialize workflow variables
id: vars id: vars
shell: bash shell: bash
@ -753,6 +773,7 @@ jobs:
uses: softprops/action-gh-release@v2 uses: softprops/action-gh-release@v2
if: steps.vars.outputs.DEPLOY if: steps.vars.outputs.DEPLOY
with: with:
draft: true
files: | files: |
${{ steps.vars.outputs.STAGING }}/${{ steps.vars.outputs.PKG_NAME }} ${{ steps.vars.outputs.STAGING }}/${{ steps.vars.outputs.PKG_NAME }}
${{ steps.vars.outputs.STAGING }}/${{ steps.vars.outputs.DPKG_NAME }} ${{ steps.vars.outputs.STAGING }}/${{ steps.vars.outputs.DPKG_NAME }}
@ -779,9 +800,11 @@ jobs:
## VARs setup ## VARs setup
echo "TEST_SUMMARY_FILE=busybox-result.json" >> $GITHUB_OUTPUT echo "TEST_SUMMARY_FILE=busybox-result.json" >> $GITHUB_OUTPUT
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Install/setup prerequisites - name: Install/setup prerequisites
shell: bash shell: bash
run: | run: |
@ -859,13 +882,15 @@ jobs:
TEST_SUMMARY_FILE="toybox-result.json" TEST_SUMMARY_FILE="toybox-result.json"
outputs TEST_SUMMARY_FILE outputs TEST_SUMMARY_FILE
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: ${{ env.RUST_MIN_SRV }} toolchain: ${{ env.RUST_MIN_SRV }}
components: rustfmt components: rustfmt
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Build coreutils as multiple binaries - name: Build coreutils as multiple binaries
shell: bash shell: bash
run: | run: |
@ -934,6 +959,8 @@ jobs:
os: [ubuntu-latest, macos-latest, windows-latest] os: [ubuntu-latest, macos-latest, windows-latest]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: build and test all programs individually - name: build and test all programs individually

View file

@ -30,6 +30,8 @@ jobs:
contents: read contents: read
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Run ShellCheck - name: Run ShellCheck
uses: ludeeus/action-shellcheck@master uses: ludeeus/action-shellcheck@master
env: env:
@ -46,6 +48,8 @@ jobs:
contents: read contents: read
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup shfmt - name: Setup shfmt
uses: mfinelli/setup-shfmt@v3 uses: mfinelli/setup-shfmt@v3
- name: Run shfmt - name: Run shfmt

View file

@ -27,6 +27,8 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Initialize job variables - name: Initialize job variables
id: vars id: vars
shell: bash shell: bash
@ -86,6 +88,8 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Initialize job variables - name: Initialize job variables
id: vars id: vars
shell: bash shell: bash

View file

@ -4,7 +4,7 @@ on:
workflow_run: workflow_run:
workflows: ["GnuTests"] workflows: ["GnuTests"]
types: types:
- completed - completed # zizmor: ignore[dangerous-triggers]
permissions: {} permissions: {}
jobs: jobs:

View file

@ -23,6 +23,9 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs: jobs:
gnu: gnu:
permissions: permissions:
@ -45,9 +48,9 @@ jobs:
path_reference="reference" path_reference="reference"
outputs path_GNU path_GNU_tests path_reference path_UUTILS outputs path_GNU path_GNU_tests path_reference path_UUTILS
# #
repo_default_branch="${{ github.event.repository.default_branch }}" repo_default_branch="$DEFAULT_BRANCH"
repo_GNU_ref="v9.5" repo_GNU_ref="v9.5"
repo_reference_branch="${{ github.event.repository.default_branch }}" repo_reference_branch="$DEFAULT_BRANCH"
outputs repo_default_branch repo_GNU_ref repo_reference_branch outputs repo_default_branch repo_GNU_ref repo_reference_branch
# #
SUITE_LOG_FILE="${path_GNU_tests}/test-suite.log" SUITE_LOG_FILE="${path_GNU_tests}/test-suite.log"
@ -62,6 +65,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
path: '${{ steps.vars.outputs.path_UUTILS }}' path: '${{ steps.vars.outputs.path_UUTILS }}'
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: stable toolchain: stable
@ -76,6 +80,7 @@ jobs:
path: '${{ steps.vars.outputs.path_GNU }}' path: '${{ steps.vars.outputs.path_GNU }}'
ref: ${{ steps.vars.outputs.repo_GNU_ref }} ref: ${{ steps.vars.outputs.repo_GNU_ref }}
submodules: false submodules: false
persist-credentials: false
- name: Override submodule URL and initialize submodules - name: Override submodule URL and initialize submodules
# Use github instead of upstream git server # Use github instead of upstream git server
@ -86,7 +91,7 @@ jobs:
working-directory: ${{ steps.vars.outputs.path_GNU }} working-directory: ${{ steps.vars.outputs.path_GNU }}
- name: Retrieve reference artifacts - name: Retrieve reference artifacts
uses: dawidd6/action-download-artifact@v6 uses: dawidd6/action-download-artifact@v7
# ref: <https://github.com/dawidd6/action-download-artifact> # ref: <https://github.com/dawidd6/action-download-artifact>
continue-on-error: true ## don't break the build for missing reference artifacts (may be expired or just not generated yet) continue-on-error: true ## don't break the build for missing reference artifacts (may be expired or just not generated yet)
with: with:
@ -244,11 +249,16 @@ jobs:
CURRENT_RUN_ERROR=$(sed -n "s/^ERROR: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort) CURRENT_RUN_ERROR=$(sed -n "s/^ERROR: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort)
REF_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort) REF_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort)
CURRENT_RUN_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort) CURRENT_RUN_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort)
echo "Detailled information:" REF_SKIP=$(sed -n "s/^SKIP: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort)
CURRENT_RUN_SKIP=$(sed -n "s/^SKIP: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort)
echo "Detailed information:"
echo "REF_ERROR = ${REF_ERROR}" echo "REF_ERROR = ${REF_ERROR}"
echo "CURRENT_RUN_ERROR = ${CURRENT_RUN_ERROR}" echo "CURRENT_RUN_ERROR = ${CURRENT_RUN_ERROR}"
echo "REF_FAILING = ${REF_FAILING}" echo "REF_FAILING = ${REF_FAILING}"
echo "CURRENT_RUN_FAILING = ${CURRENT_RUN_FAILING}" echo "CURRENT_RUN_FAILING = ${CURRENT_RUN_FAILING}"
echo "REF_SKIP_PASS = ${REF_SKIP}"
echo "CURRENT_RUN_SKIP = ${CURRENT_RUN_SKIP}"
# Compare failing and error tests # Compare failing and error tests
for LINE in ${CURRENT_RUN_FAILING} for LINE in ${CURRENT_RUN_FAILING}
@ -303,11 +313,22 @@ jobs:
do do
if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_ERROR}" if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_ERROR}"
then then
MSG="Congrats! The gnu test ${LINE} is no longer ERROR!" MSG="Congrats! The gnu test ${LINE} is no longer ERROR! (might be PASS or FAIL)"
echo "::warning ::$MSG" echo "::warning ::$MSG"
echo $MSG >> ${COMMENT_LOG} echo $MSG >> ${COMMENT_LOG}
fi fi
done done
for LINE in ${REF_SKIP}
do
if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_SKIP}"
then
MSG="Congrats! The gnu test ${LINE} is no longer SKIP! (might be PASS, ERROR or FAIL)"
echo "::warning ::$MSG"
echo $MSG >> ${COMMENT_LOG}
fi
done
else else
echo "::warning ::Skipping ${test_type} test failure comparison; no prior reference test logs are available." echo "::warning ::Skipping ${test_type} test failure comparison; no prior reference test logs are available."
fi fi

View file

@ -79,6 +79,8 @@ jobs:
sudo udevadm control --reload-rules sudo udevadm control --reload-rules
sudo udevadm trigger --name-match=kvm sudo udevadm trigger --name-match=kvm
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Collect information about runner - name: Collect information about runner
if: always() if: always()
continue-on-error: true continue-on-error: true
@ -176,7 +178,7 @@ jobs:
util/android-commands.sh sync_host util/android-commands.sh sync_host
util/android-commands.sh build util/android-commands.sh build
util/android-commands.sh tests util/android-commands.sh tests
if [[ "${{ steps.rust-cache.outputs.cache-hit }}" != 'true' ]]; then util/android-commands.sh sync_image; fi; exit 0 if [ "${{ steps.rust-cache.outputs.cache-hit }}" != 'true' ]; then util/android-commands.sh sync_image; fi; exit 0
- name: Collect information about runner ressources - name: Collect information about runner ressources
if: always() if: always()
continue-on-error: true continue-on-error: true

View file

@ -32,6 +32,8 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: stable toolchain: stable
@ -44,7 +46,7 @@ jobs:
## VARs setup ## VARs setup
outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; }
# failure mode # failure mode
unset FAIL_ON_FAULT ; case '${{ env.STYLE_FAIL_ON_FAULT }}' in unset FAIL_ON_FAULT ; case "$STYLE_FAIL_ON_FAULT" in
''|0|f|false|n|no|off) FAULT_TYPE=warning ;; ''|0|f|false|n|no|off) FAULT_TYPE=warning ;;
*) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;; *) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;;
esac; esac;
@ -75,13 +77,15 @@ jobs:
- { os: windows-latest , features: feat_os_windows } - { os: windows-latest , features: feat_os_windows }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
with: with:
toolchain: stable toolchain: stable
components: clippy components: clippy
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Initialize workflow variables - name: Initialize workflow variables
id: vars id: vars
shell: bash shell: bash
@ -89,7 +93,7 @@ jobs:
## VARs setup ## VARs setup
outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; }
# failure mode # failure mode
unset FAIL_ON_FAULT ; case '${{ env.STYLE_FAIL_ON_FAULT }}' in unset FAIL_ON_FAULT ; case "$STYLE_FAIL_ON_FAULT" in
''|0|f|false|n|no|off) FAULT_TYPE=warning ;; ''|0|f|false|n|no|off) FAULT_TYPE=warning ;;
*) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;; *) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;;
esac; esac;
@ -120,6 +124,8 @@ jobs:
- { os: ubuntu-latest , features: feat_os_unix } - { os: ubuntu-latest , features: feat_os_unix }
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- name: Initialize workflow variables - name: Initialize workflow variables
id: vars id: vars
shell: bash shell: bash
@ -127,7 +133,7 @@ jobs:
## VARs setup ## VARs setup
outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; }
# failure mode # failure mode
unset FAIL_ON_FAULT ; case '${{ env.STYLE_FAIL_ON_FAULT }}' in unset FAIL_ON_FAULT ; case "$STYLE_FAIL_ON_FAULT" in
''|0|f|false|n|no|off) FAULT_TYPE=warning ;; ''|0|f|false|n|no|off) FAULT_TYPE=warning ;;
*) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;; *) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;;
esac; esac;
@ -156,6 +162,8 @@ jobs:
steps: steps:
- name: Clone repository - name: Clone repository
uses: actions/checkout@v4 uses: actions/checkout@v4
with:
persist-credentials: false
- name: Check - name: Check
run: npx --yes @taplo/cli fmt --check run: npx --yes @taplo/cli fmt --check

View file

@ -35,11 +35,13 @@ jobs:
RUSTC_WRAPPER: "sccache" RUSTC_WRAPPER: "sccache"
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Prepare, build and test - name: Prepare, build and test
uses: vmactions/freebsd-vm@v1.1.5 uses: vmactions/freebsd-vm@v1.1.6
with: with:
usesh: true usesh: true
sync: rsync sync: rsync
@ -127,11 +129,13 @@ jobs:
RUSTC_WRAPPER: "sccache" RUSTC_WRAPPER: "sccache"
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: Swatinem/rust-cache@v2 - uses: Swatinem/rust-cache@v2
- name: Run sccache-cache - name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.6 uses: mozilla-actions/sccache-action@v0.0.7
- name: Prepare, build and test - name: Prepare, build and test
uses: vmactions/freebsd-vm@v1.1.5 uses: vmactions/freebsd-vm@v1.1.6
with: with:
usesh: true usesh: true
sync: rsync sync: rsync

View file

@ -22,6 +22,8 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@nightly - uses: dtolnay/rust-toolchain@nightly
- name: Install `cargo-fuzz` - name: Install `cargo-fuzz`
run: cargo install cargo-fuzz run: cargo install cargo-fuzz
@ -63,6 +65,8 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@nightly - uses: dtolnay/rust-toolchain@nightly
- name: Install `cargo-fuzz` - name: Install `cargo-fuzz`
run: cargo install cargo-fuzz run: cargo install cargo-fuzz

View file

@ -10,6 +10,7 @@ bytewise
canonicalization canonicalization
canonicalize canonicalize
canonicalizing canonicalizing
capget
codepoint codepoint
codepoints codepoints
codegen codegen
@ -65,6 +66,7 @@ kibi
kibibytes kibibytes
libacl libacl
lcase lcase
llistxattr
lossily lossily
lstat lstat
mebi mebi
@ -108,6 +110,7 @@ seedable
semver semver
semiprime semiprime
semiprimes semiprimes
setcap
setfacl setfacl
shortcode shortcode
shortcodes shortcodes
@ -157,6 +160,8 @@ retval
subdir subdir
val val
vals vals
inval
nofield
# * clippy # * clippy
uninlined uninlined

456
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -16,7 +16,7 @@ repository = "https://github.com/uutils/coreutils"
readme = "README.md" readme = "README.md"
keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
categories = ["command-line-utilities"] categories = ["command-line-utilities"]
rust-version = "1.70.0" rust-version = "1.79.0"
edition = "2021" edition = "2021"
build = "build.rs" build = "build.rs"
@ -276,12 +276,12 @@ chrono = { version = "0.4.38", default-features = false, features = [
"alloc", "alloc",
"clock", "clock",
] } ] }
clap = { version = "4.4", features = ["wrap_help", "cargo"] } clap = { version = "4.5", features = ["wrap_help", "cargo"] }
clap_complete = "4.4" clap_complete = "4.4"
clap_mangen = "0.2" clap_mangen = "0.2"
compare = "0.1.0" compare = "0.1.0"
coz = { version = "0.1.3" } coz = { version = "0.1.3" }
crossterm = ">=0.27.0" crossterm = "0.28.1"
ctrlc = { version = "3.4.4", features = ["termination"] } ctrlc = { version = "3.4.4", features = ["termination"] }
dns-lookup = { version = "2.0.4" } dns-lookup = { version = "2.0.4" }
exacl = "0.12.0" exacl = "0.12.0"
@ -332,17 +332,17 @@ tempfile = "3.10.1"
uutils_term_grid = "0.6" uutils_term_grid = "0.6"
terminal_size = "0.4.0" terminal_size = "0.4.0"
textwrap = { version = "0.16.1", features = ["terminal_size"] } textwrap = { version = "0.16.1", features = ["terminal_size"] }
thiserror = "1.0.59" thiserror = "2.0.3"
time = { version = "0.3.36" } time = { version = "0.3.36" }
unicode-segmentation = "1.11.0" unicode-segmentation = "1.11.0"
unicode-width = "0.1.12" unicode-width = "0.2.0"
utf-8 = "0.7.6" utf-8 = "0.7.6"
utmp-classic = "0.1.6" utmp-classic = "0.1.6"
walkdir = "2.5" walkdir = "2.5"
winapi-util = "0.1.8" winapi-util = "0.1.8"
windows-sys = { version = "0.59.0", default-features = false } windows-sys = { version = "0.59.0", default-features = false }
xattr = "1.3.1" xattr = "1.3.1"
zip = { version = "1.1.4", default-features = false, features = ["deflate"] } zip = { version = "2.2.2", default-features = false, features = ["deflate"] }
hex = "0.4.3" hex = "0.4.3"
md-5 = "0.10.6" md-5 = "0.10.6"
@ -354,10 +354,10 @@ blake3 = "1.5.1"
sm3 = "0.4.2" sm3 = "0.4.2"
digest = "0.10.7" digest = "0.10.7"
uucore = { version = ">=0.0.19", package = "uucore", path = "src/uucore" } uucore = { version = "0.0.28", package = "uucore", path = "src/uucore" }
uucore_procs = { version = ">=0.0.19", package = "uucore_procs", path = "src/uucore_procs" } uucore_procs = { version = "0.0.28", package = "uucore_procs", path = "src/uucore_procs" }
uu_ls = { version = ">=0.0.18", path = "src/uu/ls" } uu_ls = { version = "0.0.28", path = "src/uu/ls" }
uu_base32 = { version = ">=0.0.18", path = "src/uu/base32" } uu_base32 = { version = "0.0.28", path = "src/uu/base32" }
[dependencies] [dependencies]
clap = { workspace = true } clap = { workspace = true }

View file

@ -147,7 +147,6 @@ UNIX_PROGS := \
nohup \ nohup \
pathchk \ pathchk \
pinky \ pinky \
sleep \
stat \ stat \
stdbuf \ stdbuf \
timeout \ timeout \

View file

@ -14,7 +14,7 @@
[![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils) [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils)
[![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils) [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils)
![MSRV](https://img.shields.io/badge/MSRV-1.70.0-brightgreen) ![MSRV](https://img.shields.io/badge/MSRV-1.79.0-brightgreen)
</div> </div>
@ -70,7 +70,7 @@ the [coreutils docs](https://github.com/uutils/uutils.github.io) repository.
### Rust Version ### Rust Version
uutils follows Rust's release channels and is tested against stable, beta and uutils follows Rust's release channels and is tested against stable, beta and
nightly. The current Minimum Supported Rust Version (MSRV) is `1.70.0`. nightly. The current Minimum Supported Rust Version (MSRV) is `1.79.0`.
## Building ## Building

View file

@ -104,6 +104,12 @@ skip = [
{ name = "terminal_size", version = "0.2.6" }, { name = "terminal_size", version = "0.2.6" },
# ansi-width, console, os_display # ansi-width, console, os_display
{ name = "unicode-width", version = "0.1.13" }, { name = "unicode-width", version = "0.1.13" },
# notify
{ name = "mio", version = "0.8.11" },
# various crates
{ name = "thiserror", version = "1.0.69" },
# thiserror
{ name = "thiserror-impl", version = "1.0.69" },
] ]
# spell-checker: enable # spell-checker: enable

65
fuzz/Cargo.lock generated
View file

@ -546,20 +546,25 @@ dependencies = [
] ]
[[package]] [[package]]
name = "libc" name = "lazy_static"
version = "0.2.161" version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]] [[package]]
name = "libfuzzer-sys" name = "libfuzzer-sys"
version = "0.4.7" version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7" checksum = "9b9569d2f74e257076d8c6bfa73fb505b46b851e51ddaecc825944aa3bed17fa"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"cc", "cc",
"once_cell",
] ]
[[package]] [[package]]
@ -714,7 +719,7 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6229bad892b46b0dcfaaeb18ad0d2e56400f5aaea05b768bde96e73676cf75" checksum = "7a6229bad892b46b0dcfaaeb18ad0d2e56400f5aaea05b768bde96e73676cf75"
dependencies = [ dependencies = [
"unicode-width", "unicode-width 0.1.12",
] ]
[[package]] [[package]]
@ -742,9 +747,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.83" version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
@ -850,9 +855,9 @@ dependencies = [
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "0.38.37" version = "0.38.40"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0"
dependencies = [ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"errno", "errno",
@ -948,20 +953,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.109" version = "2.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -970,9 +964,9 @@ dependencies = [
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.13.0" version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"fastrand", "fastrand",
@ -993,18 +987,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.61" version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.61" version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -1044,6 +1038,12 @@ version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6"
[[package]]
name = "unicode-width"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
[[package]] [[package]]
name = "utf8parse" name = "utf8parse"
version = "0.2.1" version = "0.2.1"
@ -1146,7 +1146,7 @@ dependencies = [
"rayon", "rayon",
"self_cell", "self_cell",
"tempfile", "tempfile",
"unicode-width", "unicode-width 0.2.0",
"uucore", "uucore",
] ]
@ -1186,7 +1186,7 @@ dependencies = [
"libc", "libc",
"nix 0.29.0", "nix 0.29.0",
"thiserror", "thiserror",
"unicode-width", "unicode-width 0.2.0",
"uucore", "uucore",
] ]
@ -1204,6 +1204,7 @@ dependencies = [
"glob", "glob",
"hex", "hex",
"itertools", "itertools",
"lazy_static",
"libc", "libc",
"md-5", "md-5",
"memchr", "memchr",

View file

@ -172,7 +172,7 @@ struct MDWriter<'a, 'b> {
markdown: Option<String>, markdown: Option<String>,
} }
impl<'a, 'b> MDWriter<'a, 'b> { impl MDWriter<'_, '_> {
/// # Errors /// # Errors
/// Returns an error if the writer fails. /// Returns an error if the writer fails.
fn markdown(&mut self) -> io::Result<()> { fn markdown(&mut self) -> io::Result<()> {

View file

@ -1,5 +1,3 @@
# spell-checker:ignore proptest
[package] [package]
name = "uu_base32" name = "uu_base32"
version = "0.0.28" version = "0.0.28"
@ -22,9 +20,6 @@ path = "src/base32.rs"
clap = { workspace = true } clap = { workspace = true }
uucore = { workspace = true, features = ["encoding"] } uucore = { workspace = true, features = ["encoding"] }
[dev-dependencies]
proptest = "1.5.0"
[[bin]] [[bin]]
name = "base32" name = "base32"
path = "src/main.rs" path = "src/main.rs"

View file

@ -5,6 +5,7 @@
pub mod base_common; pub mod base_common;
use base_common::ReadSeek;
use clap::Command; use clap::Command;
use uucore::{encoding::Format, error::UResult, help_about, help_usage}; use uucore::{encoding::Format, error::UResult, help_about, help_usage};
@ -17,7 +18,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let mut input = base_common::get_input(&config)?; let mut input: Box<dyn ReadSeek> = base_common::get_input(&config)?;
base_common::handle_input(&mut input, format, config) base_common::handle_input(&mut input, format, config)
} }

View file

@ -3,15 +3,15 @@
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore hexupper lsbf msbf unpadded // spell-checker:ignore hexupper lsbf msbf unpadded nopad aGVsbG8sIHdvcmxkIQ
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, Arg, ArgAction, Command};
use std::fs::File; use std::fs::File;
use std::io::{self, ErrorKind, Read}; use std::io::{self, ErrorKind, Read, Seek, SeekFrom};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::encoding::{ use uucore::encoding::{
for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}, for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, BASE64_NOPAD, HEXUPPER_PERMISSIVE},
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF, Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
}; };
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode}; use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
@ -143,25 +143,50 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
) )
} }
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> { /// A trait alias for types that implement both `Read` and `Seek`.
pub trait ReadSeek: Read + Seek {}
/// Automatically implement the `ReadSeek` trait for any type that implements both `Read` and `Seek`.
impl<T: Read + Seek> ReadSeek for T {}
pub fn get_input(config: &Config) -> UResult<Box<dyn ReadSeek>> {
match &config.to_read { match &config.to_read {
Some(path_buf) => { Some(path_buf) => {
// Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode` // Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
let file = let file =
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?; File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
Ok(Box::new(file)) Ok(Box::new(file))
} }
None => { None => {
let stdin_lock = io::stdin().lock(); let mut buffer = Vec::new();
io::stdin().read_to_end(&mut buffer)?;
Ok(Box::new(stdin_lock)) Ok(Box::new(io::Cursor::new(buffer)))
} }
} }
} }
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> { /// Determines if the input buffer ends with padding ('=') after trimming trailing whitespace.
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); fn has_padding<R: Read + Seek>(input: &mut R) -> UResult<bool> {
let mut buf = Vec::new();
input
.read_to_end(&mut buf)
.map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?;
// Reverse iterator and skip trailing whitespace without extra collections
let has_padding = buf
.iter()
.rfind(|&&byte| !byte.is_ascii_whitespace())
.is_some_and(|&byte| byte == b'=');
input.seek(SeekFrom::Start(0))?;
Ok(has_padding)
}
pub fn handle_input<R: Read + Seek>(input: &mut R, format: Format, config: Config) -> UResult<()> {
let has_padding = has_padding(input)?;
let supports_fast_decode_and_encode =
get_supports_fast_decode_and_encode(format, config.decode, has_padding);
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
@ -184,7 +209,11 @@ pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> U
} }
} }
pub fn get_supports_fast_decode_and_encode(format: Format) -> Box<dyn SupportsFastDecodeAndEncode> { pub fn get_supports_fast_decode_and_encode(
format: Format,
decode: bool,
has_padding: bool,
) -> Box<dyn SupportsFastDecodeAndEncode> {
const BASE16_VALID_DECODING_MULTIPLE: usize = 2; const BASE16_VALID_DECODING_MULTIPLE: usize = 2;
const BASE2_VALID_DECODING_MULTIPLE: usize = 8; const BASE2_VALID_DECODING_MULTIPLE: usize = 8;
const BASE32_VALID_DECODING_MULTIPLE: usize = 8; const BASE32_VALID_DECODING_MULTIPLE: usize = 8;
@ -197,11 +226,11 @@ pub fn get_supports_fast_decode_and_encode(format: Format) -> Box<dyn SupportsFa
match format { match format {
Format::Base16 => Box::from(EncodingWrapper::new( Format::Base16 => Box::from(EncodingWrapper::new(
HEXUPPER, HEXUPPER_PERMISSIVE,
BASE16_VALID_DECODING_MULTIPLE, BASE16_VALID_DECODING_MULTIPLE,
BASE16_UNPADDED_MULTIPLE, BASE16_UNPADDED_MULTIPLE,
// spell-checker:disable-next-line // spell-checker:disable-next-line
b"0123456789ABCDEF", b"0123456789ABCDEFabcdef",
)), )),
Format::Base2Lsbf => Box::from(EncodingWrapper::new( Format::Base2Lsbf => Box::from(EncodingWrapper::new(
BASE2LSBF, BASE2LSBF,
@ -231,13 +260,24 @@ pub fn get_supports_fast_decode_and_encode(format: Format) -> Box<dyn SupportsFa
// spell-checker:disable-next-line // spell-checker:disable-next-line
b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
)), )),
Format::Base64 => Box::from(EncodingWrapper::new( Format::Base64 => {
BASE64, let alphabet: &[u8] = if has_padding {
BASE64_VALID_DECODING_MULTIPLE, &b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/="[..]
BASE64_UNPADDED_MULTIPLE, } else {
// spell-checker:disable-next-line &b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"[..]
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", };
)), let wrapper = if decode && !has_padding {
BASE64_NOPAD
} else {
BASE64
};
Box::from(EncodingWrapper::new(
wrapper,
BASE64_VALID_DECODING_MULTIPLE,
BASE64_UNPADDED_MULTIPLE,
alphabet,
))
}
Format::Base64Url => Box::from(EncodingWrapper::new( Format::Base64Url => Box::from(EncodingWrapper::new(
BASE64URL, BASE64URL,
BASE64_VALID_DECODING_MULTIPLE, BASE64_VALID_DECODING_MULTIPLE,
@ -316,6 +356,7 @@ pub mod fast_encode {
encoded_buffer: &mut VecDeque<u8>, encoded_buffer: &mut VecDeque<u8>,
output: &mut dyn Write, output: &mut dyn Write,
is_cleanup: bool, is_cleanup: bool,
empty_wrap: bool,
) -> io::Result<()> { ) -> io::Result<()> {
// TODO // TODO
// `encoded_buffer` only has to be a VecDeque if line wrapping is enabled // `encoded_buffer` only has to be a VecDeque if line wrapping is enabled
@ -324,7 +365,9 @@ pub mod fast_encode {
output.write_all(encoded_buffer.make_contiguous())?; output.write_all(encoded_buffer.make_contiguous())?;
if is_cleanup { if is_cleanup {
output.write_all(b"\n")?; if !empty_wrap {
output.write_all(b"\n")?;
}
} else { } else {
encoded_buffer.clear(); encoded_buffer.clear();
} }
@ -377,25 +420,26 @@ pub mod fast_encode {
} }
fn write_to_output( fn write_to_output(
line_wrapping_option: &mut Option<LineWrapping>, line_wrapping: &mut Option<LineWrapping>,
encoded_buffer: &mut VecDeque<u8>, encoded_buffer: &mut VecDeque<u8>,
output: &mut dyn Write, output: &mut dyn Write,
is_cleanup: bool, is_cleanup: bool,
empty_wrap: bool,
) -> io::Result<()> { ) -> io::Result<()> {
// Write all data in `encoded_buffer` to `output` // Write all data in `encoded_buffer` to `output`
if let &mut Some(ref mut li) = line_wrapping_option { if let &mut Some(ref mut li) = line_wrapping {
write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?; write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?;
} else { } else {
write_without_line_breaks(encoded_buffer, output, is_cleanup)?; write_without_line_breaks(encoded_buffer, output, is_cleanup, empty_wrap)?;
} }
Ok(()) Ok(())
} }
// End of helper functions // End of helper functions
pub fn fast_encode<R: Read, W: Write>( pub fn fast_encode(
input: &mut R, input: &mut dyn Read,
mut output: W, output: &mut dyn Write,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
wrap: Option<usize>, wrap: Option<usize>,
) -> UResult<()> { ) -> UResult<()> {
@ -473,16 +517,21 @@ pub mod fast_encode {
)?; )?;
assert!(leftover_buffer.len() < encode_in_chunks_of_size); assert!(leftover_buffer.len() < encode_in_chunks_of_size);
// Write all data in `encoded_buffer` to `output` // Write all data in `encoded_buffer` to `output`
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?; write_to_output(
&mut line_wrapping,
&mut encoded_buffer,
output,
false,
wrap == Some(0),
)?;
} }
Err(er) => { Err(er) => {
let kind = er.kind(); let kind = er.kind();
if kind == ErrorKind::Interrupted { if kind == ErrorKind::Interrupted {
// TODO // Retry reading
// Retry reading? continue;
} }
return Err(USimpleError::new(1, format_read_error(kind))); return Err(USimpleError::new(1, format_read_error(kind)));
@ -499,7 +548,13 @@ pub mod fast_encode {
// Write all data in `encoded_buffer` to output // Write all data in `encoded_buffer` to output
// `is_cleanup` triggers special cleanup-only logic // `is_cleanup` triggers special cleanup-only logic
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, true)?; write_to_output(
&mut line_wrapping,
&mut encoded_buffer,
output,
true,
wrap == Some(0),
)?;
} }
Ok(()) Ok(())
@ -606,9 +661,9 @@ pub mod fast_decode {
} }
// End of helper functions // End of helper functions
pub fn fast_decode<R: Read, W: Write>( pub fn fast_decode(
input: &mut R, input: &mut dyn Read,
mut output: &mut W, output: &mut dyn Write,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
ignore_garbage: bool, ignore_garbage: bool,
) -> UResult<()> { ) -> UResult<()> {
@ -711,14 +766,14 @@ pub mod fast_decode {
assert!(leftover_buffer.len() < decode_in_chunks_of_size); assert!(leftover_buffer.len() < decode_in_chunks_of_size);
// Write all data in `decoded_buffer` to `output` // Write all data in `decoded_buffer` to `output`
write_to_output(&mut decoded_buffer, &mut output)?; write_to_output(&mut decoded_buffer, output)?;
} }
Err(er) => { Err(er) => {
let kind = er.kind(); let kind = er.kind();
if kind == ErrorKind::Interrupted { if kind == ErrorKind::Interrupted {
// TODO // Retry reading
// Retry reading? continue;
} }
return Err(USimpleError::new(1, format_read_error(kind))); return Err(USimpleError::new(1, format_read_error(kind)));
@ -734,7 +789,7 @@ pub mod fast_decode {
.decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?;
// Write all data in `decoded_buffer` to `output` // Write all data in `decoded_buffer` to `output`
write_to_output(&mut decoded_buffer, &mut output)?; write_to_output(&mut decoded_buffer, output)?;
} }
Ok(()) Ok(())
@ -759,3 +814,33 @@ fn format_read_error(kind: ErrorKind) -> String {
format!("read error: {kind_string_capitalized}") format!("read error: {kind_string_capitalized}")
} }
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
#[test]
fn test_has_padding() {
let test_cases = vec![
("aGVsbG8sIHdvcmxkIQ==", true),
("aGVsbG8sIHdvcmxkIQ== ", true),
("aGVsbG8sIHdvcmxkIQ==\n", true),
("aGVsbG8sIHdvcmxkIQ== \n", true),
("aGVsbG8sIHdvcmxkIQ=", true),
("aGVsbG8sIHdvcmxkIQ= ", true),
("aGVsbG8sIHdvcmxkIQ \n", false),
("aGVsbG8sIHdvcmxkIQ", false),
];
for (input, expected) in test_cases {
let mut cursor = Cursor::new(input.as_bytes());
assert_eq!(
has_padding(&mut cursor).unwrap(),
expected,
"Failed for input: '{}'",
input
);
}
}
}

View file

@ -1,430 +0,0 @@
// spell-checker:ignore lsbf msbf proptest
use proptest::{prelude::TestCaseError, prop_assert, prop_assert_eq, test_runner::TestRunner};
use std::io::Cursor;
use uu_base32::base_common::{fast_decode, fast_encode, get_supports_fast_decode_and_encode};
use uucore::encoding::{Format, SupportsFastDecodeAndEncode};
const CASES: u32 = {
#[cfg(debug_assertions)]
{
32
}
#[cfg(not(debug_assertions))]
{
128
}
};
const NORMAL_INPUT_SIZE_LIMIT: usize = {
#[cfg(debug_assertions)]
{
// 256 kibibytes
256 * 1024
}
#[cfg(not(debug_assertions))]
{
// 4 mebibytes
4 * 1024 * 1024
}
};
const LARGE_INPUT_SIZE_LIMIT: usize = 4 * NORMAL_INPUT_SIZE_LIMIT;
// Note that `TestRunner`s cannot be reused
fn get_test_runner() -> TestRunner {
TestRunner::new(proptest::test_runner::Config {
cases: CASES,
failure_persistence: None,
..proptest::test_runner::Config::default()
})
}
fn generic_round_trip(format: Format) {
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
// Make sure empty inputs round trip
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
),
|(ignore_garbage, line_wrap_zero, line_wrap)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
// Empty input
Vec::<u8>::new(),
)
},
)
.unwrap();
}
// Unusually large line wrapping settings
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(512_usize..65_535_usize),
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
// Spend more time on sane line wrapping settings
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
// Test with garbage data
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
// Garbage data to insert
proptest::collection::vec(
(
// Random index
proptest::num::usize::ANY,
// In all of the encodings being tested, non-ASCII bytes are garbage
128_u8..=u8::MAX,
),
0..4_096,
),
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
garbage_data,
input,
)
},
)
.unwrap();
}
// Test small inputs
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
proptest::collection::vec(proptest::num::u8::ANY, 0..1_024),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
// Test small inputs with garbage data
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
// Garbage data to insert
proptest::collection::vec(
(
// Random index
proptest::num::usize::ANY,
// In all of the encodings being tested, non-ASCII bytes are garbage
128_u8..=u8::MAX,
),
0..1_024,
),
proptest::collection::vec(proptest::num::u8::ANY, 0..1_024),
),
|(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
garbage_data,
input,
)
},
)
.unwrap();
}
// Test large inputs
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
proptest::collection::vec(proptest::num::u8::ANY, 0..LARGE_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
}
fn configurable_round_trip(
format: Format,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
ignore_garbage: bool,
line_wrap_zero: bool,
line_wrap: Option<usize>,
garbage_data: Vec<(usize, u8)>,
mut input: Vec<u8>,
) -> Result<(), TestCaseError> {
// Z85 only accepts inputs with lengths divisible by 4
if let Format::Z85 = format {
// Reduce length of "input" until it is divisible by 4
input.truncate((input.len() / 4) * 4);
assert!((input.len() % 4) == 0);
}
let line_wrap_to_use = if line_wrap_zero { Some(0) } else { line_wrap };
let input_len = input.len();
let garbage_data_len = garbage_data.len();
let garbage_data_is_empty = garbage_data_len == 0;
let (input, encoded) = {
let mut output = Vec::with_capacity(input_len * 8);
let mut cursor = Cursor::new(input);
fast_encode::fast_encode(
&mut cursor,
&mut output,
supports_fast_decode_and_encode,
line_wrap_to_use,
)
.unwrap();
(cursor.into_inner(), output)
};
let encoded_or_encoded_with_garbage = if garbage_data_is_empty {
encoded
} else {
let encoded_len = encoded.len();
let encoded_highest_index = match encoded_len.checked_sub(1) {
Some(0) | None => None,
Some(x) => Some(x),
};
let mut garbage_data_indexed = vec![Option::<u8>::None; encoded_len];
let mut encoded_with_garbage = Vec::<u8>::with_capacity(encoded_len + garbage_data_len);
for (index, garbage_byte) in garbage_data {
if let Some(x) = encoded_highest_index {
let index_to_use = index % x;
garbage_data_indexed[index_to_use] = Some(garbage_byte);
} else {
encoded_with_garbage.push(garbage_byte);
}
}
for (index, encoded_byte) in encoded.into_iter().enumerate() {
encoded_with_garbage.push(encoded_byte);
if let Some(garbage_byte) = garbage_data_indexed[index] {
encoded_with_garbage.push(garbage_byte);
}
}
encoded_with_garbage
};
match line_wrap_to_use {
Some(0) => {
let line_endings_count = encoded_or_encoded_with_garbage
.iter()
.filter(|byte| **byte == b'\n')
.count();
// If line wrapping is disabled, there should only be one '\n' character (at the very end of the output)
prop_assert_eq!(line_endings_count, 1);
}
_ => {
// TODO
// Validate other line wrapping settings
}
}
let decoded_or_error = {
let mut output = Vec::with_capacity(input_len);
let mut cursor = Cursor::new(encoded_or_encoded_with_garbage);
match fast_decode::fast_decode(
&mut cursor,
&mut output,
supports_fast_decode_and_encode,
ignore_garbage,
) {
Ok(()) => Ok(output),
Err(er) => Err(er),
}
};
let made_round_trip = match decoded_or_error {
Ok(ve) => input.as_slice() == ve.as_slice(),
Err(_) => false,
};
let result_was_correct = if garbage_data_is_empty || ignore_garbage {
// If there was no garbage data added, or if "ignore_garbage" was enabled, expect the round trip to succeed
made_round_trip
} else {
// If garbage data was added, and "ignore_garbage" was disabled, expect the round trip to fail
!made_round_trip
};
if !result_was_correct {
eprintln!(
"\
(configurable_round_trip) FAILURE
format: {format:?}
ignore_garbage: {ignore_garbage}
line_wrap_to_use: {line_wrap_to_use:?}
garbage_data_len: {garbage_data_len}
input_len: {input_len}
",
);
}
prop_assert!(result_was_correct);
Ok(())
}
#[test]
fn base16_round_trip() {
generic_round_trip(Format::Base16);
}
#[test]
fn base2lsbf_round_trip() {
generic_round_trip(Format::Base2Lsbf);
}
#[test]
fn base2msbf_round_trip() {
generic_round_trip(Format::Base2Msbf);
}
#[test]
fn base32_round_trip() {
generic_round_trip(Format::Base32);
}
#[test]
fn base32hex_round_trip() {
generic_round_trip(Format::Base32Hex);
}
#[test]
fn base64_round_trip() {
generic_round_trip(Format::Base64);
}
#[test]
fn base64url_round_trip() {
generic_round_trip(Format::Base64Url);
}
#[test]
fn z85_round_trip() {
generic_round_trip(Format::Z85);
}

View file

@ -727,7 +727,7 @@ fn get_root_dev_ino() -> Result<DeviceAndINode> {
} }
fn root_dev_ino_check(root_dev_ino: Option<DeviceAndINode>, dir_dev_ino: DeviceAndINode) -> bool { fn root_dev_ino_check(root_dev_ino: Option<DeviceAndINode>, dir_dev_ino: DeviceAndINode) -> bool {
root_dev_ino.map_or(false, |root_dev_ino| root_dev_ino == dir_dev_ino) root_dev_ino == Some(dir_dev_ino)
} }
fn root_dev_ino_warn(dir_name: &Path) { fn root_dev_ino_warn(dir_name: &Path) {
@ -777,7 +777,7 @@ enum SELinuxSecurityContext<'t> {
String(Option<CString>), String(Option<CString>),
} }
impl<'t> SELinuxSecurityContext<'t> { impl SELinuxSecurityContext<'_> {
fn to_c_string(&self) -> Result<Option<Cow<CStr>>> { fn to_c_string(&self) -> Result<Option<Cow<CStr>>> {
match self { match self {
Self::File(context) => context Self::File(context) => context

View file

@ -13,8 +13,8 @@ use std::iter;
use std::path::Path; use std::path::Path;
use uucore::checksum::{ use uucore::checksum::{
calculate_blake2b_length, detect_algo, digest_reader, perform_checksum_validation, calculate_blake2b_length, detect_algo, digest_reader, perform_checksum_validation,
ChecksumError, ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, ChecksumError, ChecksumOptions, ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD,
ALGORITHM_OPTIONS_SYSV, SUPPORTED_ALGORITHMS, ALGORITHM_OPTIONS_CRC, ALGORITHM_OPTIONS_SYSV, SUPPORTED_ALGORITHMS,
}; };
use uucore::{ use uucore::{
encoding, encoding,
@ -22,7 +22,7 @@ use uucore::{
format_usage, help_about, help_section, help_usage, format_usage, help_about, help_section, help_usage,
line_ending::LineEnding, line_ending::LineEnding,
os_str_as_bytes, show, os_str_as_bytes, show,
sum::{div_ceil, Digest}, sum::Digest,
}; };
const USAGE: &str = help_usage!("cksum.md"); const USAGE: &str = help_usage!("cksum.md");
@ -124,7 +124,7 @@ where
format!( format!(
"{} {}{}", "{} {}{}",
sum.parse::<u16>().unwrap(), sum.parse::<u16>().unwrap(),
div_ceil(sz, options.output_bits), sz.div_ceil(options.output_bits),
if not_file { "" } else { " " } if not_file { "" } else { " " }
), ),
!not_file, !not_file,
@ -134,7 +134,7 @@ where
format!( format!(
"{:0bsd_width$} {:bsd_width$}{}", "{:0bsd_width$} {:bsd_width$}{}",
sum.parse::<u16>().unwrap(), sum.parse::<u16>().unwrap(),
div_ceil(sz, options.output_bits), sz.div_ceil(options.output_bits),
if not_file { "" } else { " " } if not_file { "" } else { " " }
), ),
!not_file, !not_file,
@ -318,17 +318,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|| iter::once(OsStr::new("-")).collect::<Vec<_>>(), || iter::once(OsStr::new("-")).collect::<Vec<_>>(),
|files| files.map(OsStr::new).collect::<Vec<_>>(), |files| files.map(OsStr::new).collect::<Vec<_>>(),
); );
return perform_checksum_validation( let opts = ChecksumOptions {
files.iter().copied(), binary: binary_flag,
strict,
status,
warn,
binary_flag,
ignore_missing, ignore_missing,
quiet, quiet,
algo_option, status,
length, strict,
); warn,
};
return perform_checksum_validation(files.iter().copied(), algo_option, length, opts);
} }
let (tag, asterisk) = handle_tag_text_binary_flags(&matches)?; let (tag, asterisk) = handle_tag_text_binary_flags(&matches)?;

View file

@ -6,9 +6,8 @@
// spell-checker:ignore (ToDO) delim mkdelim // spell-checker:ignore (ToDO) delim mkdelim
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs::File; use std::fs::{metadata, File};
use std::io::{self, stdin, BufRead, BufReader, Stdin}; use std::io::{self, stdin, BufRead, BufReader, Stdin};
use std::path::Path;
use uucore::error::{FromIo, UResult, USimpleError}; use uucore::error::{FromIo, UResult, USimpleError};
use uucore::line_ending::LineEnding; use uucore::line_ending::LineEnding;
use uucore::{format_usage, help_about, help_usage}; use uucore::{format_usage, help_about, help_usage};
@ -130,7 +129,10 @@ fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
if name == "-" { if name == "-" {
Ok(LineReader::new(Input::Stdin(stdin()), line_ending)) Ok(LineReader::new(Input::Stdin(stdin()), line_ending))
} else { } else {
let f = File::open(Path::new(name))?; if metadata(name)?.is_dir() {
return Err(io::Error::new(io::ErrorKind::Other, "Is a directory"));
}
let f = File::open(name)?;
Ok(LineReader::new( Ok(LineReader::new(
Input::FileIn(BufReader::new(f)), Input::FileIn(BufReader::new(f)),
line_ending, line_ending,

View file

@ -30,6 +30,7 @@ uucore = { workspace = true, features = [
"backup-control", "backup-control",
"entries", "entries",
"fs", "fs",
"fsxattr",
"perms", "perms",
"mode", "mode",
"update-control", "update-control",

View file

@ -17,6 +17,8 @@ use std::os::unix::ffi::OsStrExt;
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::fs::{FileTypeExt, PermissionsExt}; use std::os::unix::fs::{FileTypeExt, PermissionsExt};
use std::path::{Path, PathBuf, StripPrefixError}; use std::path::{Path, PathBuf, StripPrefixError};
#[cfg(all(unix, not(target_os = "android")))]
use uucore::fsxattr::copy_xattrs;
use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command}; use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command};
use filetime::FileTime; use filetime::FileTime;
@ -1605,12 +1607,7 @@ pub(crate) fn copy_attributes(
handle_preserve(&attributes.xattr, || -> CopyResult<()> { handle_preserve(&attributes.xattr, || -> CopyResult<()> {
#[cfg(all(unix, not(target_os = "android")))] #[cfg(all(unix, not(target_os = "android")))]
{ {
let xattrs = xattr::list(source)?; copy_xattrs(source, dest)?;
for attr in xattrs {
if let Some(attr_value) = xattr::get(source, attr.clone())? {
xattr::set(dest, attr, &attr_value[..])?;
}
}
} }
#[cfg(not(all(unix, not(target_os = "android"))))] #[cfg(not(all(unix, not(target_os = "android"))))]
{ {

View file

@ -197,7 +197,7 @@ struct SplitWriter<'a> {
dev_null: bool, dev_null: bool,
} }
impl<'a> Drop for SplitWriter<'a> { impl Drop for SplitWriter<'_> {
fn drop(&mut self) { fn drop(&mut self) {
if self.options.elide_empty_files && self.size == 0 { if self.options.elide_empty_files && self.size == 0 {
let file_name = self.options.split_name.get(self.counter); let file_name = self.options.split_name.get(self.counter);
@ -206,7 +206,7 @@ impl<'a> Drop for SplitWriter<'a> {
} }
} }
impl<'a> SplitWriter<'a> { impl SplitWriter<'_> {
fn new(options: &CsplitOptions) -> SplitWriter { fn new(options: &CsplitOptions) -> SplitWriter {
SplitWriter { SplitWriter {
options, options,
@ -621,8 +621,9 @@ pub fn uu_app() -> Command {
) )
.arg( .arg(
Arg::new(options::QUIET) Arg::new(options::QUIET)
.short('s') .short('q')
.long(options::QUIET) .long(options::QUIET)
.visible_short_alias('s')
.visible_alias("silent") .visible_alias("silent")
.help("do not print counts of output file sizes") .help("do not print counts of output file sizes")
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),

View file

@ -106,7 +106,7 @@ pub fn get_patterns(args: &[String]) -> Result<Vec<Pattern>, CsplitError> {
fn extract_patterns(args: &[String]) -> Result<Vec<Pattern>, CsplitError> { fn extract_patterns(args: &[String]) -> Result<Vec<Pattern>, CsplitError> {
let mut patterns = Vec::with_capacity(args.len()); let mut patterns = Vec::with_capacity(args.len());
let to_match_reg = let to_match_reg =
Regex::new(r"^(/(?P<UPTO>.+)/|%(?P<SKIPTO>.+)%)(?P<OFFSET>[\+-]\d+)?$").unwrap(); Regex::new(r"^(/(?P<UPTO>.+)/|%(?P<SKIPTO>.+)%)(?P<OFFSET>[\+-]?\d+)?$").unwrap();
let execute_ntimes_reg = Regex::new(r"^\{(?P<TIMES>\d+)|\*\}$").unwrap(); let execute_ntimes_reg = Regex::new(r"^\{(?P<TIMES>\d+)|\*\}$").unwrap();
let mut iter = args.iter().peekable(); let mut iter = args.iter().peekable();
@ -219,14 +219,15 @@ mod tests {
"{*}", "{*}",
"/test3.*end$/", "/test3.*end$/",
"{4}", "{4}",
"/test4.*end$/+3", "/test4.*end$/3",
"/test5.*end$/-3", "/test5.*end$/+3",
"/test6.*end$/-3",
] ]
.into_iter() .into_iter()
.map(|v| v.to_string()) .map(|v| v.to_string())
.collect(); .collect();
let patterns = get_patterns(input.as_slice()).unwrap(); let patterns = get_patterns(input.as_slice()).unwrap();
assert_eq!(patterns.len(), 5); assert_eq!(patterns.len(), 6);
match patterns.first() { match patterns.first() {
Some(Pattern::UpToMatch(reg, 0, ExecutePattern::Times(1))) => { Some(Pattern::UpToMatch(reg, 0, ExecutePattern::Times(1))) => {
let parsed_reg = format!("{reg}"); let parsed_reg = format!("{reg}");
@ -256,12 +257,19 @@ mod tests {
_ => panic!("expected UpToMatch pattern"), _ => panic!("expected UpToMatch pattern"),
}; };
match patterns.get(4) { match patterns.get(4) {
Some(Pattern::UpToMatch(reg, -3, ExecutePattern::Times(1))) => { Some(Pattern::UpToMatch(reg, 3, ExecutePattern::Times(1))) => {
let parsed_reg = format!("{reg}"); let parsed_reg = format!("{reg}");
assert_eq!(parsed_reg, "test5.*end$"); assert_eq!(parsed_reg, "test5.*end$");
} }
_ => panic!("expected UpToMatch pattern"), _ => panic!("expected UpToMatch pattern"),
}; };
match patterns.get(5) {
Some(Pattern::UpToMatch(reg, -3, ExecutePattern::Times(1))) => {
let parsed_reg = format!("{reg}");
assert_eq!(parsed_reg, "test6.*end$");
}
_ => panic!("expected UpToMatch pattern"),
};
} }
#[test] #[test]
@ -273,14 +281,15 @@ mod tests {
"{*}", "{*}",
"%test3.*end$%", "%test3.*end$%",
"{4}", "{4}",
"%test4.*end$%+3", "%test4.*end$%3",
"%test5.*end$%-3", "%test5.*end$%+3",
"%test6.*end$%-3",
] ]
.into_iter() .into_iter()
.map(|v| v.to_string()) .map(|v| v.to_string())
.collect(); .collect();
let patterns = get_patterns(input.as_slice()).unwrap(); let patterns = get_patterns(input.as_slice()).unwrap();
assert_eq!(patterns.len(), 5); assert_eq!(patterns.len(), 6);
match patterns.first() { match patterns.first() {
Some(Pattern::SkipToMatch(reg, 0, ExecutePattern::Times(1))) => { Some(Pattern::SkipToMatch(reg, 0, ExecutePattern::Times(1))) => {
let parsed_reg = format!("{reg}"); let parsed_reg = format!("{reg}");
@ -310,12 +319,19 @@ mod tests {
_ => panic!("expected SkipToMatch pattern"), _ => panic!("expected SkipToMatch pattern"),
}; };
match patterns.get(4) { match patterns.get(4) {
Some(Pattern::SkipToMatch(reg, -3, ExecutePattern::Times(1))) => { Some(Pattern::SkipToMatch(reg, 3, ExecutePattern::Times(1))) => {
let parsed_reg = format!("{reg}"); let parsed_reg = format!("{reg}");
assert_eq!(parsed_reg, "test5.*end$"); assert_eq!(parsed_reg, "test5.*end$");
} }
_ => panic!("expected SkipToMatch pattern"), _ => panic!("expected SkipToMatch pattern"),
}; };
match patterns.get(5) {
Some(Pattern::SkipToMatch(reg, -3, ExecutePattern::Times(1))) => {
let parsed_reg = format!("{reg}");
assert_eq!(parsed_reg, "test6.*end$");
}
_ => panic!("expected SkipToMatch pattern"),
};
} }
#[test] #[test]

View file

@ -9,7 +9,7 @@ use bstr::io::BufReadExt;
use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command}; use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command};
use std::ffi::OsString; use std::ffi::OsString;
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout, BufReader, BufWriter, IsTerminal, Read, Write}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, IsTerminal, Read, Write};
use std::path::Path; use std::path::Path;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{set_exit_code, FromIo, UResult, USimpleError}; use uucore::error::{set_exit_code, FromIo, UResult, USimpleError};
@ -131,8 +131,9 @@ fn cut_fields_explicit_out_delim<R: Read, M: Matcher>(
if delim_search.peek().is_none() { if delim_search.peek().is_none() {
if !only_delimited { if !only_delimited {
// Always write the entire line, even if it doesn't end with `newline_char`
out.write_all(line)?; out.write_all(line)?;
if line[line.len() - 1] != newline_char { if line.is_empty() || line[line.len() - 1] != newline_char {
out.write_all(&[newline_char])?; out.write_all(&[newline_char])?;
} }
} }
@ -214,8 +215,9 @@ fn cut_fields_implicit_out_delim<R: Read, M: Matcher>(
if delim_search.peek().is_none() { if delim_search.peek().is_none() {
if !only_delimited { if !only_delimited {
// Always write the entire line, even if it doesn't end with `newline_char`
out.write_all(line)?; out.write_all(line)?;
if line[line.len() - 1] != newline_char { if line.is_empty() || line[line.len() - 1] != newline_char {
out.write_all(&[newline_char])?; out.write_all(&[newline_char])?;
} }
} }
@ -265,10 +267,46 @@ fn cut_fields_implicit_out_delim<R: Read, M: Matcher>(
Ok(()) Ok(())
} }
/// Cuts fields when the input delimiter is identical to `newline_char`.
///
/// The whole input is split on `newline_char`, and every resulting segment is
/// treated as one field. The fields selected by `ranges` are written to the
/// writer returned by `stdout_writer()`, joined by `out_delim`, and followed
/// by a single trailing `newline_char`.
///
/// # Errors
///
/// Returns an error if reading from `reader` fails or if writing the output
/// fails.
fn cut_fields_newline_char_delim<R: Read>(
    reader: R,
    ranges: &[Range],
    newline_char: u8,
    out_delim: &[u8],
) -> UResult<()> {
    let buf_in = BufReader::new(reader);
    let mut out = stdout_writer();

    // Propagate read errors instead of discarding them: silently skipping
    // `Err` items would hide I/O failures from the user, and a reader that
    // keeps failing could otherwise be polled over and over again.
    let segments = buf_in
        .split(newline_char)
        .collect::<Result<Vec<_>, _>>()?;

    // The output delimiter goes *between* fields, so it is skipped for the
    // very first field that gets written.
    let mut print_delim = false;

    for &Range { low, high } in ranges {
        for i in low..=high {
            // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0
            if let Some(segment) = segments.get(i - 1) {
                if print_delim {
                    out.write_all(out_delim)?;
                } else {
                    print_delim = true;
                }
                out.write_all(segment.as_slice())?;
            } else {
                // The rest of this range lies beyond the last field.
                break;
            }
        }
    }

    out.write_all(&[newline_char])?;
    Ok(())
}
fn cut_fields<R: Read>(reader: R, ranges: &[Range], opts: &Options) -> UResult<()> { fn cut_fields<R: Read>(reader: R, ranges: &[Range], opts: &Options) -> UResult<()> {
let newline_char = opts.line_ending.into(); let newline_char = opts.line_ending.into();
let field_opts = opts.field_opts.as_ref().unwrap(); // it is safe to unwrap() here - field_opts will always be Some() for cut_fields() call let field_opts = opts.field_opts.as_ref().unwrap(); // it is safe to unwrap() here - field_opts will always be Some() for cut_fields() call
match field_opts.delimiter { match field_opts.delimiter {
Delimiter::Slice(delim) if delim == [newline_char] => {
let out_delim = opts.out_delimiter.unwrap_or(delim);
cut_fields_newline_char_delim(reader, ranges, newline_char, out_delim)
}
Delimiter::Slice(delim) => { Delimiter::Slice(delim) => {
let matcher = ExactMatcher::new(delim); let matcher = ExactMatcher::new(delim);
match opts.out_delimiter { match opts.out_delimiter {
@ -348,10 +386,7 @@ fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
// Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively // Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively
// Allow either delimiter to have a value that is neither UTF-8 nor ASCII to align with GNU behavior // Allow either delimiter to have a value that is neither UTF-8 nor ASCII to align with GNU behavior
fn get_delimiters( fn get_delimiters(matches: &ArgMatches) -> UResult<(Delimiter, Option<&[u8]>)> {
matches: &ArgMatches,
delimiter_is_equal: bool,
) -> UResult<(Delimiter, Option<&[u8]>)> {
let whitespace_delimited = matches.get_flag(options::WHITESPACE_DELIMITED); let whitespace_delimited = matches.get_flag(options::WHITESPACE_DELIMITED);
let delim_opt = matches.get_one::<OsString>(options::DELIMITER); let delim_opt = matches.get_one::<OsString>(options::DELIMITER);
let delim = match delim_opt { let delim = match delim_opt {
@ -362,12 +397,7 @@ fn get_delimiters(
)); ));
} }
Some(os_string) => { Some(os_string) => {
// GNU's `cut` supports `-d=` to set the delimiter to `=`. if os_string == "''" || os_string.is_empty() {
// Clap parsing is limited in this situation, see:
// https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242
if delimiter_is_equal {
Delimiter::Slice(b"=")
} else if os_string == "''" || os_string.is_empty() {
// treat `''` as empty delimiter // treat `''` as empty delimiter
Delimiter::Slice(b"\0") Delimiter::Slice(b"\0")
} else { } else {
@ -421,15 +451,26 @@ mod options {
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let args = args.collect::<Vec<OsString>>(); // GNU's `cut` supports `-d=` to set the delimiter to `=`.
// Clap parsing is limited in this situation, see:
// https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242
let args: Vec<OsString> = args
.into_iter()
.map(|x| {
if x == "-d=" {
"--delimiter==".into()
} else {
x
}
})
.collect();
let delimiter_is_equal = args.contains(&OsString::from("-d=")); // special case
let matches = uu_app().try_get_matches_from(args)?; let matches = uu_app().try_get_matches_from(args)?;
let complement = matches.get_flag(options::COMPLEMENT); let complement = matches.get_flag(options::COMPLEMENT);
let only_delimited = matches.get_flag(options::ONLY_DELIMITED); let only_delimited = matches.get_flag(options::ONLY_DELIMITED);
let (delimiter, out_delimiter) = get_delimiters(&matches, delimiter_is_equal)?; let (delimiter, out_delimiter) = get_delimiters(&matches)?;
let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED)); let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
// Only one, and only one of cutting mode arguments, i.e. `-b`, `-c`, `-f`, // Only one, and only one of cutting mode arguments, i.e. `-b`, `-c`, `-f`,

View file

@ -23,7 +23,7 @@ impl<'a> ExactMatcher<'a> {
} }
} }
impl<'a> Matcher for ExactMatcher<'a> { impl Matcher for ExactMatcher<'_> {
fn next_match(&self, haystack: &[u8]) -> Option<(usize, usize)> { fn next_match(&self, haystack: &[u8]) -> Option<(usize, usize)> {
let mut pos = 0usize; let mut pos = 0usize;
loop { loop {

View file

@ -27,7 +27,7 @@ impl<'a, 'b, M: Matcher> Searcher<'a, 'b, M> {
// Iterate over field delimiters // Iterate over field delimiters
// Returns (first, last) positions of each sequence, where `haystack[first..last]` // Returns (first, last) positions of each sequence, where `haystack[first..last]`
// corresponds to the delimiter. // corresponds to the delimiter.
impl<'a, 'b, M: Matcher> Iterator for Searcher<'a, 'b, M> { impl<M: Matcher> Iterator for Searcher<'_, '_, M> {
type Item = (usize, usize); type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {

View file

@ -103,7 +103,7 @@ enum Iso8601Format {
Ns, Ns,
} }
impl<'a> From<&'a str> for Iso8601Format { impl From<&str> for Iso8601Format {
fn from(s: &str) -> Self { fn from(s: &str) -> Self {
match s { match s {
HOURS => Self::Hours, HOURS => Self::Hours,
@ -123,7 +123,7 @@ enum Rfc3339Format {
Ns, Ns,
} }
impl<'a> From<&'a str> for Rfc3339Format { impl From<&str> for Rfc3339Format {
fn from(s: &str) -> Self { fn from(s: &str) -> Self {
match s { match s {
DATE => Self::Date, DATE => Self::Date,

View file

@ -424,7 +424,7 @@ fn make_linux_iflags(iflags: &IFlags) -> Option<libc::c_int> {
} }
} }
impl<'a> Read for Input<'a> { impl Read for Input<'_> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let mut base_idx = 0; let mut base_idx = 0;
let target_len = buf.len(); let target_len = buf.len();
@ -447,7 +447,7 @@ impl<'a> Read for Input<'a> {
} }
} }
impl<'a> Input<'a> { impl Input<'_> {
/// Discard the system file cache for the given portion of the input. /// Discard the system file cache for the given portion of the input.
/// ///
/// `offset` and `len` specify a contiguous portion of the input. /// `offset` and `len` specify a contiguous portion of the input.
@ -928,7 +928,7 @@ enum BlockWriter<'a> {
Unbuffered(Output<'a>), Unbuffered(Output<'a>),
} }
impl<'a> BlockWriter<'a> { impl BlockWriter<'_> {
fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) { fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) {
match self { match self {
Self::Unbuffered(o) => o.discard_cache(offset, len), Self::Unbuffered(o) => o.discard_cache(offset, len),

View file

@ -2,7 +2,8 @@
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
/// Functions for formatting a number as a magnitude and a unit suffix.
//! Functions for formatting a number as a magnitude and a unit suffix.
/// The first ten powers of 1024. /// The first ten powers of 1024.
const IEC_BASES: [u128; 10] = [ const IEC_BASES: [u128; 10] = [

View file

@ -311,7 +311,6 @@ fn is_best(previous: &[MountInfo], mi: &MountInfo) -> bool {
/// ///
/// Finally, if there are duplicate entries, the one with the shorter /// Finally, if there are duplicate entries, the one with the shorter
/// path is kept. /// path is kept.
fn filter_mount_list(vmi: Vec<MountInfo>, opt: &Options) -> Vec<MountInfo> { fn filter_mount_list(vmi: Vec<MountInfo>, opt: &Options) -> Vec<MountInfo> {
let mut result = vec![]; let mut result = vec![];
for mi in vmi { for mi in vmi {
@ -331,7 +330,6 @@ fn filter_mount_list(vmi: Vec<MountInfo>, opt: &Options) -> Vec<MountInfo> {
/// ///
/// `opt` excludes certain filesystems from consideration and allows for the synchronization of filesystems before running; see /// `opt` excludes certain filesystems from consideration and allows for the synchronization of filesystems before running; see
/// [`Options`] for more information. /// [`Options`] for more information.
fn get_all_filesystems(opt: &Options) -> UResult<Vec<Filesystem>> { fn get_all_filesystems(opt: &Options) -> UResult<Vec<Filesystem>> {
// Run a sync call before any operation if so instructed. // Run a sync call before any operation if so instructed.
if opt.sync { if opt.sync {

View file

@ -15,4 +15,4 @@ Run the tests:
cargo test --features "dircolors" --no-default-features cargo test --features "dircolors" --no-default-features
``` ```
Edit `/PATH_TO_COREUTILS/src/uu/dircolors/src/colors.rs` until the tests pass. Edit `/PATH_TO_COREUTILS/src/uu/dircolors/src/dircolors.rs` until the tests pass.

View file

@ -12,7 +12,7 @@ use std::error::Error;
use std::fmt::Display; use std::fmt::Display;
#[cfg(not(windows))] #[cfg(not(windows))]
use std::fs::Metadata; use std::fs::Metadata;
use std::fs::{self, File}; use std::fs::{self, DirEntry, File};
use std::io::{BufRead, BufReader}; use std::io::{BufRead, BufReader};
#[cfg(not(windows))] #[cfg(not(windows))]
use std::os::unix::fs::MetadataExt; use std::os::unix::fs::MetadataExt;
@ -138,7 +138,11 @@ struct Stat {
} }
impl Stat { impl Stat {
fn new(path: &Path, options: &TraversalOptions) -> std::io::Result<Self> { fn new(
path: &Path,
dir_entry: Option<&DirEntry>,
options: &TraversalOptions,
) -> std::io::Result<Self> {
// Determine whether to dereference (follow) the symbolic link // Determine whether to dereference (follow) the symbolic link
let should_dereference = match &options.dereference { let should_dereference = match &options.dereference {
Deref::All => true, Deref::All => true,
@ -149,8 +153,11 @@ impl Stat {
let metadata = if should_dereference { let metadata = if should_dereference {
// Get metadata, following symbolic links if necessary // Get metadata, following symbolic links if necessary
fs::metadata(path) fs::metadata(path)
} else if let Some(dir_entry) = dir_entry {
// Get metadata directly from the DirEntry, which is faster on Windows
dir_entry.metadata()
} else { } else {
// Get metadata without following symbolic links // Get metadata from the filesystem without following symbolic links
fs::symlink_metadata(path) fs::symlink_metadata(path)
}?; }?;
@ -164,7 +171,7 @@ impl Stat {
Ok(Self { Ok(Self {
path: path.to_path_buf(), path: path.to_path_buf(),
is_dir: metadata.is_dir(), is_dir: metadata.is_dir(),
size: if path.is_dir() { 0 } else { metadata.len() }, size: if metadata.is_dir() { 0 } else { metadata.len() },
blocks: metadata.blocks(), blocks: metadata.blocks(),
inodes: 1, inodes: 1,
inode: Some(file_info), inode: Some(file_info),
@ -182,7 +189,7 @@ impl Stat {
Ok(Self { Ok(Self {
path: path.to_path_buf(), path: path.to_path_buf(),
is_dir: metadata.is_dir(), is_dir: metadata.is_dir(),
size: if path.is_dir() { 0 } else { metadata.len() }, size: if metadata.is_dir() { 0 } else { metadata.len() },
blocks: size_on_disk / 1024 * 2, blocks: size_on_disk / 1024 * 2,
inodes: 1, inodes: 1,
inode: file_info, inode: file_info,
@ -319,7 +326,7 @@ fn du(
'file_loop: for f in read { 'file_loop: for f in read {
match f { match f {
Ok(entry) => { Ok(entry) => {
match Stat::new(&entry.path(), options) { match Stat::new(&entry.path(), Some(&entry), options) {
Ok(this_stat) => { Ok(this_stat) => {
// We have an exclude list // We have an exclude list
for pattern in &options.excludes { for pattern in &options.excludes {
@ -339,14 +346,21 @@ fn du(
} }
if let Some(inode) = this_stat.inode { if let Some(inode) = this_stat.inode {
if seen_inodes.contains(&inode) { // Check if the inode has been seen before and if we should skip it
if options.count_links { if seen_inodes.contains(&inode)
&& (!options.count_links || !options.all)
{
// If `count_links` is enabled and `all` is not, increment the inode count
if options.count_links && !options.all {
my_stat.inodes += 1; my_stat.inodes += 1;
} }
// Skip further processing for this inode
continue; continue;
} }
// Mark this inode as seen
seen_inodes.insert(inode); seen_inodes.insert(inode);
} }
if this_stat.is_dir { if this_stat.is_dir {
if options.one_file_system { if options.one_file_system {
if let (Some(this_inode), Some(my_inode)) = if let (Some(this_inode), Some(my_inode)) =
@ -519,7 +533,7 @@ impl StatPrinter {
if !self if !self
.threshold .threshold
.map_or(false, |threshold| threshold.should_exclude(size)) .is_some_and(|threshold| threshold.should_exclude(size))
&& self && self
.max_depth .max_depth
.map_or(true, |max_depth| stat_info.depth <= max_depth) .map_or(true, |max_depth| stat_info.depth <= max_depth)
@ -543,9 +557,6 @@ impl StatPrinter {
} }
fn convert_size(&self, size: u64) -> String { fn convert_size(&self, size: u64) -> String {
if self.inodes {
return size.to_string();
}
match self.size_format { match self.size_format {
SizeFormat::HumanDecimal => uucore::format::human::human_readable( SizeFormat::HumanDecimal => uucore::format::human::human_readable(
size, size,
@ -555,7 +566,14 @@ impl StatPrinter {
size, size,
uucore::format::human::SizeFormat::Binary, uucore::format::human::SizeFormat::Binary,
), ),
SizeFormat::BlockSize(block_size) => div_ceil(size, block_size).to_string(), SizeFormat::BlockSize(block_size) => {
if self.inodes {
// we ignore block size (-B) with --inodes
size.to_string()
} else {
size.div_ceil(block_size).to_string()
}
}
} }
} }
@ -576,13 +594,6 @@ impl StatPrinter {
} }
} }
/// Returns `a / b`, rounded up to the nearest integer.
///
/// Delegates to the standard library's `u64::div_ceil`, which does not form
/// the intermediate sum `a + b - 1` and therefore cannot overflow when `a` is
/// close to `u64::MAX`.
///
/// # Panics
///
/// Panics if `b` is zero.
pub fn div_ceil(a: u64, b: u64) -> u64 {
    a.div_ceil(b)
}
// Read file paths from the specified file, separated by null characters // Read file paths from the specified file, separated by null characters
fn read_files_from(file_name: &str) -> Result<Vec<PathBuf>, std::io::Error> { fn read_files_from(file_name: &str) -> Result<Vec<PathBuf>, std::io::Error> {
let reader: Box<dyn BufRead> = if file_name == "-" { let reader: Box<dyn BufRead> = if file_name == "-" {
@ -638,6 +649,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let summarize = matches.get_flag(options::SUMMARIZE); let summarize = matches.get_flag(options::SUMMARIZE);
let count_links = matches.get_flag(options::COUNT_LINKS);
let max_depth = parse_depth( let max_depth = parse_depth(
matches matches
.get_one::<String>(options::MAX_DEPTH) .get_one::<String>(options::MAX_DEPTH)
@ -658,15 +671,19 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} }
read_files_from(file_from)? read_files_from(file_from)?
} else { } else if let Some(files) = matches.get_many::<String>(options::FILE) {
match matches.get_one::<String>(options::FILE) { let files = files.map(PathBuf::from);
Some(_) => matches if count_links {
.get_many::<String>(options::FILE) files.collect()
.unwrap() } else {
.map(PathBuf::from) // Deduplicate while preserving order
.collect(), let mut seen = std::collections::HashSet::new();
None => vec![PathBuf::from(".")], files
.filter(|path| seen.insert(path.clone()))
.collect::<Vec<_>>()
} }
} else {
vec![PathBuf::from(".")]
}; };
let time = matches.contains_id(options::TIME).then(|| { let time = matches.contains_id(options::TIME).then(|| {
@ -708,7 +725,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} else { } else {
Deref::None Deref::None
}, },
count_links: matches.get_flag(options::COUNT_LINKS), count_links,
verbose: matches.get_flag(options::VERBOSE), verbose: matches.get_flag(options::VERBOSE),
excludes: build_exclude_patterns(&matches)?, excludes: build_exclude_patterns(&matches)?,
}; };
@ -765,7 +782,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} }
// Check existence of path provided in argument // Check existence of path provided in argument
if let Ok(stat) = Stat::new(&path, &traversal_options) { if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
// Kick off the computation of disk usage from the initial path // Kick off the computation of disk usage from the initial path
let mut seen_inodes: HashSet<FileInfo> = HashSet::new(); let mut seen_inodes: HashSet<FileInfo> = HashSet::new();
if let Some(inode) = stat.inode { if let Some(inode) = stat.inode {

View file

@ -208,13 +208,6 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlo
} }
if let Some(next) = iter.next() { if let Some(next) = iter.next() {
// For extending lifetime
// Unnecessary when using Rust >= 1.79.0
// https://github.com/rust-lang/rust/pull/121346
// TODO: when we have a MSRV >= 1.79.0, delete these "hold" bindings
let hold_one_byte_outside_of_match: [u8; 1_usize];
let hold_two_bytes_outside_of_match: [u8; 2_usize];
let unescaped: &[u8] = match *next { let unescaped: &[u8] = match *next {
b'\\' => br"\", b'\\' => br"\",
b'a' => b"\x07", b'a' => b"\x07",
@ -230,12 +223,7 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlo
if let Some(parsed_hexadecimal_number) = if let Some(parsed_hexadecimal_number) =
parse_backslash_number(&mut iter, BackslashNumberType::Hexadecimal) parse_backslash_number(&mut iter, BackslashNumberType::Hexadecimal)
{ {
// TODO: remove when we have a MSRV >= 1.79.0 &[parsed_hexadecimal_number]
hold_one_byte_outside_of_match = [parsed_hexadecimal_number];
// TODO: when we have a MSRV >= 1.79.0, return reference to a temporary array:
// &[parsed_hexadecimal_number]
&hold_one_byte_outside_of_match
} else { } else {
// "\x" with any non-hexadecimal digit after means "\x" is treated literally // "\x" with any non-hexadecimal digit after means "\x" is treated literally
br"\x" br"\x"
@ -246,12 +234,7 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlo
&mut iter, &mut iter,
BackslashNumberType::OctalStartingWithZero, BackslashNumberType::OctalStartingWithZero,
) { ) {
// TODO: remove when we have a MSRV >= 1.79.0 &[parsed_octal_number]
hold_one_byte_outside_of_match = [parsed_octal_number];
// TODO: when we have a MSRV >= 1.79.0, return reference to a temporary array:
// &[parsed_octal_number]
&hold_one_byte_outside_of_match
} else { } else {
// "\0" with any non-octal digit after it means "\0" is treated as ASCII '\0' (NUL), 0x00 // "\0" with any non-octal digit after it means "\0" is treated as ASCII '\0' (NUL), 0x00
b"\0" b"\0"
@ -259,9 +242,7 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlo
} }
other_byte => { other_byte => {
// Backslash and the following byte are treated literally // Backslash and the following byte are treated literally
hold_two_bytes_outside_of_match = [b'\\', other_byte]; &[b'\\', other_byte]
&hold_two_bytes_outside_of_match
} }
}; };
@ -274,9 +255,26 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlo
Ok(ControlFlow::Continue(())) Ok(ControlFlow::Continue(()))
} }
/// Duplicates the first `--` found in `args`.
///
/// This is a workaround: clap interprets the first `--` as a marker that a
/// value follows, so an additional `--` has to be injected in front of it for
/// `--` itself to be usable as a value. Arguments without any `--` are
/// returned unchanged.
fn handle_double_hyphens(args: impl uucore::Args) -> impl uucore::Args {
    let mut patched: Vec<OsString> = args.collect();

    // Only the first occurrence is doubled; any later `--` is left alone.
    if let Some(first_idx) = patched.iter().position(|arg| arg == "--") {
        patched.insert(first_idx, OsString::from("--"));
    }

    patched.into_iter()
}
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().get_matches_from(args); let matches = uu_app().get_matches_from(handle_double_hyphens(args));
// TODO // TODO
// "If the POSIXLY_CORRECT environment variable is set, then when echo's first argument is not -n it outputs option-like arguments instead of treating them as options." // "If the POSIXLY_CORRECT environment variable is set, then when echo's first argument is not -n it outputs option-like arguments instead of treating them as options."

View file

@ -114,10 +114,9 @@ impl<'a> StringParser<'a> {
} }
pub fn peek_chunk(&self) -> Option<Chunk<'a>> { pub fn peek_chunk(&self) -> Option<Chunk<'a>> {
return self self.get_chunk_with_length_at(self.pointer)
.get_chunk_with_length_at(self.pointer)
.ok() .ok()
.map(|(chunk, _)| chunk); .map(|(chunk, _)| chunk)
} }
pub fn consume_chunk(&mut self) -> Result<Chunk<'a>, Error> { pub fn consume_chunk(&mut self) -> Result<Chunk<'a>, Error> {

View file

@ -11,7 +11,7 @@ pub struct VariableParser<'a, 'b> {
pub parser: &'b mut StringParser<'a>, pub parser: &'b mut StringParser<'a>,
} }
impl<'a, 'b> VariableParser<'a, 'b> { impl<'a> VariableParser<'a, '_> {
fn get_current_char(&self) -> Option<char> { fn get_current_char(&self) -> Option<char> {
self.parser.peek().ok() self.parser.peek().ok()
} }

View file

@ -189,6 +189,13 @@ fn process_file(
_ => { _ => {
let f = File::open(file_name) let f = File::open(file_name)
.map_err_context(|| format!("cannot open {} for reading", file_name.quote()))?; .map_err_context(|| format!("cannot open {} for reading", file_name.quote()))?;
if f.metadata()
.map_err_context(|| format!("cannot get metadata for {}", file_name.quote()))?
.is_dir()
{
return Err(USimpleError::new(1, "read error".to_string()));
}
Box::new(f) as Box<dyn Read + 'static> Box::new(f) as Box<dyn Read + 'static>
} }
}); });

View file

@ -20,7 +20,7 @@ struct BreakArgs<'a> {
ostream: &'a mut BufWriter<Stdout>, ostream: &'a mut BufWriter<Stdout>,
} }
impl<'a> BreakArgs<'a> { impl BreakArgs<'_> {
fn compute_width(&self, winfo: &WordInfo, posn: usize, fresh: bool) -> usize { fn compute_width(&self, winfo: &WordInfo, posn: usize, fresh: bool) -> usize {
if fresh { if fresh {
0 0

View file

@ -73,7 +73,7 @@ pub struct FileLines<'a> {
lines: Lines<&'a mut FileOrStdReader>, lines: Lines<&'a mut FileOrStdReader>,
} }
impl<'a> FileLines<'a> { impl FileLines<'_> {
fn new<'b>(opts: &'b FmtOptions, lines: Lines<&'b mut FileOrStdReader>) -> FileLines<'b> { fn new<'b>(opts: &'b FmtOptions, lines: Lines<&'b mut FileOrStdReader>) -> FileLines<'b> {
FileLines { opts, lines } FileLines { opts, lines }
} }
@ -144,7 +144,7 @@ impl<'a> FileLines<'a> {
} }
} }
impl<'a> Iterator for FileLines<'a> { impl Iterator for FileLines<'_> {
type Item = Line; type Item = Line;
fn next(&mut self) -> Option<Line> { fn next(&mut self) -> Option<Line> {
@ -232,7 +232,7 @@ pub struct ParagraphStream<'a> {
opts: &'a FmtOptions, opts: &'a FmtOptions,
} }
impl<'a> ParagraphStream<'a> { impl ParagraphStream<'_> {
pub fn new<'b>(opts: &'b FmtOptions, reader: &'b mut FileOrStdReader) -> ParagraphStream<'b> { pub fn new<'b>(opts: &'b FmtOptions, reader: &'b mut FileOrStdReader) -> ParagraphStream<'b> {
let lines = FileLines::new(opts, reader.lines()).peekable(); let lines = FileLines::new(opts, reader.lines()).peekable();
// at the beginning of the file, we might find mail headers // at the beginning of the file, we might find mail headers
@ -273,7 +273,7 @@ impl<'a> ParagraphStream<'a> {
} }
} }
impl<'a> Iterator for ParagraphStream<'a> { impl Iterator for ParagraphStream<'_> {
type Item = Result<Paragraph, String>; type Item = Result<Paragraph, String>;
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
@ -491,7 +491,7 @@ struct WordSplit<'a> {
prev_punct: bool, prev_punct: bool,
} }
impl<'a> WordSplit<'a> { impl WordSplit<'_> {
fn analyze_tabs(&self, string: &str) -> (Option<usize>, usize, Option<usize>) { fn analyze_tabs(&self, string: &str) -> (Option<usize>, usize, Option<usize>) {
// given a string, determine (length before tab) and (printed length after first tab) // given a string, determine (length before tab) and (printed length after first tab)
// if there are no tabs, beforetab = -1 and aftertab is the printed length // if there are no tabs, beforetab = -1 and aftertab is the printed length
@ -517,7 +517,7 @@ impl<'a> WordSplit<'a> {
} }
} }
impl<'a> WordSplit<'a> { impl WordSplit<'_> {
fn new<'b>(opts: &'b FmtOptions, string: &'b str) -> WordSplit<'b> { fn new<'b>(opts: &'b FmtOptions, string: &'b str) -> WordSplit<'b> {
// wordsplits *must* start at a non-whitespace character // wordsplits *must* start at a non-whitespace character
let trim_string = string.trim_start(); let trim_string = string.trim_start();

View file

@ -99,7 +99,7 @@ pub fn uu_app() -> Command {
fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) { fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
for (i, arg) in args.iter().enumerate() { for (i, arg) in args.iter().enumerate() {
let slice = &arg; let slice = &arg;
if slice.starts_with('-') && slice.chars().nth(1).map_or(false, |c| c.is_ascii_digit()) { if slice.starts_with('-') && slice.chars().nth(1).is_some_and(|c| c.is_ascii_digit()) {
let mut v = args.to_vec(); let mut v = args.to_vec();
v.remove(i); v.remove(i);
return (v, Some(slice[1..].to_owned())); return (v, Some(slice[1..].to_owned()));

View file

@ -23,6 +23,7 @@ use uucore::checksum::digest_reader;
use uucore::checksum::escape_filename; use uucore::checksum::escape_filename;
use uucore::checksum::perform_checksum_validation; use uucore::checksum::perform_checksum_validation;
use uucore::checksum::ChecksumError; use uucore::checksum::ChecksumError;
use uucore::checksum::ChecksumOptions;
use uucore::checksum::HashAlgorithm; use uucore::checksum::HashAlgorithm;
use uucore::error::{FromIo, UResult}; use uucore::error::{FromIo, UResult};
use uucore::sum::{Digest, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256}; use uucore::sum::{Digest, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256};
@ -239,18 +240,21 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> {
|| iter::once(OsStr::new("-")).collect::<Vec<_>>(), || iter::once(OsStr::new("-")).collect::<Vec<_>>(),
|files| files.map(OsStr::new).collect::<Vec<_>>(), |files| files.map(OsStr::new).collect::<Vec<_>>(),
); );
let opts = ChecksumOptions {
binary,
ignore_missing,
quiet,
status,
strict,
warn,
};
// Execute the checksum validation // Execute the checksum validation
return perform_checksum_validation( return perform_checksum_validation(
input.iter().copied(), input.iter().copied(),
strict,
status,
warn,
binary,
ignore_missing,
quiet,
Some(algo.name), Some(algo.name),
Some(algo.bits), Some(algo.bits),
opts,
); );
} else if quiet { } else if quiet {
return Err(ChecksumError::QuietNotCheck.into()); return Err(ChecksumError::QuietNotCheck.into());

View file

@ -109,7 +109,7 @@ struct MultiByteSep<'a> {
finder: Finder<'a>, finder: Finder<'a>,
} }
impl<'a> Separator for MultiByteSep<'a> { impl Separator for MultiByteSep<'_> {
fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)> { fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)> {
let mut field_ranges = Vec::with_capacity(len_guess); let mut field_ranges = Vec::with_capacity(len_guess);
let mut last_end = 0; let mut last_end = 0;

View file

@ -156,6 +156,26 @@ pub(crate) fn color_name(
target_symlink: Option<&PathData>, target_symlink: Option<&PathData>,
wrap: bool, wrap: bool,
) -> String { ) -> String {
// Check if the file has capabilities
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
{
// Skip checking capabilities if LS_COLORS=ca=:
let capabilities = style_manager
.colors
.style_for_indicator(Indicator::Capabilities);
let has_capabilities = if capabilities.is_none() {
false
} else {
uucore::fsxattr::has_acl(path.p_buf.as_path())
};
// If the file has capabilities, use a specific style for `ca` (capabilities)
if has_capabilities {
return style_manager.apply_style(capabilities, name, wrap);
}
}
if !path.must_dereference { if !path.must_dereference {
// If we need to dereference (follow) a symlink, we will need to get the metadata // If we need to dereference (follow) a symlink, we will need to get the metadata
if let Some(de) = &path.de { if let Some(de) = &path.de {

View file

@ -21,7 +21,7 @@ use std::os::windows::fs::MetadataExt;
use std::{ use std::{
cmp::Reverse, cmp::Reverse,
error::Error, error::Error,
ffi::OsString, ffi::{OsStr, OsString},
fmt::{Display, Write as FmtWrite}, fmt::{Display, Write as FmtWrite},
fs::{self, DirEntry, FileType, Metadata, ReadDir}, fs::{self, DirEntry, FileType, Metadata, ReadDir},
io::{stdout, BufWriter, ErrorKind, Stdout, Write}, io::{stdout, BufWriter, ErrorKind, Stdout, Write},
@ -55,7 +55,7 @@ use uucore::libc::{dev_t, major, minor};
#[cfg(unix)] #[cfg(unix)]
use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR}; use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
use uucore::line_ending::LineEnding; use uucore::line_ending::LineEnding;
use uucore::quoting_style::{escape_dir_name, escape_name, QuotingStyle}; use uucore::quoting_style::{self, QuotingStyle};
use uucore::{ use uucore::{
display::Quotable, display::Quotable,
error::{set_exit_code, UError, UResult}, error::{set_exit_code, UError, UResult},
@ -2048,7 +2048,11 @@ impl PathData {
/// file11 /// file11
/// ``` /// ```
fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) { fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) {
let escaped_name = escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style); // FIXME: replace this with appropriate behavior for literal unprintable bytes
let escaped_name =
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style)
.to_string_lossy()
.to_string();
let name = if config.hyperlink && !config.dired { let name = if config.hyperlink && !config.dired {
create_hyperlink(&escaped_name, path_data) create_hyperlink(&escaped_name, path_data)
@ -3002,7 +3006,6 @@ use std::sync::Mutex;
#[cfg(unix)] #[cfg(unix)]
use uucore::entries; use uucore::entries;
use uucore::fs::FileInformation; use uucore::fs::FileInformation;
use uucore::quoting_style;
#[cfg(unix)] #[cfg(unix)]
fn cached_uid2usr(uid: u32) -> String { fn cached_uid2usr(uid: u32) -> String {
@ -3542,3 +3545,10 @@ fn calculate_padding_collection(
padding_collections padding_collections
} }
// FIXME: replace this with appropriate behavior for literal unprintable bytes
/// Quotes `name` according to `style` via `quoting_style::escape_name` and
/// returns the result as a `String`.
///
/// The conversion is lossy: it goes through `to_string_lossy`, so any part of
/// the escaped name that is not valid Unicode is not preserved byte-for-byte.
fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
    let escaped = quoting_style::escape_name(name, style);
    let lossy = escaped.to_string_lossy();
    lossy.into_owned()
}

View file

@ -19,7 +19,7 @@ path = "src/mkfifo.rs"
[dependencies] [dependencies]
clap = { workspace = true } clap = { workspace = true }
libc = { workspace = true } libc = { workspace = true }
uucore = { workspace = true } uucore = { workspace = true, features = ["fs"] }
[[bin]] [[bin]]
name = "mkfifo" name = "mkfifo"

View file

@ -6,6 +6,8 @@
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, Arg, ArgAction, Command};
use libc::mkfifo; use libc::mkfifo;
use std::ffi::CString; use std::ffi::CString;
use std::fs;
use std::os::unix::fs::PermissionsExt;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{UResult, USimpleError}; use uucore::error::{UResult, USimpleError};
use uucore::{format_usage, help_about, help_usage, show}; use uucore::{format_usage, help_about, help_usage, show};
@ -32,11 +34,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} }
let mode = match matches.get_one::<String>(options::MODE) { let mode = match matches.get_one::<String>(options::MODE) {
// if mode is passed, ignore umask
Some(m) => match usize::from_str_radix(m, 8) { Some(m) => match usize::from_str_radix(m, 8) {
Ok(m) => m, Ok(m) => m,
Err(e) => return Err(USimpleError::new(1, format!("invalid mode: {e}"))), Err(e) => return Err(USimpleError::new(1, format!("invalid mode: {e}"))),
}, },
None => 0o666, // Default value + umask if present
None => 0o666 & !(uucore::mode::get_umask() as usize),
}; };
let fifos: Vec<String> = match matches.get_many::<String>(options::FIFO) { let fifos: Vec<String> = match matches.get_many::<String>(options::FIFO) {
@ -47,12 +51,20 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
for f in fifos { for f in fifos {
let err = unsafe { let err = unsafe {
let name = CString::new(f.as_bytes()).unwrap(); let name = CString::new(f.as_bytes()).unwrap();
mkfifo(name.as_ptr(), mode as libc::mode_t) mkfifo(name.as_ptr(), 0o666)
}; };
if err == -1 { if err == -1 {
show!(USimpleError::new( show!(USimpleError::new(
1, 1,
format!("cannot create fifo {}: File exists", f.quote()) format!("cannot create fifo {}: File exists", f.quote()),
));
}
// Explicitly set the permissions to ignore umask
if let Err(e) = fs::set_permissions(&f, fs::Permissions::from_mode(mode as u32)) {
return Err(USimpleError::new(
1,
format!("cannot set permissions on {}: {}", f.quote(), e),
)); ));
} }
} }
@ -71,7 +83,6 @@ pub fn uu_app() -> Command {
.short('m') .short('m')
.long(options::MODE) .long(options::MODE)
.help("file permissions for the fifo") .help("file permissions for the fifo")
.default_value("0666")
.value_name("MODE"), .value_name("MODE"),
) )
.arg( .arg(

View file

@ -98,10 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
println!("{panic_info}"); println!("{panic_info}");
})); }));
let matches = match uu_app().try_get_matches_from(args) { let matches = uu_app().try_get_matches_from(args)?;
Ok(m) => m,
Err(e) => return Err(e.into()),
};
let mut options = Options::from(&matches); let mut options = Options::from(&matches);
@ -308,12 +305,12 @@ fn more(
rows = number; rows = number;
} }
let lines = break_buff(buff, usize::from(cols)); let lines = break_buff(buff, cols as usize);
let mut pager = Pager::new(rows, lines, next_file, options); let mut pager = Pager::new(rows, lines, next_file, options);
if options.pattern.is_some() { if let Some(pat) = options.pattern.as_ref() {
match search_pattern_in_file(&pager.lines, &options.pattern) { match search_pattern_in_file(&pager.lines, pat) {
Some(number) => pager.upper_mark = number, Some(number) => pager.upper_mark = number,
None => { None => {
execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine))?; execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine))?;
@ -446,8 +443,8 @@ struct Pager<'a> {
// The current line at the top of the screen // The current line at the top of the screen
upper_mark: usize, upper_mark: usize,
// The number of rows that fit on the screen // The number of rows that fit on the screen
content_rows: u16, content_rows: usize,
lines: Vec<String>, lines: Vec<&'a str>,
next_file: Option<&'a str>, next_file: Option<&'a str>,
line_count: usize, line_count: usize,
silent: bool, silent: bool,
@ -456,11 +453,11 @@ struct Pager<'a> {
} }
impl<'a> Pager<'a> { impl<'a> Pager<'a> {
fn new(rows: u16, lines: Vec<String>, next_file: Option<&'a str>, options: &Options) -> Self { fn new(rows: u16, lines: Vec<&'a str>, next_file: Option<&'a str>, options: &Options) -> Self {
let line_count = lines.len(); let line_count = lines.len();
Self { Self {
upper_mark: options.from_line, upper_mark: options.from_line,
content_rows: rows.saturating_sub(1), content_rows: rows.saturating_sub(1) as usize,
lines, lines,
next_file, next_file,
line_count, line_count,
@ -472,30 +469,25 @@ impl<'a> Pager<'a> {
fn should_close(&mut self) -> bool { fn should_close(&mut self) -> bool {
self.upper_mark self.upper_mark
.saturating_add(self.content_rows.into()) .saturating_add(self.content_rows)
.ge(&self.line_count) .ge(&self.line_count)
} }
fn page_down(&mut self) { fn page_down(&mut self) {
// If the next page down position __after redraw__ is greater than the total line count, // If the next page down position __after redraw__ is greater than the total line count,
// the upper mark must not grow past top of the screen at the end of the open file. // the upper mark must not grow past top of the screen at the end of the open file.
if self if self.upper_mark.saturating_add(self.content_rows * 2) >= self.line_count {
.upper_mark self.upper_mark = self.line_count - self.content_rows;
.saturating_add(self.content_rows as usize * 2)
.ge(&self.line_count)
{
self.upper_mark = self.line_count - self.content_rows as usize;
return; return;
} }
self.upper_mark = self.upper_mark.saturating_add(self.content_rows.into()); self.upper_mark = self.upper_mark.saturating_add(self.content_rows);
} }
fn page_up(&mut self) { fn page_up(&mut self) {
let content_row_usize: usize = self.content_rows.into();
self.upper_mark = self self.upper_mark = self
.upper_mark .upper_mark
.saturating_sub(content_row_usize.saturating_add(self.line_squeezed)); .saturating_sub(self.content_rows.saturating_add(self.line_squeezed));
if self.squeeze { if self.squeeze {
let iter = self.lines.iter().take(self.upper_mark).rev(); let iter = self.lines.iter().take(self.upper_mark).rev();
@ -520,7 +512,7 @@ impl<'a> Pager<'a> {
// TODO: Deal with column size changes. // TODO: Deal with column size changes.
fn page_resize(&mut self, _: u16, row: u16, option_line: Option<u16>) { fn page_resize(&mut self, _: u16, row: u16, option_line: Option<u16>) {
if option_line.is_none() { if option_line.is_none() {
self.content_rows = row.saturating_sub(1); self.content_rows = row.saturating_sub(1) as usize;
}; };
} }
@ -528,7 +520,7 @@ impl<'a> Pager<'a> {
self.draw_lines(stdout); self.draw_lines(stdout);
let lower_mark = self let lower_mark = self
.line_count .line_count
.min(self.upper_mark.saturating_add(self.content_rows.into())); .min(self.upper_mark.saturating_add(self.content_rows));
self.draw_prompt(stdout, lower_mark, wrong_key); self.draw_prompt(stdout, lower_mark, wrong_key);
stdout.flush().unwrap(); stdout.flush().unwrap();
} }
@ -541,7 +533,7 @@ impl<'a> Pager<'a> {
let mut displayed_lines = Vec::new(); let mut displayed_lines = Vec::new();
let mut iter = self.lines.iter().skip(self.upper_mark); let mut iter = self.lines.iter().skip(self.upper_mark);
while displayed_lines.len() < self.content_rows as usize { while displayed_lines.len() < self.content_rows {
match iter.next() { match iter.next() {
Some(line) => { Some(line) => {
if self.squeeze { if self.squeeze {
@ -608,13 +600,12 @@ impl<'a> Pager<'a> {
} }
} }
fn search_pattern_in_file(lines: &[String], pattern: &Option<String>) -> Option<usize> { fn search_pattern_in_file(lines: &[&str], pattern: &str) -> Option<usize> {
let pattern = pattern.clone().unwrap_or_default();
if lines.is_empty() || pattern.is_empty() { if lines.is_empty() || pattern.is_empty() {
return None; return None;
} }
for (line_number, line) in lines.iter().enumerate() { for (line_number, line) in lines.iter().enumerate() {
if line.contains(pattern.as_str()) { if line.contains(pattern) {
return Some(line_number); return Some(line_number);
} }
} }
@ -630,8 +621,10 @@ fn paging_add_back_message(options: &Options, stdout: &mut std::io::Stdout) -> U
} }
// Break the lines on the cols of the terminal // Break the lines on the cols of the terminal
fn break_buff(buff: &str, cols: usize) -> Vec<String> { fn break_buff(buff: &str, cols: usize) -> Vec<&str> {
let mut lines = Vec::with_capacity(buff.lines().count()); // We _could_ do a precise with_capacity here, but that would require scanning the
// whole buffer. Just guess a value instead.
let mut lines = Vec::with_capacity(2048);
for l in buff.lines() { for l in buff.lines() {
lines.append(&mut break_line(l, cols)); lines.append(&mut break_line(l, cols));
@ -639,11 +632,11 @@ fn break_buff(buff: &str, cols: usize) -> Vec<String> {
lines lines
} }
fn break_line(line: &str, cols: usize) -> Vec<String> { fn break_line(line: &str, cols: usize) -> Vec<&str> {
let width = UnicodeWidthStr::width(line); let width = UnicodeWidthStr::width(line);
let mut lines = Vec::new(); let mut lines = Vec::new();
if width < cols { if width < cols {
lines.push(line.to_string()); lines.push(line);
return lines; return lines;
} }
@ -655,14 +648,14 @@ fn break_line(line: &str, cols: usize) -> Vec<String> {
total_width += width; total_width += width;
if total_width > cols { if total_width > cols {
lines.push(line[last_index..index].to_string()); lines.push(&line[last_index..index]);
last_index = index; last_index = index;
total_width = width; total_width = width;
} }
} }
if last_index != line.len() { if last_index != line.len() {
lines.push(line[last_index..].to_string()); lines.push(&line[last_index..]);
} }
lines lines
} }
@ -707,63 +700,46 @@ mod tests {
test_string.push_str("👩🏻‍🔬"); test_string.push_str("👩🏻‍🔬");
} }
let lines = break_line(&test_string, 80); let lines = break_line(&test_string, 31);
let widths: Vec<usize> = lines let widths: Vec<usize> = lines
.iter() .iter()
.map(|s| UnicodeWidthStr::width(&s[..])) .map(|s| UnicodeWidthStr::width(&s[..]))
.collect(); .collect();
// Each 👩🏻‍🔬 is 6 character width it break line to the closest number to 80 => 6 * 13 = 78 // Each 👩🏻‍🔬 is 2 character width, break line to the closest even number to 31
assert_eq!((78, 42), (widths[0], widths[1])); assert_eq!((30, 10), (widths[0], widths[1]));
} }
#[test] #[test]
fn test_search_pattern_empty_lines() { fn test_search_pattern_empty_lines() {
let lines = vec![]; let lines = vec![];
let pattern = Some(String::from("pattern")); let pattern = "pattern";
assert_eq!(None, search_pattern_in_file(&lines, &pattern)); assert_eq!(None, search_pattern_in_file(&lines, pattern));
} }
#[test] #[test]
fn test_search_pattern_empty_pattern() { fn test_search_pattern_empty_pattern() {
let lines = vec![String::from("line1"), String::from("line2")]; let lines = vec!["line1", "line2"];
let pattern = None; let pattern = "";
assert_eq!(None, search_pattern_in_file(&lines, &pattern)); assert_eq!(None, search_pattern_in_file(&lines, pattern));
} }
#[test] #[test]
fn test_search_pattern_found_pattern() { fn test_search_pattern_found_pattern() {
let lines = vec![ let lines = vec!["line1", "line2", "pattern"];
String::from("line1"), let lines2 = vec!["line1", "line2", "pattern", "pattern2"];
String::from("line2"), let lines3 = vec!["line1", "line2", "other_pattern"];
String::from("pattern"), let pattern = "pattern";
]; assert_eq!(2, search_pattern_in_file(&lines, pattern).unwrap());
let lines2 = vec![ assert_eq!(2, search_pattern_in_file(&lines2, pattern).unwrap());
String::from("line1"), assert_eq!(2, search_pattern_in_file(&lines3, pattern).unwrap());
String::from("line2"),
String::from("pattern"),
String::from("pattern2"),
];
let lines3 = vec![
String::from("line1"),
String::from("line2"),
String::from("other_pattern"),
];
let pattern = Some(String::from("pattern"));
assert_eq!(2, search_pattern_in_file(&lines, &pattern).unwrap());
assert_eq!(2, search_pattern_in_file(&lines2, &pattern).unwrap());
assert_eq!(2, search_pattern_in_file(&lines3, &pattern).unwrap());
} }
#[test] #[test]
fn test_search_pattern_not_found_pattern() { fn test_search_pattern_not_found_pattern() {
let lines = vec![ let lines = vec!["line1", "line2", "something"];
String::from("line1"), let pattern = "pattern";
String::from("line2"), assert_eq!(None, search_pattern_in_file(&lines, pattern));
String::from("something"),
];
let pattern = Some(String::from("pattern"));
assert_eq!(None, search_pattern_in_file(&lines, &pattern));
} }
} }

View file

@ -12,7 +12,6 @@ pub enum MvError {
NoSuchFile(String), NoSuchFile(String),
CannotStatNotADirectory(String), CannotStatNotADirectory(String),
SameFile(String, String), SameFile(String, String),
SelfSubdirectory(String),
SelfTargetSubdirectory(String, String), SelfTargetSubdirectory(String, String),
DirectoryToNonDirectory(String), DirectoryToNonDirectory(String),
NonDirectoryToDirectory(String, String), NonDirectoryToDirectory(String, String),
@ -29,14 +28,9 @@ impl Display for MvError {
Self::NoSuchFile(s) => write!(f, "cannot stat {s}: No such file or directory"), Self::NoSuchFile(s) => write!(f, "cannot stat {s}: No such file or directory"),
Self::CannotStatNotADirectory(s) => write!(f, "cannot stat {s}: Not a directory"), Self::CannotStatNotADirectory(s) => write!(f, "cannot stat {s}: Not a directory"),
Self::SameFile(s, t) => write!(f, "{s} and {t} are the same file"), Self::SameFile(s, t) => write!(f, "{s} and {t} are the same file"),
Self::SelfSubdirectory(s) => write!( Self::SelfTargetSubdirectory(s, t) => {
f, write!(f, "cannot move {s} to a subdirectory of itself, {t}")
"cannot move '{s}' to a subdirectory of itself, '{s}/{s}'" }
),
Self::SelfTargetSubdirectory(s, t) => write!(
f,
"cannot move '{s}' to a subdirectory of itself, '{t}/{s}'"
),
Self::DirectoryToNonDirectory(t) => { Self::DirectoryToNonDirectory(t) => {
write!(f, "cannot overwrite directory {t} with non-directory") write!(f, "cannot overwrite directory {t} with non-directory")
} }

View file

@ -19,13 +19,13 @@ use std::io;
use std::os::unix; use std::os::unix;
#[cfg(windows)] #[cfg(windows)]
use std::os::windows; use std::os::windows;
use std::path::{Path, PathBuf}; use std::path::{absolute, Path, PathBuf};
use uucore::backup_control::{self, source_is_target_backup}; use uucore::backup_control::{self, source_is_target_backup};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{set_exit_code, FromIo, UResult, USimpleError, UUsageError}; use uucore::error::{set_exit_code, FromIo, UResult, USimpleError, UUsageError};
use uucore::fs::{ use uucore::fs::{
are_hardlinks_or_one_way_symlink_to_same_file, are_hardlinks_to_same_file, are_hardlinks_or_one_way_symlink_to_same_file, are_hardlinks_to_same_file, canonicalize,
path_ends_with_terminator, path_ends_with_terminator, MissingHandling, ResolveMode,
}; };
#[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))]
use uucore::fsxattr; use uucore::fsxattr;
@ -322,20 +322,6 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()>
}); });
} }
if (source.eq(target)
|| are_hardlinks_to_same_file(source, target)
|| are_hardlinks_or_one_way_symlink_to_same_file(source, target))
&& opts.backup == BackupMode::NoBackup
{
if source.eq(Path::new(".")) || source.ends_with("/.") || source.is_file() {
return Err(
MvError::SameFile(source.quote().to_string(), target.quote().to_string()).into(),
);
} else {
return Err(MvError::SelfSubdirectory(source.display().to_string()).into());
}
}
let target_is_dir = target.is_dir(); let target_is_dir = target.is_dir();
let source_is_dir = source.is_dir(); let source_is_dir = source.is_dir();
@ -347,6 +333,8 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()>
return Err(MvError::FailedToAccessNotADirectory(target.quote().to_string()).into()); return Err(MvError::FailedToAccessNotADirectory(target.quote().to_string()).into());
} }
assert_not_same_file(source, target, target_is_dir, opts)?;
if target_is_dir { if target_is_dir {
if opts.no_target_dir { if opts.no_target_dir {
if source.is_dir() { if source.is_dir() {
@ -356,14 +344,6 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()>
} else { } else {
Err(MvError::DirectoryToNonDirectory(target.quote().to_string()).into()) Err(MvError::DirectoryToNonDirectory(target.quote().to_string()).into())
} }
// Check that source & target do not contain same subdir/dir when both exist
// mkdir dir1/dir2; mv dir1 dir1/dir2
} else if target.starts_with(source) {
Err(MvError::SelfTargetSubdirectory(
source.display().to_string(),
target.display().to_string(),
)
.into())
} else { } else {
move_files_into_dir(&[source.to_path_buf()], target, opts) move_files_into_dir(&[source.to_path_buf()], target, opts)
} }
@ -387,6 +367,88 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()>
} }
} }
/// Checks that moving `source` to `target` would neither move a file onto
/// itself nor move a directory into a subdirectory of itself.
///
/// * `source` - the path being moved, as given by the caller.
/// * `target` - the destination path, as given by the caller.
/// * `target_is_dir` - whether the caller considers `target` an existing
///   directory; it is ignored (treated as `false`) when `opts.no_target_dir`
///   is set.
/// * `opts` - only `no_target_dir` and `backup` are read here.
///
/// Returns `Ok(())` when the move may proceed.
///
/// # Errors
///
/// * `MvError::SameFile` when source and destination are detected to be the
///   same file.
/// * `MvError::SelfTargetSubdirectory` when the destination lies inside the
///   source and `source` is not a symlink.
/// * Any error propagated with `?` from `absolute` or from canonicalizing the
///   target side.
fn assert_not_same_file(
source: &Path,
target: &Path,
target_is_dir: bool,
opts: &Options,
) -> UResult<()> {
// we'll compare canonicalized_source and canonicalized_target for same file detection
// a canonicalization failure on the source side is not an error: the `_` arm
// below falls back to the plain absolute path instead
let canonicalized_source = match canonicalize(
absolute(source)?,
MissingHandling::Normal,
ResolveMode::Logical,
) {
Ok(source) if source.exists() => source,
_ => absolute(source)?, // file or symlink target doesn't exist but its absolute path is still used for comparison
};
// special case if the target exists, is a directory, and the `-T` flag wasn't used
// (this shadows the parameter; every later use of `target_is_dir` sees this value)
let target_is_dir = target_is_dir && !opts.no_target_dir;
let canonicalized_target = if target_is_dir {
// `mv source_file target_dir` => target_dir/source_file
// canonicalize the path that exists (target directory) and join the source file name
// unlike the source side, a canonicalization failure here is propagated with `?`
canonicalize(
absolute(target)?,
MissingHandling::Normal,
ResolveMode::Logical,
)?
.join(source.file_name().unwrap_or_default())
} else {
// `mv source target_dir/target` => target_dir/target
// we canonicalize target_dir and join /target
match absolute(target)?.parent() {
Some(parent) if parent.to_str() != Some("") => {
canonicalize(parent, MissingHandling::Normal, ResolveMode::Logical)?
.join(target.file_name().unwrap_or_default())
}
// path.parent() returns Some("") or None if there's no parent
_ => absolute(target)?, // absolute paths should always have a parent, but we'll fall back just in case
}
};
// the identity checks (equal canonical paths, hardlinks, one-way symlink) only
// count as "same file" when no backup is requested
let same_file = (canonicalized_source.eq(&canonicalized_target)
|| are_hardlinks_to_same_file(source, target)
|| are_hardlinks_or_one_way_symlink_to_same_file(source, target))
&& opts.backup == BackupMode::NoBackup;
// get the expected target path to show in errors
// this is based on the argument and not canonicalized
let target_display = match source.file_name() {
Some(file_name) if target_is_dir => {
// join target_dir/source_file in a platform-independent manner
// trailing '/' characters are trimmed first so exactly one separator is emitted
let mut path = target
.display()
.to_string()
.trim_end_matches("/")
.to_owned();
path.push('/');
path.push_str(&file_name.to_string_lossy());
path.quote().to_string()
}
_ => target.quote().to_string(),
};
// report "same file" when the canonical paths match, or when `source` is ".",
// ends with "/." or is a regular file; other `same_file` cases fall through
// to the subdirectory check below
if same_file
&& (canonicalized_source.eq(&canonicalized_target)
|| source.eq(Path::new("."))
|| source.ends_with("/.")
|| source.is_file())
{
return Err(MvError::SameFile(source.quote().to_string(), target_display).into());
// the `starts_with` containment test is not gated on the backup mode, unlike `same_file`
} else if (same_file || canonicalized_target.starts_with(canonicalized_source))
// don't error if we're moving a symlink of a directory into itself
&& !source.is_symlink()
{
return Err(
MvError::SelfTargetSubdirectory(source.quote().to_string(), target_display).into(),
);
}
Ok(())
}
fn handle_multiple_paths(paths: &[PathBuf], opts: &Options) -> UResult<()> { fn handle_multiple_paths(paths: &[PathBuf], opts: &Options) -> UResult<()> {
if opts.no_target_dir { if opts.no_target_dir {
return Err(UUsageError::new( return Err(UUsageError::new(
@ -425,10 +487,6 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options)
return Err(MvError::NotADirectory(target_dir.quote().to_string()).into()); return Err(MvError::NotADirectory(target_dir.quote().to_string()).into());
} }
let canonicalized_target_dir = target_dir
.canonicalize()
.unwrap_or_else(|_| target_dir.to_path_buf());
let multi_progress = options.progress_bar.then(MultiProgress::new); let multi_progress = options.progress_bar.then(MultiProgress::new);
let count_progress = if let Some(ref multi_progress) = multi_progress { let count_progress = if let Some(ref multi_progress) = multi_progress {
@ -479,24 +537,9 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options)
// Check if we have mv dir1 dir2 dir2 // Check if we have mv dir1 dir2 dir2
// And generate an error if this is the case // And generate an error if this is the case
if let Ok(canonicalized_source) = sourcepath.canonicalize() { if let Err(e) = assert_not_same_file(sourcepath, target_dir, true, options) {
if canonicalized_source == canonicalized_target_dir { show!(e);
// User tried to move directory to itself, warning is shown continue;
// and process of moving files is continued.
show!(USimpleError::new(
1,
format!(
"cannot move '{}' to a subdirectory of itself, '{}/{}'",
sourcepath.display(),
target_dir.display(),
canonicalized_target_dir.components().last().map_or_else(
|| target_dir.display().to_string(),
|dir| { PathBuf::from(dir.as_os_str()).display().to_string() }
)
)
));
continue;
}
} }
match rename(sourcepath, &targetpath, options, multi_progress.as_ref()) { match rename(sourcepath, &targetpath, options, multi_progress.as_ref()) {
@ -679,7 +722,7 @@ fn rename_with_fallback(
}; };
#[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))]
fsxattr::apply_xattrs(to, xattrs).unwrap(); fsxattr::apply_xattrs(to, xattrs)?;
if let Err(err) = result { if let Err(err) = result {
return match err.kind { return match err.kind {

View file

@ -33,7 +33,7 @@ where
byte_order: ByteOrder, byte_order: ByteOrder,
} }
impl<'a, I> InputDecoder<'a, I> { impl<I> InputDecoder<'_, I> {
/// Creates a new `InputDecoder` with an allocated buffer of `normal_length` + `peek_length` bytes. /// Creates a new `InputDecoder` with an allocated buffer of `normal_length` + `peek_length` bytes.
/// `byte_order` determines how to read multibyte formats from the buffer. /// `byte_order` determines how to read multibyte formats from the buffer.
pub fn new( pub fn new(
@ -55,7 +55,7 @@ impl<'a, I> InputDecoder<'a, I> {
} }
} }
impl<'a, I> InputDecoder<'a, I> impl<I> InputDecoder<'_, I>
where where
I: PeekRead, I: PeekRead,
{ {
@ -81,7 +81,7 @@ where
} }
} }
impl<'a, I> HasError for InputDecoder<'a, I> impl<I> HasError for InputDecoder<'_, I>
where where
I: HasError, I: HasError,
{ {
@ -103,7 +103,7 @@ pub struct MemoryDecoder<'a> {
byte_order: ByteOrder, byte_order: ByteOrder,
} }
impl<'a> MemoryDecoder<'a> { impl MemoryDecoder<'_> {
/// Set a part of the internal buffer to zero. /// Set a part of the internal buffer to zero.
/// access to the whole buffer is possible, not just to the valid data. /// access to the whole buffer is possible, not just to the valid data.
pub fn zero_out_buffer(&mut self, start: usize, end: usize) { pub fn zero_out_buffer(&mut self, start: usize, end: usize) {

View file

@ -28,7 +28,7 @@ pub trait HasError {
fn has_error(&self) -> bool; fn has_error(&self) -> bool;
} }
impl<'b> MultifileReader<'b> { impl MultifileReader<'_> {
pub fn new(fnames: Vec<InputSource>) -> MultifileReader { pub fn new(fnames: Vec<InputSource>) -> MultifileReader {
let mut mf = MultifileReader { let mut mf = MultifileReader {
ni: fnames, ni: fnames,
@ -76,7 +76,7 @@ impl<'b> MultifileReader<'b> {
} }
} }
impl<'b> io::Read for MultifileReader<'b> { impl io::Read for MultifileReader<'_> {
// Fill buf with bytes read from the list of files // Fill buf with bytes read from the list of files
// Returns Ok(<number of bytes read>) // Returns Ok(<number of bytes read>)
// Handles io errors itself, thus always returns OK // Handles io errors itself, thus always returns OK
@ -113,7 +113,7 @@ impl<'b> io::Read for MultifileReader<'b> {
} }
} }
impl<'b> HasError for MultifileReader<'b> { impl HasError for MultifileReader<'_> {
fn has_error(&self) -> bool { fn has_error(&self) -> bool {
self.any_err self.any_err
} }

View file

@ -200,7 +200,7 @@ fn parse_delimiters(delimiters: &str) -> UResult<Box<[Box<[u8]>]>> {
let mut add_single_char_delimiter = |vec: &mut Vec<Box<[u8]>>, ch: char| { let mut add_single_char_delimiter = |vec: &mut Vec<Box<[u8]>>, ch: char| {
let delimiter_encoded = ch.encode_utf8(&mut buffer); let delimiter_encoded = ch.encode_utf8(&mut buffer);
vec.push(Box::from(delimiter_encoded.as_bytes())); vec.push(Box::<[u8]>::from(delimiter_encoded.as_bytes()));
}; };
let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters.len()); let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters.len());
@ -311,7 +311,7 @@ impl<'a> DelimiterState<'a> {
DelimiterState::MultipleDelimiters { DelimiterState::MultipleDelimiters {
current_delimiter, .. current_delimiter, ..
} => current_delimiter.len(), } => current_delimiter.len(),
_ => { DelimiterState::NoDelimiters => {
return; return;
} }
}; };
@ -350,7 +350,7 @@ impl<'a> DelimiterState<'a> {
*current_delimiter = bo; *current_delimiter = bo;
} }
_ => {} DelimiterState::NoDelimiters => {}
} }
} }
} }
@ -363,8 +363,8 @@ enum InputSource {
impl InputSource { impl InputSource {
fn read_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> UResult<usize> { fn read_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> UResult<usize> {
let us = match self { let us = match self {
Self::File(bu) => bu.read_until(byte, buf)?, InputSource::File(bu) => bu.read_until(byte, buf)?,
Self::StandardInput(rc) => rc InputSource::StandardInput(rc) => rc
.try_borrow() .try_borrow()
.map_err(|bo| USimpleError::new(1, format!("{bo}")))? .map_err(|bo| USimpleError::new(1, format!("{bo}")))?
.lock() .lock()

View file

@ -3,7 +3,7 @@
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore (path) eacces inacc // spell-checker:ignore (path) eacces inacc rm-r4
use clap::{builder::ValueParser, crate_version, parser::ValueSource, Arg, ArgAction, Command}; use clap::{builder::ValueParser, crate_version, parser::ValueSource, Arg, ArgAction, Command};
use std::collections::VecDeque; use std::collections::VecDeque;
@ -11,10 +11,15 @@ use std::ffi::{OsStr, OsString};
use std::fs::{self, File, Metadata}; use std::fs::{self, File, Metadata};
use std::io::ErrorKind; use std::io::ErrorKind;
use std::ops::BitOr; use std::ops::BitOr;
#[cfg(not(windows))]
use std::os::unix::ffi::OsStrExt;
use std::path::MAIN_SEPARATOR;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::error::{UResult, USimpleError, UUsageError};
use uucore::{format_usage, help_about, help_section, help_usage, prompt_yes, show_error}; use uucore::{
format_usage, help_about, help_section, help_usage, os_str_as_bytes, prompt_yes, show_error,
};
use walkdir::{DirEntry, WalkDir}; use walkdir::{DirEntry, WalkDir};
#[derive(Eq, PartialEq, Clone, Copy)] #[derive(Eq, PartialEq, Clone, Copy)]
@ -290,6 +295,7 @@ pub fn remove(files: &[&OsStr], options: &Options) -> bool {
for filename in files { for filename in files {
let file = Path::new(filename); let file = Path::new(filename);
had_err = match file.symlink_metadata() { had_err = match file.symlink_metadata() {
Ok(metadata) => { Ok(metadata) => {
if metadata.is_dir() { if metadata.is_dir() {
@ -300,6 +306,7 @@ pub fn remove(files: &[&OsStr], options: &Options) -> bool {
remove_file(file, options) remove_file(file, options)
} }
} }
Err(_e) => { Err(_e) => {
// TODO: actually print out the specific error // TODO: actually print out the specific error
// TODO: When the error is not about missing files // TODO: When the error is not about missing files
@ -326,6 +333,15 @@ pub fn remove(files: &[&OsStr], options: &Options) -> bool {
fn handle_dir(path: &Path, options: &Options) -> bool { fn handle_dir(path: &Path, options: &Options) -> bool {
let mut had_err = false; let mut had_err = false;
let path = clean_trailing_slashes(path);
if path_is_current_or_parent_directory(path) {
show_error!(
"refusing to remove '.' or '..' directory: skipping '{}'",
path.display()
);
return true;
}
let is_root = path.has_root() && path.parent().is_none(); let is_root = path.has_root() && path.parent().is_none();
if options.recursive && (!is_root || !options.preserve_root) { if options.recursive && (!is_root || !options.preserve_root) {
if options.interactive != InteractiveMode::Always && !options.verbose { if options.interactive != InteractiveMode::Always && !options.verbose {
@ -396,7 +412,11 @@ fn handle_dir(path: &Path, options: &Options) -> bool {
} else if options.dir && (!is_root || !options.preserve_root) { } else if options.dir && (!is_root || !options.preserve_root) {
had_err = remove_dir(path, options).bitor(had_err); had_err = remove_dir(path, options).bitor(had_err);
} else if options.recursive { } else if options.recursive {
show_error!("could not remove directory {}", path.quote()); show_error!(
"it is dangerous to operate recursively on '{}'",
MAIN_SEPARATOR
);
show_error!("use --no-preserve-root to override this failsafe");
had_err = true; had_err = true;
} else { } else {
show_error!( show_error!(
@ -559,6 +579,20 @@ fn handle_writable_directory(path: &Path, options: &Options, metadata: &Metadata
true true
} }
} }
/// Returns `true` when the final component of `path` is `.` or `..`.
///
/// Trailing directory separators are ignored, so `.`, `./`, `..`, `../`,
/// `d/.`, `d/../` and `/../..` are all reported. Such operands must be
/// refused by `rm` instead of being removed.
///
/// Returns `false` when the path cannot be viewed as bytes (i.e. when
/// `os_str_as_bytes` fails).
fn path_is_current_or_parent_directory(path: &Path) -> bool {
    let Ok(path_bytes) = os_str_as_bytes(path.as_os_str()) else {
        return false;
    };
    let dir_separator = MAIN_SEPARATOR as u8;

    // Ignore every trailing separator so that "./" and "..//" are treated
    // exactly like "." and "..".
    let trimmed_len = path_bytes
        .iter()
        .rposition(|&byte| byte != dir_separator)
        .map_or(0, |last| last + 1);
    let trimmed = &path_bytes[..trimmed_len];

    // `rsplit` always yields at least one (possibly empty) piece, which is
    // the last path component once the trailing separators are gone.
    let last_component = trimmed
        .rsplit(|&byte| byte == dir_separator)
        .next()
        .unwrap_or(trimmed);

    last_component == b"." || last_component == b".."
}
// For windows we can use windows metadata trait and file attributes to see if a directory is readonly // For windows we can use windows metadata trait and file attributes to see if a directory is readonly
#[cfg(windows)] #[cfg(windows)]
@ -586,6 +620,40 @@ fn handle_writable_directory(path: &Path, options: &Options, metadata: &Metadata
} }
} }
/// Collapses a run of trailing directory separators down to a single one.
///
/// For example `d/../////` becomes `d/../` (needed by the rm-r4 GNU test).
/// The path is returned untouched when it is at most one byte long, when it
/// does not end in a separator, when it cannot be viewed as bytes, or when it
/// consists of separators only (so the root is never shortened away).
fn clean_trailing_slashes(path: &Path) -> &Path {
    let separator = MAIN_SEPARATOR as u8;

    let Ok(bytes) = os_str_as_bytes(path.as_os_str()) else {
        return path;
    };
    if bytes.len() <= 1 || bytes[bytes.len() - 1] != separator {
        return path;
    }

    // Index of the one separator that is kept: the byte right after the last
    // non-separator byte. A path made only of separators keeps its full length.
    let last_kept = bytes
        .iter()
        .rposition(|&byte| byte != separator)
        .map_or(bytes.len() - 1, |last_regular| last_regular + 1);
    let kept = &bytes[0..=last_kept];

    #[cfg(unix)]
    return Path::new(OsStr::from_bytes(kept));

    #[cfg(not(unix))]
    // Unwrapping is fine here: on non-unix systems os_str_as_bytes() fails for
    // non UTF-8 input, in which case we already returned above.
    return Path::new(std::str::from_utf8(kept).unwrap());
}
fn prompt_descend(path: &Path) -> bool { fn prompt_descend(path: &Path) -> bool {
prompt_yes!("descend into directory {}?", path.quote()) prompt_yes!("descend into directory {}?", path.quote())
} }
@ -611,3 +679,17 @@ fn is_symlink_dir(metadata: &Metadata) -> bool {
metadata.file_type().is_symlink() metadata.file_type().is_symlink()
&& ((metadata.file_attributes() & FILE_ATTRIBUTE_DIRECTORY) != 0) && ((metadata.file_attributes() & FILE_ATTRIBUTE_DIRECTORY) != 0)
} }
// Only compiled for test builds.
#[cfg(test)]
mod tests {
    #[test]
    // Checks that cleaning the all-separator path `/////` yields a path that
    // compares equal to the root `/` (`Path` equality is component based).
    fn test_collapsible_slash_path() {
        use std::path::Path;

        use crate::clean_trailing_slashes;
        let path = Path::new("/////");

        assert_eq!(Path::new("/"), clean_trailing_slashes(path));
    }
}

View file

@ -102,20 +102,33 @@ fn parse_exponent_no_decimal(s: &str, j: usize) -> Result<PreciseNumber, ParseNu
// displayed in decimal notation. For example, "1e-2" will be // displayed in decimal notation. For example, "1e-2" will be
// displayed as "0.01", but "1e2" will be displayed as "100", // displayed as "0.01", but "1e2" will be displayed as "100",
// without a decimal point. // without a decimal point.
let x: BigDecimal = s.parse().map_err(|_| ParseNumberError::Float)?; let x: BigDecimal = {
let parsed_decimal = s
.parse::<BigDecimal>()
.map_err(|_| ParseNumberError::Float)?;
if parsed_decimal == BigDecimal::zero() {
BigDecimal::zero()
} else {
parsed_decimal
}
};
let num_integral_digits = if is_minus_zero_float(s, &x) { let num_integral_digits = if is_minus_zero_float(s, &x) {
if exponent > 0 { if exponent > 0 {
2usize + exponent as usize (2usize)
.checked_add(exponent as usize)
.ok_or(ParseNumberError::Float)?
} else { } else {
2usize 2usize
} }
} else { } else {
let total = j as i64 + exponent; let total = (j as i64)
.checked_add(exponent)
.ok_or(ParseNumberError::Float)?;
let result = if total < 1 { let result = if total < 1 {
1 1
} else { } else {
total.try_into().unwrap() total.try_into().map_err(|_| ParseNumberError::Float)?
}; };
if x.sign() == Sign::Minus { if x.sign() == Sign::Minus {
result + 1 result + 1
@ -200,14 +213,25 @@ fn parse_decimal_and_exponent(
// Because of the match guard, this subtraction will not underflow. // Because of the match guard, this subtraction will not underflow.
let num_digits_between_decimal_point_and_e = (j - (i + 1)) as i64; let num_digits_between_decimal_point_and_e = (j - (i + 1)) as i64;
let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?; let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?;
let val: BigDecimal = s.parse().map_err(|_| ParseNumberError::Float)?; let val: BigDecimal = {
let parsed_decimal = s
.parse::<BigDecimal>()
.map_err(|_| ParseNumberError::Float)?;
if parsed_decimal == BigDecimal::zero() {
BigDecimal::zero()
} else {
parsed_decimal
}
};
let num_integral_digits = { let num_integral_digits = {
let minimum: usize = { let minimum: usize = {
let integral_part: f64 = s[..j].parse().map_err(|_| ParseNumberError::Float)?; let integral_part: f64 = s[..j].parse().map_err(|_| ParseNumberError::Float)?;
if integral_part.is_sign_negative() { if integral_part.is_sign_negative() {
if exponent > 0 { if exponent > 0 {
2usize + exponent as usize 2usize
.checked_add(exponent as usize)
.ok_or(ParseNumberError::Float)?
} else { } else {
2usize 2usize
} }
@ -217,15 +241,20 @@ fn parse_decimal_and_exponent(
}; };
// Special case: if the string is "-.1e2", we need to treat it // Special case: if the string is "-.1e2", we need to treat it
// as if it were "-0.1e2". // as if it were "-0.1e2".
let total = if s.starts_with("-.") { let total = {
i as i64 + exponent + 1 let total = (i as i64)
} else { .checked_add(exponent)
i as i64 + exponent .ok_or(ParseNumberError::Float)?;
if s.starts_with("-.") {
total.checked_add(1).ok_or(ParseNumberError::Float)?
} else {
total
}
}; };
if total < minimum as i64 { if total < minimum as i64 {
minimum minimum
} else { } else {
total.try_into().unwrap() total.try_into().map_err(|_| ParseNumberError::Float)?
} }
}; };
@ -312,7 +341,7 @@ impl FromStr for PreciseNumber {
// Check if the string seems to be in hexadecimal format. // Check if the string seems to be in hexadecimal format.
// //
// May be 0x123 or -0x123, so the index `i` may be either 0 or 1. // May be 0x123 or -0x123, so the index `i` may be either 0 or 1.
if let Some(i) = s.to_lowercase().find("0x") { if let Some(i) = s.find("0x").or_else(|| s.find("0X")) {
if i <= 1 { if i <= 1 {
return parse_hexadecimal(s); return parse_hexadecimal(s);
} }
@ -322,7 +351,7 @@ impl FromStr for PreciseNumber {
// number differently depending on its form. This is important // number differently depending on its form. This is important
// because the form of the input dictates how the output will be // because the form of the input dictates how the output will be
// presented. // presented.
match (s.find('.'), s.find('e')) { match (s.find('.'), s.find(['e', 'E'])) {
// For example, "123456" or "inf". // For example, "123456" or "inf".
(None, None) => parse_no_decimal_no_exponent(s), (None, None) => parse_no_decimal_no_exponent(s),
// For example, "123e456" or "1e-2". // For example, "123e456" or "1e-2".
@ -381,6 +410,7 @@ mod tests {
fn test_parse_big_int() { fn test_parse_big_int() {
assert_eq!(parse("0"), ExtendedBigDecimal::zero()); assert_eq!(parse("0"), ExtendedBigDecimal::zero());
assert_eq!(parse("0.1e1"), ExtendedBigDecimal::one()); assert_eq!(parse("0.1e1"), ExtendedBigDecimal::one());
assert_eq!(parse("0.1E1"), ExtendedBigDecimal::one());
assert_eq!( assert_eq!(
parse("1.0e1"), parse("1.0e1"),
ExtendedBigDecimal::BigDecimal("10".parse::<BigDecimal>().unwrap()) ExtendedBigDecimal::BigDecimal("10".parse::<BigDecimal>().unwrap())

View file

@ -3,6 +3,7 @@
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore (ToDO) extendedbigdecimal numberparse // spell-checker:ignore (ToDO) extendedbigdecimal numberparse
use std::ffi::OsString;
use std::io::{stdout, ErrorKind, Write}; use std::io::{stdout, ErrorKind, Write};
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, Arg, ArgAction, Command};
@ -47,9 +48,33 @@ struct SeqOptions<'a> {
/// The elements are (first, increment, last). /// The elements are (first, increment, last).
type RangeFloat = (ExtendedBigDecimal, ExtendedBigDecimal, ExtendedBigDecimal); type RangeFloat = (ExtendedBigDecimal, ExtendedBigDecimal, ExtendedBigDecimal);
// Rewrites short options that carry an attached value (for example "-s,")
// into two separate arguments ("-s" and ",") so that clap can parse them.
// Only the `-f`, `-s` and `-t` options are split; every other argument is
// passed through unchanged.
fn split_short_args_with_value(args: impl uucore::Args) -> impl uucore::Args {
    const SPLITTABLE: [&[u8]; 3] = [b"-f", b"-s", b"-t"];

    let mut expanded: Vec<OsString> = Vec::new();
    for arg in args {
        let pieces = {
            let raw = arg.as_encoded_bytes();
            if raw.len() > 2 && SPLITTABLE.iter().any(|prefix| raw.starts_with(prefix)) {
                let (option, attached) = raw.split_at(2);
                // SAFETY:
                // Both halves originate from `OsStr::as_encoded_bytes`, and the
                // split happens right after the two ASCII bytes of the option.
                let option = unsafe { OsString::from_encoded_bytes_unchecked(option.to_vec()) };
                let attached =
                    unsafe { OsString::from_encoded_bytes_unchecked(attached.to_vec()) };
                Some((option, attached))
            } else {
                None
            }
        };

        match pieces {
            Some((option, attached)) => {
                expanded.push(option);
                expanded.push(attached);
            }
            None => expanded.push(arg),
        }
    }
    expanded.into_iter()
}
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().try_get_matches_from(args)?; let matches = uu_app().try_get_matches_from(split_short_args_with_value(args))?;
let numbers_option = matches.get_many::<String>(ARG_NUMBERS); let numbers_option = matches.get_many::<String>(ARG_NUMBERS);
@ -138,7 +163,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
pub fn uu_app() -> Command { pub fn uu_app() -> Command {
Command::new(uucore::util_name()) Command::new(uucore::util_name())
.trailing_var_arg(true) .trailing_var_arg(true)
.allow_negative_numbers(true)
.infer_long_args(true) .infer_long_args(true)
.version(crate_version!()) .version(crate_version!())
.about(ABOUT) .about(ABOUT)
@ -169,7 +193,10 @@ pub fn uu_app() -> Command {
.help("use printf style floating-point FORMAT"), .help("use printf style floating-point FORMAT"),
) )
.arg( .arg(
// we use allow_hyphen_values instead of allow_negative_numbers because clap removed
// the support for "exotic" negative numbers like -.1 (see https://github.com/clap-rs/clap/discussions/5837)
Arg::new(ARG_NUMBERS) Arg::new(ARG_NUMBERS)
.allow_hyphen_values(true)
.action(ArgAction::Append) .action(ArgAction::Append)
.num_args(1..=3), .num_args(1..=3),
) )

View file

@ -279,7 +279,10 @@ impl<'a> Shufable for Vec<&'a [u8]> {
// this is safe. // this is safe.
(**self).choose(rng).unwrap() (**self).choose(rng).unwrap()
} }
type PartialShuffleIterator<'b> = std::iter::Copied<std::slice::Iter<'b, &'a [u8]>> where Self: 'b; type PartialShuffleIterator<'b>
= std::iter::Copied<std::slice::Iter<'b, &'a [u8]>>
where
Self: 'b;
fn partial_shuffle<'b>( fn partial_shuffle<'b>(
&'b mut self, &'b mut self,
rng: &'b mut WrappedRng, rng: &'b mut WrappedRng,
@ -298,7 +301,10 @@ impl Shufable for RangeInclusive<usize> {
fn choose(&self, rng: &mut WrappedRng) -> usize { fn choose(&self, rng: &mut WrappedRng) -> usize {
rng.gen_range(self.clone()) rng.gen_range(self.clone())
} }
type PartialShuffleIterator<'b> = NonrepeatingIterator<'b> where Self: 'b; type PartialShuffleIterator<'b>
= NonrepeatingIterator<'b>
where
Self: 'b;
fn partial_shuffle<'b>( fn partial_shuffle<'b>(
&'b mut self, &'b mut self,
rng: &'b mut WrappedRng, rng: &'b mut WrappedRng,
@ -374,7 +380,7 @@ impl<'a> NonrepeatingIterator<'a> {
} }
} }
impl<'a> Iterator for NonrepeatingIterator<'a> { impl Iterator for NonrepeatingIterator<'_> {
type Item = usize; type Item = usize;
fn next(&mut self) -> Option<usize> { fn next(&mut self) -> Option<usize> {
@ -401,7 +407,7 @@ trait Writable {
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>; fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>;
} }
impl<'a> Writable for &'a [u8] { impl Writable for &[u8] {
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> { fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
output.write_all(self) output.write_all(self)
} }

View file

@ -98,12 +98,12 @@ fn reader_writer<
)?; )?;
match read_result { match read_result {
ReadResult::WroteChunksToFile { tmp_files } => { ReadResult::WroteChunksToFile { tmp_files } => {
let merger = merge::merge_with_file_limit::<_, _, Tmp>( merge::merge_with_file_limit::<_, _, Tmp>(
tmp_files.into_iter().map(|c| c.reopen()), tmp_files.into_iter().map(|c| c.reopen()),
settings, settings,
output,
tmp_dir, tmp_dir,
)?; )?;
merger.write_all(settings, output)?;
} }
ReadResult::SortedSingleChunk(chunk) => { ReadResult::SortedSingleChunk(chunk) => {
if settings.unique { if settings.unique {

View file

@ -25,7 +25,6 @@ use std::{
}; };
use compare::Compare; use compare::Compare;
use itertools::Itertools;
use uucore::error::UResult; use uucore::error::UResult;
use crate::{ use crate::{
@ -67,58 +66,63 @@ fn replace_output_file_in_input_files(
/// ///
/// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used. /// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used.
/// If `settings.compress_prog` is `Some`, intermediate files will be compressed with it. /// If `settings.compress_prog` is `Some`, intermediate files will be compressed with it.
pub fn merge<'a>( pub fn merge(
files: &mut [OsString], files: &mut [OsString],
settings: &'a GlobalSettings, settings: &GlobalSettings,
output: Option<&str>, output: Output,
tmp_dir: &mut TmpDirWrapper, tmp_dir: &mut TmpDirWrapper,
) -> UResult<FileMerger<'a>> { ) -> UResult<()> {
replace_output_file_in_input_files(files, output, tmp_dir)?; replace_output_file_in_input_files(files, output.as_output_name(), tmp_dir)?;
let files = files
.iter()
.map(|file| open(file).map(|file| PlainMergeInput { inner: file }));
if settings.compress_prog.is_none() { if settings.compress_prog.is_none() {
merge_with_file_limit::<_, _, WriteablePlainTmpFile>( merge_with_file_limit::<_, _, WriteablePlainTmpFile>(files, settings, output, tmp_dir)
files
.iter()
.map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
settings,
tmp_dir,
)
} else { } else {
merge_with_file_limit::<_, _, WriteableCompressedTmpFile>( merge_with_file_limit::<_, _, WriteableCompressedTmpFile>(files, settings, output, tmp_dir)
files
.iter()
.map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
settings,
tmp_dir,
)
} }
} }
// Merge already sorted `MergeInput`s. // Merge already sorted `MergeInput`s.
pub fn merge_with_file_limit< pub fn merge_with_file_limit<
'a,
M: MergeInput + 'static, M: MergeInput + 'static,
F: ExactSizeIterator<Item = UResult<M>>, F: ExactSizeIterator<Item = UResult<M>>,
Tmp: WriteableTmpFile + 'static, Tmp: WriteableTmpFile + 'static,
>( >(
files: F, files: F,
settings: &'a GlobalSettings, settings: &GlobalSettings,
output: Output,
tmp_dir: &mut TmpDirWrapper, tmp_dir: &mut TmpDirWrapper,
) -> UResult<FileMerger<'a>> { ) -> UResult<()> {
if files.len() > settings.merge_batch_size { if files.len() <= settings.merge_batch_size {
let mut remaining_files = files.len(); let merger = merge_without_limit(files, settings);
let batches = files.chunks(settings.merge_batch_size); merger?.write_all(settings, output)
let mut batches = batches.into_iter(); } else {
let mut temporary_files = vec![]; let mut temporary_files = vec![];
while remaining_files != 0 { let mut batch = vec![];
// Work around the fact that `Chunks` is not an `ExactSizeIterator`. for file in files {
remaining_files = remaining_files.saturating_sub(settings.merge_batch_size); batch.push(file);
let merger = merge_without_limit(batches.next().unwrap(), settings)?; if batch.len() >= settings.merge_batch_size {
assert_eq!(batch.len(), settings.merge_batch_size);
let merger = merge_without_limit(batch.into_iter(), settings)?;
batch = vec![];
let mut tmp_file =
Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?;
merger.write_all_to(settings, tmp_file.as_write())?;
temporary_files.push(tmp_file.finished_writing()?);
}
}
// Merge any remaining files that didn't get merged in a full batch above.
if !batch.is_empty() {
assert!(batch.len() < settings.merge_batch_size);
let merger = merge_without_limit(batch.into_iter(), settings)?;
let mut tmp_file = let mut tmp_file =
Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?; Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?;
merger.write_all_to(settings, tmp_file.as_write())?; merger.write_all_to(settings, tmp_file.as_write())?;
temporary_files.push(tmp_file.finished_writing()?); temporary_files.push(tmp_file.finished_writing()?);
} }
assert!(batches.next().is_none());
merge_with_file_limit::<_, _, Tmp>( merge_with_file_limit::<_, _, Tmp>(
temporary_files temporary_files
.into_iter() .into_iter()
@ -127,10 +131,9 @@ pub fn merge_with_file_limit<
dyn FnMut(Tmp::Closed) -> UResult<<Tmp::Closed as ClosedTmpFile>::Reopened>, dyn FnMut(Tmp::Closed) -> UResult<<Tmp::Closed as ClosedTmpFile>::Reopened>,
>), >),
settings, settings,
output,
tmp_dir, tmp_dir,
) )
} else {
merge_without_limit(files, settings)
} }
} }
@ -260,21 +263,21 @@ struct PreviousLine {
} }
/// Merges files together. This is **not** an iterator because of lifetime problems. /// Merges files together. This is **not** an iterator because of lifetime problems.
pub struct FileMerger<'a> { struct FileMerger<'a> {
heap: binary_heap_plus::BinaryHeap<MergeableFile, FileComparator<'a>>, heap: binary_heap_plus::BinaryHeap<MergeableFile, FileComparator<'a>>,
request_sender: Sender<(usize, RecycledChunk)>, request_sender: Sender<(usize, RecycledChunk)>,
prev: Option<PreviousLine>, prev: Option<PreviousLine>,
reader_join_handle: JoinHandle<UResult<()>>, reader_join_handle: JoinHandle<UResult<()>>,
} }
impl<'a> FileMerger<'a> { impl FileMerger<'_> {
/// Write the merged contents to the output file. /// Write the merged contents to the output file.
pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> { fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> {
let mut out = output.into_write(); let mut out = output.into_write();
self.write_all_to(settings, &mut out) self.write_all_to(settings, &mut out)
} }
pub fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> { fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> {
while self.write_next(settings, out) {} while self.write_next(settings, out) {}
drop(self.request_sender); drop(self.request_sender);
self.reader_join_handle.join().unwrap() self.reader_join_handle.join().unwrap()
@ -341,7 +344,7 @@ struct FileComparator<'a> {
settings: &'a GlobalSettings, settings: &'a GlobalSettings,
} }
impl<'a> Compare<MergeableFile> for FileComparator<'a> { impl Compare<MergeableFile> for FileComparator<'_> {
fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering { fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering {
let mut cmp = compare_by( let mut cmp = compare_by(
&a.current_chunk.lines()[a.line_idx], &a.current_chunk.lines()[a.line_idx],

View file

@ -1567,8 +1567,7 @@ fn exec(
tmp_dir: &mut TmpDirWrapper, tmp_dir: &mut TmpDirWrapper,
) -> UResult<()> { ) -> UResult<()> {
if settings.merge { if settings.merge {
let file_merger = merge::merge(files, settings, output.as_output_name(), tmp_dir)?; merge::merge(files, settings, output, tmp_dir)
file_merger.write_all(settings, output)
} else if settings.check { } else if settings.check {
if files.len() > 1 { if files.len() > 1 {
Err(UUsageError::new(2, "only one file allowed with -c")) Err(UUsageError::new(2, "only one file allowed with -c"))

View file

@ -341,7 +341,7 @@ impl<'a> FilenameIterator<'a> {
} }
} }
impl<'a> Iterator for FilenameIterator<'a> { impl Iterator for FilenameIterator<'_> {
type Item = String; type Item = String;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {

View file

@ -492,7 +492,7 @@ impl Settings {
} }
match first.as_str() { match first.as_str() {
"\\0" => b'\0', "\\0" => b'\0',
s if s.as_bytes().len() == 1 => s.as_bytes()[0], s if s.len() == 1 => s.as_bytes()[0],
s => return Err(SettingsError::MultiCharacterSeparator(s.to_string())), s => return Err(SettingsError::MultiCharacterSeparator(s.to_string())),
} }
} }
@ -748,7 +748,7 @@ impl<'a> ByteChunkWriter<'a> {
} }
} }
impl<'a> Write for ByteChunkWriter<'a> { impl Write for ByteChunkWriter<'_> {
/// Implements `--bytes=SIZE` /// Implements `--bytes=SIZE`
fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> { fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
// If the length of `buf` exceeds the number of bytes remaining // If the length of `buf` exceeds the number of bytes remaining
@ -872,7 +872,7 @@ impl<'a> LineChunkWriter<'a> {
} }
} }
impl<'a> Write for LineChunkWriter<'a> { impl Write for LineChunkWriter<'_> {
/// Implements `--lines=NUMBER` /// Implements `--lines=NUMBER`
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
// If the number of lines in `buf` exceeds the number of lines // If the number of lines in `buf` exceeds the number of lines
@ -978,7 +978,7 @@ impl<'a> LineBytesChunkWriter<'a> {
} }
} }
impl<'a> Write for LineBytesChunkWriter<'a> { impl Write for LineBytesChunkWriter<'_> {
/// Write as many lines to a chunk as possible without /// Write as many lines to a chunk as possible without
/// exceeding the byte limit. If a single line has more bytes /// exceeding the byte limit. If a single line has more bytes
/// than the limit, then fill an entire single chunk with those /// than the limit, then fill an entire single chunk with those

View file

@ -9,7 +9,9 @@ use uucore::error::{UResult, USimpleError};
use clap::builder::ValueParser; use clap::builder::ValueParser;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::fs::display_permissions; use uucore::fs::display_permissions;
use uucore::fsext::{pretty_filetype, pretty_fstype, read_fs_list, statfs, BirthTime, FsMeta}; use uucore::fsext::{
pretty_filetype, pretty_fstype, read_fs_list, statfs, BirthTime, FsMeta, StatFs,
};
use uucore::libc::mode_t; use uucore::libc::mode_t;
use uucore::{ use uucore::{
entries, format_usage, help_about, help_section, help_usage, show_error, show_warning, entries, format_usage, help_about, help_section, help_usage, show_error, show_warning,
@ -19,10 +21,12 @@ use chrono::{DateTime, Local};
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::borrow::Cow; use std::borrow::Cow;
use std::ffi::{OsStr, OsString}; use std::ffi::{OsStr, OsString};
use std::fs; use std::fs::{FileType, Metadata};
use std::io::Write;
use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::os::unix::fs::{FileTypeExt, MetadataExt};
use std::os::unix::prelude::OsStrExt; use std::os::unix::prelude::OsStrExt;
use std::path::Path; use std::path::Path;
use std::{env, fs};
const ABOUT: &str = help_about!("stat.md"); const ABOUT: &str = help_about!("stat.md");
const USAGE: &str = help_usage!("stat.md"); const USAGE: &str = help_usage!("stat.md");
@ -93,9 +97,33 @@ pub enum OutputType {
Unknown, Unknown,
} }
/// How a file name is quoted when it is printed.
///
/// `ShellEscapeAlways` is the default. `Quote` cannot be obtained by parsing
/// a string; it is only constructed internally.
#[derive(Default)]
enum QuotingStyle {
    Locale,
    Shell,
    #[default]
    ShellEscapeAlways,
    Quote,
}

impl std::str::FromStr for QuotingStyle {
    type Err = String;

    /// Parses one of the user-visible style names (`locale`, `shell`,
    /// `shell-escape-always`); anything else yields an error message.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let style = match s {
            "locale" => Self::Locale,
            "shell" => Self::Shell,
            "shell-escape-always" => Self::ShellEscapeAlways,
            // The remaining styles aren't exposed to the user
            _ => return Err(format!("Invalid quoting style: {s}")),
        };
        Ok(style)
    }
}
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
enum Token { enum Token {
Char(char), Char(char),
Byte(u8),
Directive { Directive {
flag: Flags, flag: Flags,
width: usize, width: usize,
@ -293,6 +321,93 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Option<usize>) {
pad_and_print(s, flags.left, width, Padding::Space); pad_and_print(s, flags.left, width, Padding::Space);
} }
/// Wraps `file_name` in the quotes required by `quoting_style`.
///
/// * `Locale` / `Shell`: single quotes, with embedded `'` written as `\'`.
/// * `ShellEscapeAlways`: double quotes, contents left as they are.
/// * `Quote`: the name is returned unchanged.
fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String {
    match quoting_style {
        QuotingStyle::Quote => file_name.to_string(),
        QuotingStyle::ShellEscapeAlways => format!("\"{file_name}\""),
        QuotingStyle::Locale | QuotingStyle::Shell => {
            let mut quoted = String::from("'");
            quoted.push_str(&file_name.replace('\'', r"\'"));
            quoted.push('\'');
            quoted
        }
    }
}
fn get_quoted_file_name(
display_name: &str,
file: &OsString,
file_type: &FileType,
from_user: bool,
) -> Result<String, i32> {
let quoting_style = env::var("QUOTING_STYLE")
.ok()
.and_then(|style| style.parse().ok())
.unwrap_or_default();
if file_type.is_symlink() {
let quoted_display_name = quote_file_name(display_name, &quoting_style);
match fs::read_link(file) {
Ok(dst) => {
let quoted_dst = quote_file_name(&dst.to_string_lossy(), &quoting_style);
Ok(format!("{quoted_display_name} -> {quoted_dst}"))
}
Err(e) => {
show_error!("{e}");
Err(1)
}
}
} else {
let style = if from_user {
quoting_style
} else {
QuotingStyle::Quote
};
Ok(quote_file_name(display_name, &style))
}
}
/// Prints a single format token using file-system statistics.
///
/// Raw bytes and plain characters are emitted as they are; a directive is
/// resolved against `meta` (or `display_name` for `%n`) and printed with the
/// directive's flags, width and precision. Unknown directive letters are
/// passed on as `OutputType::Unknown`.
fn process_token_filesystem(t: &Token, meta: StatFs, display_name: &str) {
    let (flag, width, precision, format) = match t {
        Token::Byte(byte) => {
            write_raw_byte(*byte);
            return;
        }
        Token::Char(c) => {
            print!("{c}");
            return;
        }
        Token::Directive {
            flag,
            width,
            precision,
            format,
        } => (*flag, *width, *precision, *format),
    };

    let output = match format {
        // free blocks available to non-superuser
        'a' => OutputType::Unsigned(meta.avail_blocks()),
        // total data blocks in file system
        'b' => OutputType::Unsigned(meta.total_blocks()),
        // total file nodes in file system
        'c' => OutputType::Unsigned(meta.total_file_nodes()),
        // free file nodes in file system
        'd' => OutputType::Unsigned(meta.free_file_nodes()),
        // free blocks in file system
        'f' => OutputType::Unsigned(meta.free_blocks()),
        // file system ID in hex
        'i' => OutputType::UnsignedHex(meta.fsid()),
        // maximum length of filenames
        'l' => OutputType::Unsigned(meta.namelen()),
        // file name
        'n' => OutputType::Str(display_name.to_string()),
        // block size (for faster transfers)
        's' => OutputType::Unsigned(meta.io_size()),
        // fundamental block size (for block counts)
        'S' => OutputType::Integer(meta.block_size()),
        // file system type in hex
        't' => OutputType::UnsignedHex(meta.fs_type() as u64),
        // file system type in human readable form
        'T' => OutputType::Str(pretty_fstype(meta.fs_type()).into()),
        _ => OutputType::Unknown,
    };
    print_it(&output, flag, width, precision);
}
/// Prints an integer value based on the provided flags, width, and precision. /// Prints an integer value based on the provided flags, width, and precision.
/// ///
/// # Arguments /// # Arguments
@ -403,7 +518,26 @@ fn print_unsigned_hex(
pad_and_print(&s, flags.left, width, padding_char); pad_and_print(&s, flags.left, width, padding_char);
} }
/// Writes one raw byte to standard output, bypassing any text formatting.
///
/// # Panics
///
/// Panics if the write to standard output fails.
fn write_raw_byte(byte: u8) {
    let mut out = std::io::stdout();
    out.write_all(std::slice::from_ref(&byte)).unwrap();
}
impl Stater { impl Stater {
/// Consumes consecutive flag characters starting at `chars[*i]`.
///
/// Each recognised flag character (`#`, `0`, `-`, space, `+`, `'`) sets the
/// matching field of `flag` and advances `*i`. Scanning stops at the first
/// character that is not a flag, or once `*i` reaches `bound`.
fn process_flags(chars: &[char], i: &mut usize, bound: usize, flag: &mut Flags) {
    while *i < bound {
        let field = match chars[*i] {
            '#' => &mut flag.alter,
            '0' => &mut flag.zero,
            '-' => &mut flag.left,
            ' ' => &mut flag.space,
            '+' => &mut flag.sign,
            '\'' => &mut flag.group,
            _ => return,
        };
        *field = true;
        *i += 1;
    }
}
fn handle_percent_case( fn handle_percent_case(
chars: &[char], chars: &[char],
i: &mut usize, i: &mut usize,
@ -423,20 +557,7 @@ impl Stater {
let mut flag = Flags::default(); let mut flag = Flags::default();
while *i < bound { Self::process_flags(chars, i, bound, &mut flag);
match chars[*i] {
'#' => flag.alter = true,
'0' => flag.zero = true,
'-' => flag.left = true,
' ' => flag.space = true,
'+' => flag.sign = true,
'\'' => flag.group = true,
'I' => unimplemented!(),
_ => break,
}
*i += 1;
}
check_bound(format_str, bound, old, *i)?;
let mut width = 0; let mut width = 0;
let mut precision = None; let mut precision = None;
@ -445,6 +566,15 @@ impl Stater {
if let Some((field_width, offset)) = format_str[j..].scan_num::<usize>() { if let Some((field_width, offset)) = format_str[j..].scan_num::<usize>() {
width = field_width; width = field_width;
j += offset; j += offset;
// Reject directives like `%<NUMBER>` by checking if width has been parsed.
if j >= bound || chars[j] == '%' {
let invalid_directive: String = chars[old..=j.min(bound - 1)].iter().collect();
return Err(USimpleError::new(
1,
format!("{}: invalid directive", invalid_directive.quote()),
));
}
} }
check_bound(format_str, bound, old, j)?; check_bound(format_str, bound, old, j)?;
@ -465,9 +595,27 @@ impl Stater {
} }
*i = j; *i = j;
// Check for multi-character specifiers (e.g., `%Hd`, `%Lr`)
if *i + 1 < bound {
if let Some(&next_char) = chars.get(*i + 1) {
if (chars[*i] == 'H' || chars[*i] == 'L') && (next_char == 'd' || next_char == 'r')
{
let specifier = format!("{}{}", chars[*i], next_char);
*i += 1;
return Ok(Token::Directive {
flag,
width,
precision,
format: specifier.chars().next().unwrap(),
});
}
}
}
Ok(Token::Directive { Ok(Token::Directive {
width,
flag, flag,
width,
precision, precision,
format: chars[*i], format: chars[*i],
}) })
@ -485,33 +633,49 @@ impl Stater {
return Token::Char('\\'); return Token::Char('\\');
} }
match chars[*i] { match chars[*i] {
'x' if *i + 1 < bound => { 'a' => Token::Byte(0x07), // BEL
if let Some((c, offset)) = format_str[*i + 1..].scan_char(16) { 'b' => Token::Byte(0x08), // Backspace
*i += offset; 'f' => Token::Byte(0x0C), // Form feed
Token::Char(c) 'n' => Token::Byte(0x0A), // Line feed
'r' => Token::Byte(0x0D), // Carriage return
't' => Token::Byte(0x09), // Horizontal tab
'\\' => Token::Byte(b'\\'), // Backslash
'\'' => Token::Byte(b'\''), // Single quote
'"' => Token::Byte(b'"'), // Double quote
'0'..='7' => {
// Parse octal escape sequence (up to 3 digits)
let mut value = 0u8;
let mut count = 0;
while *i < bound && count < 3 {
if let Some(digit) = chars[*i].to_digit(8) {
value = value * 8 + digit as u8;
*i += 1;
count += 1;
} else {
break;
}
}
*i -= 1; // Adjust index to account for the outer loop increment
Token::Byte(value)
}
'x' => {
// Parse hexadecimal escape sequence
if *i + 1 < bound {
if let Some((c, offset)) = format_str[*i + 1..].scan_char(16) {
*i += offset;
Token::Byte(c as u8)
} else {
show_warning!("unrecognized escape '\\x'");
Token::Byte(b'x')
}
} else { } else {
show_warning!("unrecognized escape '\\x'"); show_warning!("incomplete hex escape '\\x'");
Token::Char('x') Token::Byte(b'x')
} }
} }
'0'..='7' => { other => {
let (c, offset) = format_str[*i..].scan_char(8).unwrap(); show_warning!("unrecognized escape '\\{}'", other);
*i += offset - 1; Token::Byte(other as u8)
Token::Char(c)
}
'"' => Token::Char('"'),
'\\' => Token::Char('\\'),
'a' => Token::Char('\x07'),
'b' => Token::Char('\x08'),
'e' => Token::Char('\x1B'),
'f' => Token::Char('\x0C'),
'n' => Token::Char('\n'),
'r' => Token::Char('\r'),
't' => Token::Char('\t'),
'v' => Token::Char('\x0B'),
c => {
show_warning!("unrecognized escape '\\{}'", c);
Token::Char(c)
} }
} }
} }
@ -634,7 +798,128 @@ impl Stater {
ret ret
} }
/// Renders a single parsed format token for one file and prints the result.
///
/// * `Token::Byte` is written verbatim through `write_raw_byte`.
/// * `Token::Char` is printed with `print!`.
/// * `Token::Directive` is translated from its format character into an
///   `OutputType` built from `meta` (and, for some directives, from
///   `display_name`, `file`, `file_type` and `from_user`), then handed to
///   `print_it` together with the directive's flags, width and precision.
///   Format characters that have no arm here become `OutputType::Unknown`.
///
/// # Errors
///
/// Propagates (via `?`) the error returned by `get_quoted_file_name` while
/// handling the `N` directive.
///
/// # Panics
///
/// The `m` directive calls `unwrap()` on the result of `find_mount_point`.
#[allow(clippy::cognitive_complexity)] fn process_token_files(
&self,
t: &Token,
meta: &Metadata,
display_name: &str,
file: &OsString,
file_type: &FileType,
from_user: bool,
) -> Result<(), i32> {
match *t {
Token::Byte(byte) => write_raw_byte(byte),
Token::Char(c) => print!("{c}"),
Token::Directive {
flag,
width,
precision,
format,
} => {
let output = match format {
// access rights in octal
'a' => OutputType::UnsignedOct(0o7777 & meta.mode()),
// access rights in human readable form
'A' => OutputType::Str(display_permissions(meta, true)),
// number of blocks allocated (see %B)
'b' => OutputType::Unsigned(meta.blocks()),
// the size in bytes of each block reported by %b
// FIXME: blocksize differs on various platform
// See coreutils/gnulib/lib/stat-size.h ST_NBLOCKSIZE // spell-checker:disable-line
'B' => OutputType::Unsigned(512),
// device number in decimal
'd' => OutputType::Unsigned(meta.dev()),
// device number in hex
'D' => OutputType::UnsignedHex(meta.dev()),
// raw mode in hex
'f' => OutputType::UnsignedHex(meta.mode() as u64),
// file type
'F' => OutputType::Str(
pretty_filetype(meta.mode() as mode_t, meta.len()).to_owned(),
),
// group ID of owner
'g' => OutputType::Unsigned(meta.gid() as u64),
// group name of owner
'G' => {
let group_name =
entries::gid2grp(meta.gid()).unwrap_or_else(|_| "UNKNOWN".to_owned());
OutputType::Str(group_name)
}
// number of hard links
'h' => OutputType::Unsigned(meta.nlink()),
// inode number
'i' => OutputType::Unsigned(meta.ino()),
// mount point
// NOTE(review): `unwrap()` panics when `find_mount_point` has no value to return.
'm' => OutputType::Str(self.find_mount_point(file).unwrap()),
// file name
'n' => OutputType::Str(display_name.to_string()),
// quoted file name with dereference if symbolic link
'N' => {
let file_name =
get_quoted_file_name(display_name, file, file_type, from_user)?;
OutputType::Str(file_name)
}
// optimal I/O transfer size hint
'o' => OutputType::Unsigned(meta.blksize()),
// total size, in bytes
's' => OutputType::Integer(meta.len() as i64),
// major device type in hex, for character/block device special
// files
't' => OutputType::UnsignedHex(meta.rdev() >> 8),
// minor device type in hex, for character/block device special
// files
'T' => OutputType::UnsignedHex(meta.rdev() & 0xff),
// user ID of owner
'u' => OutputType::Unsigned(meta.uid() as u64),
// user name of owner
'U' => {
let user_name =
entries::uid2usr(meta.uid()).unwrap_or_else(|_| "UNKNOWN".to_owned());
OutputType::Str(user_name)
}
// time of file birth, human-readable; - if unknown
'w' => OutputType::Str(
meta.birth()
.map(|(sec, nsec)| pretty_time(sec as i64, nsec as i64))
.unwrap_or(String::from("-")),
),
// time of file birth, seconds since Epoch; 0 if unknown
'W' => OutputType::Unsigned(meta.birth().unwrap_or_default().0),
// time of last access, human-readable
'x' => OutputType::Str(pretty_time(meta.atime(), meta.atime_nsec())),
// time of last access, seconds since Epoch
'X' => OutputType::Integer(meta.atime()),
// time of last data modification, human-readable
'y' => OutputType::Str(pretty_time(meta.mtime(), meta.mtime_nsec())),
// time of last data modification, seconds since Epoch
'Y' => OutputType::Integer(meta.mtime()),
// time of last status change, human-readable
'z' => OutputType::Str(pretty_time(meta.ctime(), meta.ctime_nsec())),
// time of last status change, seconds since Epoch
'Z' => OutputType::Integer(meta.ctime()),
// `rdev` split into "major,minor", using the same `>> 8` / `& 0xff`
// split as the `t` and `T` directives above
'R' => {
let major = meta.rdev() >> 8;
let minor = meta.rdev() & 0xff;
OutputType::Str(format!("{},{}", major, minor))
}
// the unsplit `rdev` value as an unsigned number
'r' => OutputType::Unsigned(meta.rdev()),
// NOTE(review): the directive parser keeps only the first character of the
// two-character specifiers (`%Hd`/`%Hr` -> `H`, `%Ld`/`%Lr` -> `L`), so both
// variants are computed from `rdev` here -- confirm this is intended.
'H' => OutputType::Unsigned(meta.rdev() >> 8), // Major in decimal
'L' => OutputType::Unsigned(meta.rdev() & 0xff), // Minor in decimal
// any other format character is passed on as `OutputType::Unknown`
_ => OutputType::Unknown,
};
print_it(&output, flag, width, precision);
}
}
Ok(())
}
fn do_stat(&self, file: &OsStr, stdin_is_fifo: bool) -> i32 { fn do_stat(&self, file: &OsStr, stdin_is_fifo: bool) -> i32 {
let display_name = file.to_string_lossy(); let display_name = file.to_string_lossy();
let file = if cfg!(unix) && display_name == "-" { let file = if cfg!(unix) && display_name == "-" {
@ -659,46 +944,9 @@ impl Stater {
Ok(meta) => { Ok(meta) => {
let tokens = &self.default_tokens; let tokens = &self.default_tokens;
// Usage
for t in tokens { for t in tokens {
match *t { process_token_filesystem(t, meta, &display_name);
Token::Char(c) => print!("{c}"),
Token::Directive {
flag,
width,
precision,
format,
} => {
let output = match format {
// free blocks available to non-superuser
'a' => OutputType::Unsigned(meta.avail_blocks()),
// total data blocks in file system
'b' => OutputType::Unsigned(meta.total_blocks()),
// total file nodes in file system
'c' => OutputType::Unsigned(meta.total_file_nodes()),
// free file nodes in file system
'd' => OutputType::Unsigned(meta.free_file_nodes()),
// free blocks in file system
'f' => OutputType::Unsigned(meta.free_blocks()),
// file system ID in hex
'i' => OutputType::UnsignedHex(meta.fsid()),
// maximum length of filenames
'l' => OutputType::Unsigned(meta.namelen()),
// file name
'n' => OutputType::Str(display_name.to_string()),
// block size (for faster transfers)
's' => OutputType::Unsigned(meta.io_size()),
// fundamental block size (for block counts)
'S' => OutputType::Integer(meta.block_size()),
// file system type in hex
't' => OutputType::UnsignedHex(meta.fs_type() as u64),
// file system type in human readable form
'T' => OutputType::Str(pretty_fstype(meta.fs_type()).into()),
_ => OutputType::Unknown,
};
print_it(&output, flag, width, precision);
}
}
} }
} }
Err(e) => { Err(e) => {
@ -728,125 +976,15 @@ impl Stater {
}; };
for t in tokens { for t in tokens {
match *t { if let Err(code) = self.process_token_files(
Token::Char(c) => print!("{c}"), t,
Token::Directive { &meta,
flag, &display_name,
width, &file,
precision, &file_type,
format, self.from_user,
} => { ) {
let output = match format { return code;
// access rights in octal
'a' => OutputType::UnsignedOct(0o7777 & meta.mode()),
// access rights in human readable form
'A' => OutputType::Str(display_permissions(&meta, true)),
// number of blocks allocated (see %B)
'b' => OutputType::Unsigned(meta.blocks()),
// the size in bytes of each block reported by %b
// FIXME: blocksize differs on various platform
// See coreutils/gnulib/lib/stat-size.h ST_NBLOCKSIZE // spell-checker:disable-line
'B' => OutputType::Unsigned(512),
// device number in decimal
'd' => OutputType::Unsigned(meta.dev()),
// device number in hex
'D' => OutputType::UnsignedHex(meta.dev()),
// raw mode in hex
'f' => OutputType::UnsignedHex(meta.mode() as u64),
// file type
'F' => OutputType::Str(
pretty_filetype(meta.mode() as mode_t, meta.len())
.to_owned(),
),
// group ID of owner
'g' => OutputType::Unsigned(meta.gid() as u64),
// group name of owner
'G' => {
let group_name = entries::gid2grp(meta.gid())
.unwrap_or_else(|_| "UNKNOWN".to_owned());
OutputType::Str(group_name)
}
// number of hard links
'h' => OutputType::Unsigned(meta.nlink()),
// inode number
'i' => OutputType::Unsigned(meta.ino()),
// mount point
'm' => OutputType::Str(self.find_mount_point(&file).unwrap()),
// file name
'n' => OutputType::Str(display_name.to_string()),
// quoted file name with dereference if symbolic link
'N' => {
let file_name = if file_type.is_symlink() {
let dst = match fs::read_link(&file) {
Ok(path) => path,
Err(e) => {
println!("{e}");
return 1;
}
};
format!("{} -> {}", display_name.quote(), dst.quote())
} else {
display_name.to_string()
};
OutputType::Str(file_name)
}
// optimal I/O transfer size hint
'o' => OutputType::Unsigned(meta.blksize()),
// total size, in bytes
's' => OutputType::Integer(meta.len() as i64),
// major device type in hex, for character/block device special
// files
't' => OutputType::UnsignedHex(meta.rdev() >> 8),
// minor device type in hex, for character/block device special
// files
'T' => OutputType::UnsignedHex(meta.rdev() & 0xff),
// user ID of owner
'u' => OutputType::Unsigned(meta.uid() as u64),
// user name of owner
'U' => {
let user_name = entries::uid2usr(meta.uid())
.unwrap_or_else(|_| "UNKNOWN".to_owned());
OutputType::Str(user_name)
}
// time of file birth, human-readable; - if unknown
'w' => OutputType::Str(
meta.birth()
.map(|(sec, nsec)| pretty_time(sec as i64, nsec as i64))
.unwrap_or(String::from("-")),
),
// time of file birth, seconds since Epoch; 0 if unknown
'W' => OutputType::Unsigned(meta.birth().unwrap_or_default().0),
// time of last access, human-readable
'x' => OutputType::Str(pretty_time(
meta.atime(),
meta.atime_nsec(),
)),
// time of last access, seconds since Epoch
'X' => OutputType::Integer(meta.atime()),
// time of last data modification, human-readable
'y' => OutputType::Str(pretty_time(
meta.mtime(),
meta.mtime_nsec(),
)),
// time of last data modification, seconds since Epoch
'Y' => OutputType::Integer(meta.mtime()),
// time of last status change, human-readable
'z' => OutputType::Str(pretty_time(
meta.ctime(),
meta.ctime_nsec(),
)),
// time of last status change, seconds since Epoch
'Z' => OutputType::Integer(meta.ctime()),
_ => OutputType::Unknown,
};
print_it(&output, flag, width, precision);
}
} }
} }
} }
@ -1038,7 +1176,7 @@ mod tests {
#[test] #[test]
fn printf_format() { fn printf_format() {
let s = r#"%-# 15a\t\r\"\\\a\b\e\f\v%+020.-23w\x12\167\132\112\n"#; let s = r#"%-# 15a\t\r\"\\\a\b\x1B\f\x0B%+020.-23w\x12\167\132\112\n"#;
let expected = vec![ let expected = vec![
Token::Directive { Token::Directive {
flag: Flags { flag: Flags {
@ -1051,15 +1189,15 @@ mod tests {
precision: None, precision: None,
format: 'a', format: 'a',
}, },
Token::Char('\t'), Token::Byte(b'\t'),
Token::Char('\r'), Token::Byte(b'\r'),
Token::Char('"'), Token::Byte(b'"'),
Token::Char('\\'), Token::Byte(b'\\'),
Token::Char('\x07'), Token::Byte(b'\x07'),
Token::Char('\x08'), Token::Byte(b'\x08'),
Token::Char('\x1B'), Token::Byte(b'\x1B'),
Token::Char('\x0C'), Token::Byte(b'\x0C'),
Token::Char('\x0B'), Token::Byte(b'\x0B'),
Token::Directive { Token::Directive {
flag: Flags { flag: Flags {
sign: true, sign: true,
@ -1070,11 +1208,11 @@ mod tests {
precision: None, precision: None,
format: 'w', format: 'w',
}, },
Token::Char('\x12'), Token::Byte(b'\x12'),
Token::Char('w'), Token::Byte(b'w'),
Token::Char('Z'), Token::Byte(b'Z'),
Token::Char('J'), Token::Byte(b'J'),
Token::Char('\n'), Token::Byte(b'\n'),
]; ];
assert_eq!(&expected, &Stater::generate_tokens(s, true).unwrap()); assert_eq!(&expected, &Stater::generate_tokens(s, true).unwrap());
} }

View file

@ -20,8 +20,8 @@ crate-type = [
] # XXX: note: the rlib is just to prevent Cargo from spitting out a warning ] # XXX: note: the rlib is just to prevent Cargo from spitting out a warning
[dependencies] [dependencies]
cpp = "0.5.9" cpp = "0.5.10"
libc = { workspace = true } libc = { workspace = true }
[build-dependencies] [build-dependencies]
cpp_build = "0.5.9" cpp_build = "0.5.10"

View file

@ -16,13 +16,6 @@ use uucore::{format_usage, help_about, help_usage, show};
const USAGE: &str = help_usage!("sum.md"); const USAGE: &str = help_usage!("sum.md");
const ABOUT: &str = help_about!("sum.md"); const ABOUT: &str = help_about!("sum.md");
/// Divides `a` by `b`, rounding the quotient up to the next whole number.
///
/// Thin wrapper over the standard library's `usize::div_ceil`, kept so that
/// existing call sites continue to compile. Unlike the former
/// `(a + b - 1) / b` formulation, the intermediate computation cannot
/// overflow when `a` is close to `usize::MAX`.
///
/// # Panics
///
/// Panics if `b` is zero.
const fn div_ceil(a: usize, b: usize) -> usize {
    // Resolves to the inherent `usize::div_ceil` method, not to this function.
    a.div_ceil(b)
}
fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) { fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
let mut buf = [0; 4096]; let mut buf = [0; 4096];
let mut bytes_read = 0; let mut bytes_read = 0;
@ -41,7 +34,7 @@ fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
} }
// Report blocks read in terms of 1024-byte blocks. // Report blocks read in terms of 1024-byte blocks.
let blocks_read = div_ceil(bytes_read, 1024); let blocks_read = bytes_read.div_ceil(1024);
(blocks_read, checksum) (blocks_read, checksum)
} }
@ -66,7 +59,7 @@ fn sysv_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
ret = (ret & 0xffff) + (ret >> 16); ret = (ret & 0xffff) + (ret >> 16);
// Report blocks read in terms of 512-byte blocks. // Report blocks read in terms of 512-byte blocks.
let blocks_read = div_ceil(bytes_read, 512); let blocks_read = bytes_read.div_ceil(512);
(blocks_read, ret as u16) (blocks_read, ret as u16)
} }

View file

@ -184,7 +184,7 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()>
let mut out = BufWriter::new(out.lock()); let mut out = BufWriter::new(out.lock());
// The number of bytes in the line separator. // The number of bytes in the line separator.
let slen = separator.as_bytes().len(); let slen = separator.len();
// The index of the start of the next line in the `data`. // The index of the start of the next line in the `data`.
// //

View file

@ -336,11 +336,11 @@ impl Settings {
let blocking_stdin = self.pid == 0 let blocking_stdin = self.pid == 0
&& self.follow == Some(FollowMode::Descriptor) && self.follow == Some(FollowMode::Descriptor)
&& self.num_inputs() == 1 && self.num_inputs() == 1
&& Handle::stdin().map_or(false, |handle| { && Handle::stdin().is_ok_and(|handle| {
handle handle
.as_file() .as_file()
.metadata() .metadata()
.map_or(false, |meta| !meta.is_file()) .is_ok_and(|meta| !meta.is_file())
}); });
if !blocking_stdin && std::io::stdin().is_terminal() { if !blocking_stdin && std::io::stdin().is_terminal() {

View file

@ -64,7 +64,7 @@ impl<'a> ReverseChunks<'a> {
} }
} }
impl<'a> Iterator for ReverseChunks<'a> { impl Iterator for ReverseChunks<'_> {
type Item = Vec<u8>; type Item = Vec<u8>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {

View file

@ -93,7 +93,7 @@ impl Input {
pub fn is_tailable(&self) -> bool { pub fn is_tailable(&self) -> bool {
match &self.kind { match &self.kind {
InputKind::File(path) => path_is_tailable(path), InputKind::File(path) => path_is_tailable(path),
InputKind::Stdin => self.resolve().map_or(false, |path| path_is_tailable(&path)), InputKind::Stdin => self.resolve().is_some_and(|path| path_is_tailable(&path)),
} }
} }
} }
@ -233,7 +233,7 @@ impl PathExtTail for Path {
} }
pub fn path_is_tailable(path: &Path) -> bool { pub fn path_is_tailable(path: &Path) -> bool {
path.is_file() || path.exists() && path.metadata().map_or(false, |meta| meta.is_tailable()) path.is_file() || path.exists() && path.metadata().is_ok_and(|meta| meta.is_tailable())
} }
#[inline] #[inline]

View file

@ -65,13 +65,15 @@ fn uu_tail(settings: &Settings) -> UResult<()> {
// Add `path` and `reader` to `files` map if `--follow` is selected. // Add `path` and `reader` to `files` map if `--follow` is selected.
for input in &settings.inputs.clone() { for input in &settings.inputs.clone() {
match input.kind() { match input.kind() {
InputKind::File(path) if cfg!(not(unix)) || path != &PathBuf::from(text::DEV_STDIN) => { InputKind::Stdin => {
tail_file(settings, &mut printer, input, path, &mut observer, 0)?;
}
// File points to /dev/stdin here
InputKind::File(_) | InputKind::Stdin => {
tail_stdin(settings, &mut printer, input, &mut observer)?; tail_stdin(settings, &mut printer, input, &mut observer)?;
} }
InputKind::File(path) if cfg!(unix) && path == &PathBuf::from(text::DEV_STDIN) => {
tail_stdin(settings, &mut printer, input, &mut observer)?;
}
InputKind::File(path) => {
tail_file(settings, &mut printer, input, path, &mut observer, 0)?;
}
} }
} }
@ -85,7 +87,7 @@ fn uu_tail(settings: &Settings) -> UResult<()> {
the input file is not a FIFO, pipe, or regular file, it is unspecified whether or the input file is not a FIFO, pipe, or regular file, it is unspecified whether or
not the -f option shall be ignored. not the -f option shall be ignored.
*/ */
if !settings.has_only_stdin() { if !settings.has_only_stdin() || settings.pid != 0 {
follow::follow(observer, settings)?; follow::follow(observer, settings)?;
} }
} }

View file

@ -288,7 +288,6 @@ fn preserve_signal_info(signal: libc::c_int) -> libc::c_int {
} }
/// TODO: Improve exit codes, and make them consistent with the GNU Coreutils exit codes. /// TODO: Improve exit codes, and make them consistent with the GNU Coreutils exit codes.
fn timeout( fn timeout(
cmd: &[String], cmd: &[String],
duration: Duration, duration: Duration,

View file

@ -19,7 +19,7 @@ path = "src/tr.rs"
[dependencies] [dependencies]
nom = { workspace = true } nom = { workspace = true }
clap = { workspace = true } clap = { workspace = true }
uucore = { workspace = true } uucore = { workspace = true, features = ["fs"] }
[[bin]] [[bin]]
name = "tr" name = "tr"

View file

@ -16,13 +16,15 @@ use nom::{
IResult, IResult,
}; };
use std::{ use std::{
char,
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
error::Error, error::Error,
fmt::{Debug, Display}, fmt::{Debug, Display},
io::{BufRead, Write}, io::{BufRead, Write},
ops::Not, ops::Not,
}; };
use uucore::error::UError; use uucore::error::{UError, UResult, USimpleError};
use uucore::show_warning;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum BadSequence { pub enum BadSequence {
@ -293,7 +295,9 @@ impl Sequence {
Self::parse_class, Self::parse_class,
Self::parse_char_equal, Self::parse_char_equal,
// NOTE: This must be the last one // NOTE: This must be the last one
map(Self::parse_backslash_or_char, |s| Ok(Self::Char(s))), map(Self::parse_backslash_or_char_with_warning, |s| {
Ok(Self::Char(s))
}),
)))(input) )))(input)
.map(|(_, r)| r) .map(|(_, r)| r)
.unwrap() .unwrap()
@ -302,10 +306,16 @@ impl Sequence {
} }
fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> { fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> {
// For `parse_char_range`, `parse_char_star`, `parse_char_repeat`, `parse_char_equal`.
// Because in these patterns, there's no ambiguous cases.
preceded(tag("\\"), Self::parse_octal_up_to_three_digits)(input)
}
fn parse_octal_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
preceded( preceded(
tag("\\"), tag("\\"),
alt(( alt((
Self::parse_octal_up_to_three_digits, Self::parse_octal_up_to_three_digits_with_warning,
// Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be // Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be
// parsed as a byte // parsed as a byte
// See test `test_multibyte_octal_sequence` // See test `test_multibyte_octal_sequence`
@ -319,16 +329,29 @@ impl Sequence {
recognize(many_m_n(1, 3, one_of("01234567"))), recognize(many_m_n(1, 3, one_of("01234567"))),
|out: &[u8]| { |out: &[u8]| {
let str_to_parse = std::str::from_utf8(out).unwrap(); let str_to_parse = std::str::from_utf8(out).unwrap();
u8::from_str_radix(str_to_parse, 8).ok()
},
)(input)
}
match u8::from_str_radix(str_to_parse, 8) { fn parse_octal_up_to_three_digits_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
Ok(ue) => Some(ue), map_opt(
Err(_pa) => { recognize(many_m_n(1, 3, one_of("01234567"))),
// TODO |out: &[u8]| {
// A warning needs to be printed here let str_to_parse = std::str::from_utf8(out).unwrap();
// See https://github.com/uutils/coreutils/issues/6821 let result = u8::from_str_radix(str_to_parse, 8).ok();
None if result.is_none() {
} let origin_octal: &str = std::str::from_utf8(input).unwrap();
let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap();
let outstand_char: char = char::from_u32(input[2] as u32).unwrap();
show_warning!(
"the ambiguous octal escape \\{} is being\n interpreted as the 2-byte sequence \\0{}, {}",
origin_octal,
actual_octal_tail,
outstand_char
);
} }
result
}, },
)(input) )(input)
} }
@ -360,6 +383,14 @@ impl Sequence {
alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input) alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input)
} }
/// Parses one element of a set: an octal escape (using the variant that may
/// emit a warning), then a backslash escape, then any single byte.
///
/// The alternatives are attempted in exactly that order and the first one
/// that succeeds determines the result.
fn parse_backslash_or_char_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
    let mut element = alt((
        Self::parse_octal_with_warning,
        Self::parse_backslash,
        Self::single_char,
    ));
    element(input)
}
fn single_char(input: &[u8]) -> IResult<&[u8], u8> { fn single_char(input: &[u8]) -> IResult<&[u8], u8> {
take(1usize)(input).map(|(l, a)| (l, a[0])) take(1usize)(input).map(|(l, a)| (l, a[0]))
} }
@ -577,7 +608,7 @@ impl SymbolTranslator for SqueezeOperation {
} }
} }
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T) pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T) -> UResult<()>
where where
T: SymbolTranslator, T: SymbolTranslator,
R: BufRead, R: BufRead,
@ -585,15 +616,25 @@ where
{ {
let mut buf = Vec::new(); let mut buf = Vec::new();
let mut output_buf = Vec::new(); let mut output_buf = Vec::new();
while let Ok(length) = input.read_until(b'\n', &mut buf) { while let Ok(length) = input.read_until(b'\n', &mut buf) {
if length == 0 { if length == 0 {
break; break; // EOF reached
} else {
let filtered = buf.iter().filter_map(|c| translator.translate(*c));
output_buf.extend(filtered);
output.write_all(&output_buf).unwrap();
} }
let filtered = buf.iter().filter_map(|&c| translator.translate(c));
output_buf.extend(filtered);
if let Err(e) = output.write_all(&output_buf) {
return Err(USimpleError::new(
1,
format!("{}: write error: {}", uucore::util_name(), e),
));
}
buf.clear(); buf.clear();
output_buf.clear(); output_buf.clear();
} }
Ok(())
} }

View file

@ -8,17 +8,17 @@
mod operation; mod operation;
mod unicode_table; mod unicode_table;
use crate::operation::DeleteOperation;
use clap::{crate_version, value_parser, Arg, ArgAction, Command}; use clap::{crate_version, value_parser, Arg, ArgAction, Command};
use operation::{ use operation::{
translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation, translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation,
}; };
use std::ffi::OsString; use std::ffi::OsString;
use std::io::{stdin, stdout, BufWriter}; use std::io::{stdin, stdout, BufWriter};
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show};
use crate::operation::DeleteOperation;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::error::{UResult, USimpleError, UUsageError};
use uucore::fs::is_stdin_directory;
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show};
const ABOUT: &str = help_about!("tr.md"); const ABOUT: &str = help_about!("tr.md");
const AFTER_HELP: &str = help_section!("after help", "tr.md"); const AFTER_HELP: &str = help_section!("after help", "tr.md");
@ -126,30 +126,34 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
translating, translating,
)?; )?;
if is_stdin_directory(&stdin) {
return Err(USimpleError::new(1, "read error: Is a directory"));
}
// '*_op' are the operations that need to be applied, in order. // '*_op' are the operations that need to be applied, in order.
if delete_flag { if delete_flag {
if squeeze_flag { if squeeze_flag {
let delete_op = DeleteOperation::new(set1); let delete_op = DeleteOperation::new(set1);
let squeeze_op = SqueezeOperation::new(set2); let squeeze_op = SqueezeOperation::new(set2);
let op = delete_op.chain(squeeze_op); let op = delete_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op)?;
} else { } else {
let op = DeleteOperation::new(set1); let op = DeleteOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op)?;
} }
} else if squeeze_flag { } else if squeeze_flag {
if sets_len < 2 { if sets_len < 2 {
let op = SqueezeOperation::new(set1); let op = SqueezeOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op)?;
} else { } else {
let translate_op = TranslateOperation::new(set1, set2.clone())?; let translate_op = TranslateOperation::new(set1, set2.clone())?;
let squeeze_op = SqueezeOperation::new(set2); let squeeze_op = SqueezeOperation::new(set2);
let op = translate_op.chain(squeeze_op); let op = translate_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op)?;
} }
} else { } else {
let op = TranslateOperation::new(set1, set2)?; let op = TranslateOperation::new(set1, set2)?;
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op)?;
} }
Ok(()) Ok(())
} }

View file

@ -383,7 +383,7 @@ fn should_extract_obs_skip_chars(
&& posix_version().is_some_and(|v| v <= OBSOLETE) && posix_version().is_some_and(|v| v <= OBSOLETE)
&& !preceding_long_opt_req_value && !preceding_long_opt_req_value
&& !preceding_short_opt_req_value && !preceding_short_opt_req_value
&& slice.chars().nth(1).map_or(false, |c| c.is_ascii_digit()) && slice.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
} }
/// Helper function to [`filter_args`] /// Helper function to [`filter_args`]

View file

@ -27,7 +27,7 @@ pub enum BufReadDecoderError<'a> {
Io(io::Error), Io(io::Error),
} }
impl<'a> fmt::Display for BufReadDecoderError<'a> { impl fmt::Display for BufReadDecoderError<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self { match *self {
BufReadDecoderError::InvalidByteSequence(bytes) => { BufReadDecoderError::InvalidByteSequence(bytes) => {
@ -38,7 +38,7 @@ impl<'a> fmt::Display for BufReadDecoderError<'a> {
} }
} }
impl<'a> Error for BufReadDecoderError<'a> { impl Error for BufReadDecoderError<'_> {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match *self { match *self {
BufReadDecoderError::InvalidByteSequence(_) => None, BufReadDecoderError::InvalidByteSequence(_) => None,

View file

@ -13,7 +13,7 @@ mod word_count;
use std::{ use std::{
borrow::{Borrow, Cow}, borrow::{Borrow, Cow},
cmp::max, cmp::max,
ffi::OsString, ffi::{OsStr, OsString},
fs::{self, File}, fs::{self, File},
io::{self, Write}, io::{self, Write},
iter, iter,
@ -28,7 +28,7 @@ use utf8::{BufReadDecoder, BufReadDecoderError};
use uucore::{ use uucore::{
error::{FromIo, UError, UResult}, error::{FromIo, UError, UResult},
format_usage, help_about, help_usage, format_usage, help_about, help_usage,
quoting_style::{escape_name, QuotingStyle}, quoting_style::{self, QuotingStyle},
shortcut_value_parser::ShortcutValueParser, shortcut_value_parser::ShortcutValueParser,
show, show,
}; };
@ -259,7 +259,7 @@ impl<'a> Input<'a> {
match self { match self {
Self::Path(path) => Some(match path.to_str() { Self::Path(path) => Some(match path.to_str() {
Some(s) if !s.contains('\n') => Cow::Borrowed(s), Some(s) if !s.contains('\n') => Cow::Borrowed(s),
_ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)), _ => Cow::Owned(escape_name_wrapper(path.as_os_str())),
}), }),
Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)), Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)),
Self::Stdin(StdinKind::Implicit) => None, Self::Stdin(StdinKind::Implicit) => None,
@ -269,7 +269,7 @@ impl<'a> Input<'a> {
/// Converts input into the form that appears in errors. /// Converts input into the form that appears in errors.
fn path_display(&self) -> String { fn path_display(&self) -> String {
match self { match self {
Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE), Self::Path(path) => escape_name_wrapper(path.as_os_str()),
Self::Stdin(_) => String::from("standard input"), Self::Stdin(_) => String::from("standard input"),
} }
} }
@ -361,7 +361,7 @@ impl WcError {
Some((input, idx)) => { Some((input, idx)) => {
let path = match input { let path = match input {
Input::Stdin(_) => STDIN_REPR.into(), Input::Stdin(_) => STDIN_REPR.into(),
Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(), Input::Path(path) => escape_name_wrapper(path.as_os_str()).into(),
}; };
Self::ZeroLengthFileNameCtx { path, idx } Self::ZeroLengthFileNameCtx { path, idx }
} }
@ -761,7 +761,9 @@ fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterIt
Err(e) => Err(e.map_err_context(|| { Err(e) => Err(e.map_err_context(|| {
format!( format!(
"cannot open {} for reading", "cannot open {} for reading",
escape_name(path.as_os_str(), QS_QUOTE_ESCAPE) quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
.into_string()
.expect("All escaped names with the escaping option return valid strings.")
) )
})), })),
} }
@ -793,9 +795,9 @@ fn files0_iter<'a>(
Ok(Input::Path(PathBuf::from(s).into())) Ok(Input::Path(PathBuf::from(s).into()))
} }
} }
Err(e) => Err(e.map_err_context(|| { Err(e) => Err(e
format!("{}: read error", escape_name(&err_path, QS_ESCAPE)) .map_err_context(|| format!("{}: read error", escape_name_wrapper(&err_path)))
}) as Box<dyn UError>), as Box<dyn UError>),
}), }),
); );
// Loop until there is an error; yield that error and then nothing else. // Loop until there is an error; yield that error and then nothing else.
@ -808,6 +810,12 @@ fn files0_iter<'a>(
}) })
} }
/// Escapes `name` with the `QS_ESCAPE` quoting style and returns it as a
/// `String`.
///
/// # Panics
///
/// Panics if the escaped name cannot be converted into a `String`.
fn escape_name_wrapper(name: &OsStr) -> String {
    let escaped = quoting_style::escape_name(name, QS_ESCAPE);
    escaped
        .into_string()
        .expect("All escaped names with the escaping option return valid strings.")
}
fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> {
let mut total_word_count = WordCount::default(); let mut total_word_count = WordCount::default();
let mut num_inputs: usize = 0; let mut num_inputs: usize = 0;

View file

@ -25,6 +25,7 @@ dns-lookup = { workspace = true, optional = true }
dunce = { version = "1.0.4", optional = true } dunce = { version = "1.0.4", optional = true }
wild = "2.2.1" wild = "2.2.1"
glob = { workspace = true } glob = { workspace = true }
lazy_static = "1.4.0"
# * optional # * optional
itertools = { workspace = true, optional = true } itertools = { workspace = true, optional = true }
thiserror = { workspace = true, optional = true } thiserror = { workspace = true, optional = true }
@ -86,6 +87,7 @@ lines = []
format = ["itertools", "quoting-style"] format = ["itertools", "quoting-style"]
mode = ["libc"] mode = ["libc"]
perms = ["libc", "walkdir"] perms = ["libc", "walkdir"]
buf-copy = []
pipes = [] pipes = []
process = ["libc"] process = ["libc"]
proc-info = ["tty", "walkdir"] proc-info = ["tty", "walkdir"]

View file

@ -39,11 +39,13 @@ pub mod version_cmp;
pub mod mode; pub mod mode;
// ** unix-only // ** unix-only
#[cfg(all(any(target_os = "linux", target_os = "android"), feature = "buf-copy"))]
pub mod buf_copy;
#[cfg(all(unix, feature = "entries"))] #[cfg(all(unix, feature = "entries"))]
pub mod entries; pub mod entries;
#[cfg(all(unix, feature = "perms"))] #[cfg(all(unix, feature = "perms"))]
pub mod perms; pub mod perms;
#[cfg(all(unix, feature = "pipes"))] #[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))]
pub mod pipes; pub mod pipes;
#[cfg(all(target_os = "linux", feature = "proc-info"))] #[cfg(all(target_os = "linux", feature = "proc-info"))]
pub mod proc_info; pub mod proc_info;
@ -52,7 +54,7 @@ pub mod process;
#[cfg(all(target_os = "linux", feature = "tty"))] #[cfg(all(target_os = "linux", feature = "tty"))]
pub mod tty; pub mod tty;
#[cfg(all(unix, not(target_os = "macos"), feature = "fsxattr"))] #[cfg(all(unix, feature = "fsxattr"))]
pub mod fsxattr; pub mod fsxattr;
#[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))] #[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))]
pub mod signals; pub mod signals;

View file

@ -421,25 +421,29 @@ pub fn get_backup_path(
} }
fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf {
let mut p = path.to_string_lossy().into_owned(); let mut file_name = path.file_name().unwrap_or_default().to_os_string();
p.push_str(suffix); file_name.push(suffix);
PathBuf::from(p) path.with_file_name(file_name)
} }
fn numbered_backup_path(path: &Path) -> PathBuf { fn numbered_backup_path(path: &Path) -> PathBuf {
let file_name = path.file_name().unwrap_or_default();
for i in 1_u64.. { for i in 1_u64.. {
let path_str = &format!("{}.~{}~", path.to_string_lossy(), i); let mut numbered_file_name = file_name.to_os_string();
let path = Path::new(path_str); numbered_file_name.push(format!(".~{}~", i));
let path = path.with_file_name(numbered_file_name);
if !path.exists() { if !path.exists() {
return path.to_path_buf(); return path;
} }
} }
panic!("cannot create backup") panic!("cannot create backup")
} }
fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf {
let test_path_str = &format!("{}.~1~", path.to_string_lossy()); let file_name = path.file_name().unwrap_or_default();
let test_path = Path::new(test_path_str); let mut numbered_file_name = file_name.to_os_string();
numbered_file_name.push(".~1~");
let test_path = path.with_file_name(numbered_file_name);
if test_path.exists() { if test_path.exists() {
numbered_backup_path(path) numbered_backup_path(path)
} else { } else {
@ -660,6 +664,44 @@ mod tests {
let result = determine_backup_suffix(&matches); let result = determine_backup_suffix(&matches);
assert_eq!(result, "-v"); assert_eq!(result, "-v");
} }
#[test]
fn test_numbered_backup_path() {
    // (input path, expected first numbered backup); none of the expected paths
    // is supposed to exist on the machine running the test.
    let cases = [
        ("", ".~1~"),
        ("/", "/.~1~"),
        ("/hello/world", "/hello/world.~1~"),
        ("/hello/world/", "/hello/world.~1~"),
    ];
    for (input, expected) in cases {
        assert_eq!(
            numbered_backup_path(Path::new(input)),
            PathBuf::from(expected)
        );
    }
}
#[test]
fn test_simple_backup_path() {
    // (input path, expected backup path when the suffix is ".bak")
    let cases = [
        ("", ".bak"),
        ("/", "/.bak"),
        ("/hello/world", "/hello/world.bak"),
        ("/hello/world/", "/hello/world.bak"),
    ];
    for (input, expected) in cases {
        assert_eq!(
            simple_backup_path(Path::new(input), ".bak"),
            PathBuf::from(expected)
        );
    }
}
#[test] #[test]
fn test_source_is_target_backup() { fn test_source_is_target_backup() {
let source = Path::new("data.txt.bak"); let source = Path::new("data.txt.bak");

View file

@ -0,0 +1,373 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! This module provides several buffer-based copy/write functions that leverage
//! the `splice` system call in Linux systems, thus increasing the I/O
//! performance of copying between two file descriptors. This module is mostly
//! used by utilities to work around the limitations of Rust's `fs::copy` which
//! does not handle copying special files (e.g pipes, character/block devices).
use crate::error::{UError, UResult};
use nix::unistd;
use std::fs::File;
use std::{
io::{self, Read, Write},
os::{
fd::AsFd,
unix::io::{AsRawFd, RawFd},
},
};
use nix::{errno::Errno, libc::S_IFIFO, sys::stat::fstat};
use super::pipes::{pipe, splice, splice_exact, vmsplice};
/// Module-local result alias whose error type is this module's [`Error`].
type Result<T> = std::result::Result<T, Error>;

/// Error types used by buffer-copying functions from the `buf_copy` module.
#[derive(Debug)]
pub enum Error {
    /// Wraps an `io::Error`; displayed as `I/O error: <err>`.
    Io(io::Error),
    /// Carries a message; displayed as `splice() write error: <msg>`.
    WriteError(String),
}

impl std::fmt::Display for Error {
    /// Render the error with a prefix that identifies which variant it is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(source) => write!(f, "I/O error: {}", source),
            Self::WriteError(detail) => write!(f, "splice() write error: {}", detail),
        }
    }
}

impl std::error::Error for Error {}
/// Lets `buf_copy` errors be used wherever the crate's `UError` trait is expected.
impl UError for Error {
/// Every `buf_copy` error reports the code `1`.
// NOTE(review): presumably this becomes the utility's exit status — confirm against `UError`.
fn code(&self) -> i32 {
1
}
/// Always `false` for `buf_copy` errors.
// NOTE(review): presumably controls whether usage help is printed with the error — confirm against `UError`.
fn usage(&self) -> bool {
false
}
}
/// Helper function to determine whether a given handle (such as a file) is a pipe or not.
///
/// # Arguments
/// * `path` - handle whose raw file descriptor is inspected with `fstat`
///
/// # Returns
/// `Ok(true)` when the descriptor's file type is a FIFO, `Ok(false)` for any
/// other file type.
///
/// # Errors
/// Returns an [`Error`] when `fstat` fails on the descriptor.
#[inline]
#[cfg(unix)]
pub fn is_pipe<P>(path: &P) -> Result<bool>
where
    P: AsRawFd,
{
    let mode = fstat(path.as_raw_fd())?.st_mode as nix::libc::mode_t;
    // The file type is a multi-bit field inside `st_mode`, not an independent
    // flag: isolate it with `S_IFMT` and compare for equality, which is what
    // the C `S_ISFIFO` macro does. A bare `mode & S_IFIFO != 0` only works as
    // long as no other type code happens to share that bit.
    Ok((mode & nix::libc::S_IFMT) == S_IFIFO)
}
/// Length (128 KiB) passed to each `splice` call made by `splice_write`.
const SPLICE_SIZE: usize = 1024 * 128;
/// Size (16 KiB) of the stack buffer `copy_exact` uses for its read/write loop.
const BUF_SIZE: usize = 1024 * 16;
/// Copy data from `Read` implementor `src` into a `Write` implementor `dest`.
///
/// This function uses the Linux-specific `splice` call when possible, which does
/// not use any intermediate user-space buffer. It falls back to
/// `std::io::copy` under other platforms or when the call fails and is still
/// recoverable.
///
/// # Arguments
/// * `src` - `Read` implementor to copy data from.
/// * `dest` - `Write` implementor to copy data to.
///
/// # Returns
///
/// Total number of bytes written (as a `u64`) when the operation is successful.
/// When `splice` moved some data before a fallback became necessary, those
/// bytes are included in the total.
///
/// # Errors
///
/// Propagates errors from `splice_write`, from `std::io::copy` and from the
/// final flush of `dest`.
pub fn copy_stream<R, S>(src: &mut R, dest: &mut S) -> UResult<u64>
where
    R: Read + AsFd + AsRawFd,
    S: Write + AsFd + AsRawFd,
{
    // On Linux or Android, try the splice() system call first for faster
    // writing. If it handles the whole stream, we're done; otherwise remember
    // how much it already moved so the final count stays accurate.
    #[cfg(any(target_os = "linux", target_os = "android"))]
    let already_spliced = {
        let (bytes, must_fall_back) = splice_write(src, &dest.as_fd())?;
        if !must_fall_back {
            return Ok(bytes);
        }
        bytes
    };
    #[cfg(not(any(target_os = "linux", target_os = "android")))]
    let already_spliced: u64 = 0;

    // Either splice() is unavailable or it failed: fall back on slower writing.
    let copied = std::io::copy(src, dest)?;

    // `dest` may buffer what `std::io::copy` wrote, whereas splice() writes go
    // straight to the file descriptor. Flush explicitly so that a later,
    // successful splice() cannot emit its data ahead of the buffered bytes.
    dest.flush()?;
    Ok(already_spliced + copied)
}
/// Write from `source` into `dest` using the Linux-specific `splice` system
/// call, going through an intermediate pipe.
///
/// # Arguments
/// - `source` - source handle
/// - `dest` - destination handle
///
/// # Returns
/// A tuple of the number of bytes delivered to `dest` and a flag telling the
/// caller whether it must fall back to another copy method: `false` when
/// `splice` reported end of input, `true` when a `splice` call failed.
///
/// # Errors
/// Fails when the intermediate pipe cannot be created, or when recovering the
/// data left in that pipe with `copy_exact` fails.
#[inline]
#[cfg(any(target_os = "linux", target_os = "android"))]
fn splice_write<R, S>(source: &R, dest: &S) -> UResult<(u64, bool)>
where
    R: Read + AsFd + AsRawFd,
    S: AsRawFd + AsFd,
{
    let (pipe_rd, pipe_wr) = pipe()?;
    let mut bytes: u64 = 0;

    loop {
        let n = match splice(&source, &pipe_wr, SPLICE_SIZE) {
            // Nothing left to read: the whole stream has been transferred.
            Ok(0) => return Ok((bytes, false)),
            Ok(n) => n,
            // splice() from the source is not usable; let the caller fall back.
            Err(_) => return Ok((bytes, true)),
        };
        if splice_exact(&pipe_rd, dest, n).is_err() {
            // If the first splice manages to copy to the intermediate pipe,
            // but the second splice to the destination fails for some reason,
            // we can recover by copying the data that we have from the
            // intermediate pipe using normal read/write. Those bytes did reach
            // `dest`, so they are counted before telling the caller to fall
            // back.
            copy_exact(pipe_rd.as_raw_fd(), dest, n)?;
            bytes += n as u64;
            return Ok((bytes, true));
        }
        bytes += n as u64;
    }
}
/// Move exactly `num_bytes` bytes from `read_fd` to `write_fd` using the `read`
/// and `write` calls.
fn copy_exact(read_fd: RawFd, write_fd: &impl AsFd, num_bytes: usize) -> std::io::Result<usize> {
let mut left = num_bytes;
let mut buf = [0; BUF_SIZE];
let mut written = 0;
while left > 0 {
let read = unistd::read(read_fd, &mut buf)?;
assert_ne!(read, 0, "unexpected end of pipe");
while written < read {
let n = unistd::write(write_fd, &buf[written..read])?;
written += n;
}
left -= read;
}
Ok(written)
}
/// Push `bytes` into a pipe file descriptor with the Linux-specific
/// `vmsplice()` call, which only works when the destination is a pipe.
///
/// # Arguments
/// * `bytes` - data to be written
/// * `dest` - destination handle
///
/// # Returns
/// On success, the number of bytes written (as a `u64`) paired with `false`.
/// When `vmsplice()` fails with an error that `maybe_unsupported` treats as
/// recoverable, `(0, true)` is returned, meaning the caller has to fall back
/// to normal copying.
///
/// A `UError` error is returned when the operation is not supported or when an
/// I/O error occurs.
#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn splice_data_to_pipe<T>(bytes: &[u8], dest: &T) -> UResult<(u64, bool)>
where
    T: AsRawFd + AsFd,
{
    let mut total: u64 = 0;
    let mut remaining = bytes;
    loop {
        if remaining.is_empty() {
            return Ok((total, false));
        }
        match vmsplice(dest, remaining) {
            Ok(accepted) => {
                remaining = &remaining[accepted..];
                total += accepted as u64;
            }
            // `maybe_unsupported` either asks for a fallback or yields an
            // unrecoverable error, which `?` propagates.
            Err(e) => return Ok(maybe_unsupported(e)?),
        }
    }
}
/// Write input `bytes` to a handle using a temporary pipe. A `vmsplice()` call
/// is issued to write to the temporary pipe, which then gets written to the
/// final destination using `splice()`.
///
/// # Arguments
/// * `bytes` - data to be written
/// * `read_pipe` - read end of the temporary pipe
/// * `write_pipe` - write end of the temporary pipe
/// * `dest` - destination handle
///
/// # Returns
/// When write succeeds, the amount of bytes written is returned as a
/// `u64`. The `bool` indicates if we need to fall back to normal copying or
/// not. `true` means we need to fall back, `false` means we don't have to.
///
/// A `UError` error is returned when the operation is not supported or when an
/// I/O error occurs.
#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn splice_data_to_fd<T: AsFd>(
    bytes: &[u8],
    read_pipe: &File,
    write_pipe: &File,
    dest: &T,
) -> UResult<(u64, bool)> {
    let mut n_bytes: u64 = 0;
    let mut bytes = bytes;
    // Send the data exactly once: each round moves one `vmsplice`-sized piece
    // into the temporary pipe and then drains that same amount into `dest`.
    while !bytes.is_empty() {
        let len = match vmsplice(&write_pipe, bytes) {
            Ok(n) => n,
            Err(e) => return Ok(maybe_unsupported(e)?),
        };
        if let Err(e) = splice_exact(&read_pipe, dest, len) {
            return Ok(maybe_unsupported(e)?);
        }
        bytes = &bytes[len..];
        n_bytes += len as u64;
    }
    Ok((n_bytes, false))
}
/// Conversion from a `nix::Error` into our `Error` which implements `UError`.
#[cfg(any(target_os = "linux", target_os = "android"))]
impl From<nix::Error> for Error {
    /// Reinterpret the errno value as a raw OS error code and wrap the
    /// resulting `io::Error` in [`Error::Io`].
    fn from(error: nix::Error) -> Self {
        let os_code = error as i32;
        let io_error = io::Error::from_raw_os_error(os_code);
        Self::Io(io_error)
    }
}
/// Several error values from `nix::Error` (`EINVAL`, `ENOSYS`, and `EBADF`) get
/// treated as errors indicating that the `splice` call is not available, i.e we
/// can still recover from the error. Thus, return the final result of the call
/// as `Result` and indicate that we have to fall back using other write method.
///
/// # Arguments
/// * `error` - the `nix::Error` received
///
/// # Returns
/// Result with tuple containing a `u64` `0` indicating that no data had been
/// written and a `true` indicating we have to fall back, if error is still
/// recoverable. Returns an `Error` implementing `UError` otherwise.
#[cfg(any(target_os = "linux", target_os = "android"))]
fn maybe_unsupported(error: nix::Error) -> Result<(u64, bool)> {
match error {
Errno::EINVAL | Errno::ENOSYS | Errno::EBADF => Ok((0, true)),
_ => Err(error.into()),
}
}
// Unit tests for the buffer-copy helpers; they exercise real pipes and a temporary file.
#[cfg(test)]
mod tests {
use tempfile::tempdir;
use super::*;
use crate::pipes;
// Creates `file.txt` inside a fresh temporary directory and returns the handle.
// NOTE(review): `temp_dir` is dropped when this function returns; presumably that
// removes the directory while the returned handle is still open — confirm this is intended.
// NOTE(review): `File::create` opens the file write-only, so the handle cannot be read back.
fn new_temp_file() -> File {
let temp_dir = tempdir().unwrap();
File::create(temp_dir.path().join("file.txt")).unwrap()
}
// Both ends of a pipe must be reported as pipes; a regular file must not.
#[test]
fn test_file_is_pipe() {
let temp_file = new_temp_file();
let (pipe_read, pipe_write) = pipes::pipe().unwrap();
assert!(is_pipe(&pipe_read).unwrap());
assert!(is_pipe(&pipe_write).unwrap());
assert!(!is_pipe(&temp_file).unwrap());
}
// EINVAL, ENOSYS and EBADF request a fallback; any other errno (here EPERM) is an error.
#[test]
fn test_valid_splice_errs() {
let err = nix::Error::from(Errno::EINVAL);
assert_eq!(maybe_unsupported(err).unwrap(), (0, true));
let err = nix::Error::from(Errno::ENOSYS);
assert_eq!(maybe_unsupported(err).unwrap(), (0, true));
let err = nix::Error::from(Errno::EBADF);
assert_eq!(maybe_unsupported(err).unwrap(), (0, true));
let err = nix::Error::from(Errno::EPERM);
assert!(maybe_unsupported(err).is_err());
}
// Data pushed with `splice_data_to_pipe` must be readable from the other end of the pipe.
#[test]
fn test_splice_data_to_pipe() {
let (pipe_read, pipe_write) = pipes::pipe().unwrap();
let data = b"Hello, world!";
let (bytes, _) = splice_data_to_pipe(data, &pipe_write).unwrap();
let mut buf = [0; 1024];
let n = unistd::read(pipe_read.as_raw_fd(), &mut buf).unwrap();
assert_eq!(&buf[..n], data);
assert_eq!(bytes as usize, data.len());
}
// Data pushed with `splice_data_to_fd` is expected to end up in the temporary file.
// NOTE(review): `temp_file` comes from `File::create` (write-only) and is read without
// rewinding to the start — confirm that this read can succeed and return the data.
#[test]
fn test_splice_data_to_file() {
let mut temp_file = new_temp_file();
let (pipe_read, pipe_write) = pipes::pipe().unwrap();
let data = b"Hello, world!";
let (bytes, _) = splice_data_to_fd(data, &pipe_read, &pipe_write, &temp_file).unwrap();
let mut buf = [0; 1024];
let n = temp_file.read(&mut buf).unwrap();
assert_eq!(&buf[..n], data);
assert_eq!(bytes as usize, data.len());
}
// Writes `data` into a pipe, then has `copy_exact` move it from the read end back into
// the write end of the same pipe before reading it out again.
#[test]
fn test_copy_exact() {
let (mut pipe_read, mut pipe_write) = pipes::pipe().unwrap();
let data = b"Hello, world!";
let n = pipe_write.write(data).unwrap();
assert_eq!(n, data.len());
let mut buf = [0; 1024];
let n = copy_exact(pipe_read.as_raw_fd(), &pipe_write, data.len()).unwrap();
let n2 = pipe_read.read(&mut buf).unwrap();
assert_eq!(n, n2);
assert_eq!(&buf[..n], data);
}
// Same round trip as `test_copy_exact`, but through `copy_stream`.
// NOTE(review): `pipe_write` is still open while `copy_stream` drains `pipe_read`, so
// presumably the copy never observes end-of-file — confirm that this test terminates.
#[test]
fn test_copy_stream() {
let (mut pipe_read, mut pipe_write) = pipes::pipe().unwrap();
let data = b"Hello, world!";
let n = pipe_write.write(data).unwrap();
assert_eq!(n, data.len());
let mut buf = [0; 1024];
let n = copy_stream(&mut pipe_read, &mut pipe_write).unwrap();
let n2 = pipe_read.read(&mut buf).unwrap();
assert_eq!(n as usize, n2);
assert_eq!(&buf[..n as usize], data);
}
// NOTE(review): `data` is never written to the pipe before `splice_write` is called, so
// the source looks empty at that point — verify this test does not block and that the
// final assertions can hold.
#[test]
fn test_splice_write() {
let (mut pipe_read, pipe_write) = pipes::pipe().unwrap();
let data = b"Hello, world!";
let (bytes, _) = splice_write(&pipe_read, &pipe_write).unwrap();
let mut buf = [0; 1024];
let n = pipe_read.read(&mut buf).unwrap();
assert_eq!(&buf[..n], data);
assert_eq!(bytes as usize, data.len());
}
}

File diff suppressed because it is too large Load diff

View file

@ -13,6 +13,7 @@
/// restrict following config to systems with matching environment variables. /// restrict following config to systems with matching environment variables.
pub static TERMS: &[&str] = &[ pub static TERMS: &[&str] = &[
"Eterm", "Eterm",
"alacritty*",
"ansi", "ansi",
"*color*", "*color*",
"con[0-9]*x[0-9]*", "con[0-9]*x[0-9]*",
@ -21,6 +22,7 @@ pub static TERMS: &[&str] = &[
"cygwin", "cygwin",
"*direct*", "*direct*",
"dtterm", "dtterm",
"foot",
"gnome", "gnome",
"hurd", "hurd",
"jfbterm", "jfbterm",

View file

@ -83,13 +83,14 @@ pub fn get_groups() -> IOResult<Vec<gid_t>> {
if res == -1 { if res == -1 {
let err = IOError::last_os_error(); let err = IOError::last_os_error();
if err.raw_os_error() == Some(libc::EINVAL) { if err.raw_os_error() == Some(libc::EINVAL) {
// Number of groups changed, retry // Number of groups has increased, retry
continue; continue;
} else { } else {
return Err(err); return Err(err);
} }
} else { } else {
groups.truncate(ngroups.try_into().unwrap()); // Number of groups may have decreased
groups.truncate(res.try_into().unwrap());
return Ok(groups); return Ok(groups);
} }
} }

View file

@ -112,7 +112,8 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
Default::default() Default::default()
} }
ParseError::PartialMatch(v, rest) => { ParseError::PartialMatch(v, rest) => {
if input.starts_with('\'') { let bytes = input.as_encoded_bytes();
if !bytes.is_empty() && bytes[0] == b'\'' {
show_warning!( show_warning!(
"{}: character(s) following character constant have been ignored", "{}: character(s) following character constant have been ignored",
&rest, &rest,

View file

@ -38,7 +38,7 @@ pub mod num_parser;
mod spec; mod spec;
pub use argument::*; pub use argument::*;
use spec::Spec; pub use spec::Spec;
use std::{ use std::{
error::Error, error::Error,
fmt::Display, fmt::Display,
@ -48,7 +48,7 @@ use std::{
use crate::error::UError; use crate::error::UError;
use self::{ pub use self::{
escape::{parse_escape_code, EscapedChar}, escape::{parse_escape_code, EscapedChar},
num_format::Formatter, num_format::Formatter,
}; };

View file

@ -353,20 +353,20 @@ impl Spec {
writer.write_all(&parsed).map_err(FormatError::IoError) writer.write_all(&parsed).map_err(FormatError::IoError)
} }
Self::QuotedString => { Self::QuotedString => {
let s = args.get_str(); let s = escape_name(
writer args.get_str().as_ref(),
.write_all( &QuotingStyle::Shell {
escape_name( escape: true,
s.as_ref(), always_quote: false,
&QuotingStyle::Shell { show_control: false,
escape: true, },
always_quote: false, );
show_control: false, #[cfg(unix)]
}, let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
) #[cfg(not(unix))]
.as_bytes(), let bytes = s.to_string_lossy().as_bytes().to_owned();
)
.map_err(FormatError::IoError) writer.write_all(&bytes).map_err(FormatError::IoError)
} }
Self::SignedInt { Self::SignedInt {
width, width,

View file

@ -20,6 +20,7 @@ use std::ffi::{OsStr, OsString};
use std::fs; use std::fs;
use std::fs::read_dir; use std::fs::read_dir;
use std::hash::Hash; use std::hash::Hash;
use std::io::Stdin;
use std::io::{Error, ErrorKind, Result as IOResult}; use std::io::{Error, ErrorKind, Result as IOResult};
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::{fs::MetadataExt, io::AsRawFd}; use std::os::unix::{fs::MetadataExt, io::AsRawFd};
@ -709,7 +710,7 @@ pub fn path_ends_with_terminator(path: &Path) -> bool {
path.as_os_str() path.as_os_str()
.as_bytes() .as_bytes()
.last() .last()
.map_or(false, |&byte| byte == b'/' || byte == b'\\') .is_some_and(|&byte| byte == b'/' || byte == b'\\')
} }
#[cfg(windows)] #[cfg(windows)]
@ -721,6 +722,34 @@ pub fn path_ends_with_terminator(path: &Path) -> bool {
.map_or(false, |wide| wide == b'/'.into() || wide == b'\\'.into()) .map_or(false, |wide| wide == b'/'.into() || wide == b'\\'.into())
} }
/// Checks if the standard input (stdin) is a directory.
///
/// # Arguments
///
/// * `stdin` - A reference to the standard input handle.
///
/// # Returns
///
/// * `bool` - Returns `true` if stdin is a directory, `false` otherwise.
pub fn is_stdin_directory(stdin: &Stdin) -> bool {
#[cfg(unix)]
{
use nix::sys::stat::fstat;
let mode = fstat(stdin.as_raw_fd()).unwrap().st_mode as mode_t;
has!(mode, S_IFDIR)
}
#[cfg(windows)]
{
use std::os::windows::io::AsRawHandle;
let handle = stdin.as_raw_handle();
if let Ok(metadata) = fs::metadata(format!("{}", handle as usize)) {
return metadata.is_dir();
}
false
}
}
pub mod sane_blksize { pub mod sane_blksize {
#[cfg(not(target_os = "windows"))] #[cfg(not(target_os = "windows"))]

View file

@ -23,7 +23,7 @@ use std::fs::Metadata;
use std::os::unix::fs::MetadataExt; use std::os::unix::fs::MetadataExt;
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use std::path::{Path, MAIN_SEPARATOR_STR}; use std::path::{Path, MAIN_SEPARATOR};
/// The various level of verbosity /// The various level of verbosity
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone, Debug)]
@ -214,23 +214,13 @@ fn is_root(path: &Path, would_traverse_symlink: bool) -> bool {
// We cannot check path.is_dir() here, as this would resolve symlinks, // We cannot check path.is_dir() here, as this would resolve symlinks,
// which we need to avoid here. // which we need to avoid here.
// All directory-ish paths match "*/", except ".", "..", "*/.", and "*/..". // All directory-ish paths match "*/", except ".", "..", "*/.", and "*/..".
let looks_like_dir = match path.as_os_str().to_str() { let path_bytes = path.as_os_str().as_encoded_bytes();
// If it contains special character, prefer to err on the side of safety, i.e. forbidding the chown operation: let looks_like_dir = path_bytes == [b'.']
None => false, || path_bytes == [b'.', b'.']
Some(".") | Some("..") => true, || path_bytes.ends_with(&[MAIN_SEPARATOR as u8])
Some(path_str) => { || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.'])
(path_str.ends_with(MAIN_SEPARATOR_STR)) || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.', b'.']);
|| (path_str.ends_with(&format!("{MAIN_SEPARATOR_STR}.")))
|| (path_str.ends_with(&format!("{MAIN_SEPARATOR_STR}..")))
}
};
// TODO: Once we reach MSRV 1.74.0, replace this abomination by something simpler, e.g. this:
// let path_bytes = path.as_os_str().as_encoded_bytes();
// let looks_like_dir = path_bytes == [b'.']
// || path_bytes == [b'.', b'.']
// || path_bytes.ends_with(&[MAIN_SEPARATOR as u8])
// || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.'])
// || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.', b'.']);
if !looks_like_dir { if !looks_like_dir {
return false; return false;
} }

View file

@ -6,39 +6,43 @@
//! Set of functions for escaping names according to different quoting styles. //! Set of functions for escaping names according to different quoting styles.
use std::char::from_digit; use std::char::from_digit;
use std::ffi::OsStr; use std::ffi::{OsStr, OsString};
use std::fmt; use std::fmt;
// These are characters with special meaning in the shell (e.g. bash). // These are characters with special meaning in the shell (e.g. bash).
// The first const contains characters that only have a special meaning when they appear at the beginning of a name. // The first const contains characters that only have a special meaning when they appear at the beginning of a name.
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
// PR#6559 : Remove `]{}` from special shell chars. // PR#6559 : Remove `]{}` from special shell chars.
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
/// The quoting style to use when escaping a name. /// The quoting style to use when escaping a name.
#[derive(Clone, Copy, Debug, Eq, PartialEq)] #[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum QuotingStyle { pub enum QuotingStyle {
/// Escape the name as a literal string. /// Escape the name as a shell string.
/// Used in, e.g., `ls --quoting-style=shell`.
Shell { Shell {
/// Whether to escape characters in the name. /// Whether to escape characters in the name.
/// True in, e.g., `ls --quoting-style=shell-escape`.
escape: bool, escape: bool,
/// Whether to always quote the name. /// Whether to always quote the name.
always_quote: bool, always_quote: bool,
/// Whether to show control characters. /// Whether to show control and non-unicode characters, or replace them with `?`.
show_control: bool, show_control: bool,
}, },
/// Escape the name as a C string. /// Escape the name as a C string.
/// Used in, e.g., `ls --quote-name`.
C { C {
/// The type of quotes to use. /// The type of quotes to use.
quotes: Quotes, quotes: Quotes,
}, },
/// Escape the name as a literal string. /// Do not escape the string.
/// Used in, e.g., `ls --literal`.
Literal { Literal {
/// Whether to show control characters. /// Whether to show control and non-unicode characters, or replace them with `?`.
show_control: bool, show_control: bool,
}, },
} }
@ -72,16 +76,24 @@ enum EscapeState {
Octal(EscapeOctal), Octal(EscapeOctal),
} }
/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
/// Only supports characters up to 2 bytes long in UTF-8.
struct EscapeOctal { struct EscapeOctal {
c: char, c: [u8; 2],
state: EscapeOctalState, state: EscapeOctalState,
idx: usize, idx: u8,
} }
enum EscapeOctalState { enum EscapeOctalState {
Done, Done,
Backslash, FirstBackslash,
Value, FirstValue,
LastBackslash,
LastValue,
}
fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
(byte >> (idx * 3)) & 0o7
} }
impl Iterator for EscapeOctal { impl Iterator for EscapeOctal {
@ -90,29 +102,57 @@ impl Iterator for EscapeOctal {
fn next(&mut self) -> Option<char> { fn next(&mut self) -> Option<char> {
match self.state { match self.state {
EscapeOctalState::Done => None, EscapeOctalState::Done => None,
EscapeOctalState::Backslash => { EscapeOctalState::FirstBackslash => {
self.state = EscapeOctalState::Value; self.state = EscapeOctalState::FirstValue;
Some('\\') Some('\\')
} }
EscapeOctalState::Value => { EscapeOctalState::LastBackslash => {
let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7; self.state = EscapeOctalState::LastValue;
Some('\\')
}
EscapeOctalState::FirstValue => {
let octal_digit = byte_to_octal_digit(self.c[0], self.idx);
if self.idx == 0 {
self.state = EscapeOctalState::LastBackslash;
self.idx = 2;
} else {
self.idx -= 1;
}
Some(from_digit(octal_digit.into(), 8).unwrap())
}
EscapeOctalState::LastValue => {
let octal_digit = byte_to_octal_digit(self.c[1], self.idx);
if self.idx == 0 { if self.idx == 0 {
self.state = EscapeOctalState::Done; self.state = EscapeOctalState::Done;
} else { } else {
self.idx -= 1; self.idx -= 1;
} }
Some(from_digit(octal_digit, 8).unwrap()) Some(from_digit(octal_digit.into(), 8).unwrap())
} }
} }
} }
} }
impl EscapeOctal { impl EscapeOctal {
fn from(c: char) -> Self { fn from_char(c: char) -> Self {
if c.len_utf8() == 1 {
return Self::from_byte(c as u8);
}
let mut buf = [0; 2];
let _s = c.encode_utf8(&mut buf);
Self { Self {
c, c: buf,
idx: 2, idx: 2,
state: EscapeOctalState::Backslash, state: EscapeOctalState::FirstBackslash,
}
}
fn from_byte(b: u8) -> Self {
Self {
c: [0, b],
idx: 2,
state: EscapeOctalState::LastBackslash,
} }
} }
} }
@ -124,6 +164,12 @@ impl EscapedChar {
} }
} }
// Build an `EscapedChar` whose state is the octal escape produced by
// `EscapeOctal::from_byte` for the single byte `b`.
fn new_octal(b: u8) -> Self {
Self {
state: EscapeState::Octal(EscapeOctal::from_byte(b)),
}
}
fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
use EscapeState::*; use EscapeState::*;
let init_state = match c { let init_state = match c {
@ -148,7 +194,7 @@ impl EscapedChar {
_ => Char(' '), _ => Char(' '),
}, },
':' if dirname => Backslash(':'), ':' if dirname => Backslash(':'),
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c)), _ if c.is_control() => Octal(EscapeOctal::from_char(c)),
_ => Char(c), _ => Char(c),
}; };
Self { state: init_state } Self { state: init_state }
@ -165,11 +211,11 @@ impl EscapedChar {
'\x0B' => Backslash('v'), '\x0B' => Backslash('v'),
'\x0C' => Backslash('f'), '\x0C' => Backslash('f'),
'\r' => Backslash('r'), '\r' => Backslash('r'),
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)),
'\'' => match quotes { '\'' => match quotes {
Quotes::Single => Backslash('\''), Quotes::Single => Backslash('\''),
_ => Char('\''), _ => Char('\''),
}, },
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
_ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
_ => Char(c), _ => Char(c),
}; };
@ -205,102 +251,124 @@ impl Iterator for EscapedChar {
} }
} }
fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) { /// Check whether `bytes` starts with any byte in `pattern`.
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
    // An empty input has no first byte and therefore never matches.
    bytes.first().is_some_and(|head| pattern.contains(head))
}
fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
let mut must_quote = false; let mut must_quote = false;
let mut escaped_str = String::with_capacity(name.len()); let mut escaped_str = Vec::with_capacity(name.len());
let mut utf8_buf = vec![0; 4];
for c in name.chars() { for s in name.utf8_chunks() {
let escaped = { for c in s.valid().chars() {
let ec = EscapedChar::new_shell(c, false, quotes); let escaped = {
if show_control_chars { let ec = EscapedChar::new_shell(c, false, quotes);
ec if show_control_chars {
} else { ec
ec.hide_control() } else {
} ec.hide_control()
}; }
};
match escaped.state { match escaped.state {
EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"), EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"),
EscapeState::ForceQuote(x) => { EscapeState::ForceQuote(x) => {
must_quote = true; must_quote = true;
escaped_str.push(x); escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes());
} }
_ => { _ => {
for char in escaped { for c in escaped {
escaped_str.push(char); escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes());
}
} }
} }
} }
if show_control_chars {
escaped_str.extend_from_slice(s.invalid());
} else {
escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?');
}
} }
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
(escaped_str, must_quote) (escaped_str, must_quote)
} }
fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
// We need to keep track of whether we are in a dollar expression // We need to keep track of whether we are in a dollar expression
// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n' // because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
let mut in_dollar = false; let mut in_dollar = false;
let mut must_quote = false; let mut must_quote = false;
let mut escaped_str = String::with_capacity(name.len()); let mut escaped_str = String::with_capacity(name.len());
for c in name.chars() { for s in name.utf8_chunks() {
let escaped = EscapedChar::new_shell(c, true, quotes); for c in s.valid().chars() {
match escaped.state { let escaped = EscapedChar::new_shell(c, true, quotes);
EscapeState::Char(x) => { match escaped.state {
if in_dollar { EscapeState::Char(x) => {
escaped_str.push_str("''"); if in_dollar {
escaped_str.push_str("''");
in_dollar = false;
}
escaped_str.push(x);
}
EscapeState::ForceQuote(x) => {
if in_dollar {
escaped_str.push_str("''");
in_dollar = false;
}
must_quote = true;
escaped_str.push(x);
}
// Single quotes are not put in dollar expressions, but are escaped
// if the string also contains double quotes. In that case, they must
// be handled separately.
EscapeState::Backslash('\'') => {
must_quote = true;
in_dollar = false; in_dollar = false;
escaped_str.push_str("'\\''");
} }
escaped_str.push(x); _ => {
} if !in_dollar {
EscapeState::ForceQuote(x) => { escaped_str.push_str("'$'");
if in_dollar { in_dollar = true;
escaped_str.push_str("''"); }
in_dollar = false; must_quote = true;
} for char in escaped {
must_quote = true; escaped_str.push(char);
escaped_str.push(x); }
}
// Single quotes are not put in dollar expressions, but are escaped
// if the string also contains double quotes. In that case, they must
// be handled separately.
EscapeState::Backslash('\'') => {
must_quote = true;
in_dollar = false;
escaped_str.push_str("'\\''");
}
_ => {
if !in_dollar {
escaped_str.push_str("'$'");
in_dollar = true;
}
must_quote = true;
for char in escaped {
escaped_str.push(char);
} }
} }
} }
if !s.invalid().is_empty() {
if !in_dollar {
escaped_str.push_str("'$'");
in_dollar = true;
}
must_quote = true;
let escaped_bytes: String = s
.invalid()
.iter()
.flat_map(|b| EscapedChar::new_octal(*b))
.collect();
escaped_str.push_str(&escaped_bytes);
}
} }
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
(escaped_str, must_quote) (escaped_str.into(), must_quote)
} }
/// Return a set of characters that implies quoting of the word in /// Return a set of characters that implies quoting of the word in
/// shell-quoting mode. /// shell-quoting mode.
fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] { fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
const ESCAPED_CHARS: &[char] = &[ const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
// the ':' colon character only induce quoting in the // the ':' colon character only induce quoting in the
// context of ls displaying a directory name before listing its content. // context of ls displaying a directory name before listing its content.
// (e.g. with the recursive flag -R) // (e.g. with the recursive flag -R)
':',
// Under this line are the control characters that should be
// quoted in shell mode in all cases.
'"', '`', '$', '\\', '^', '\n', '\t', '\r', '=',
];
let start_index = if is_dirname { 0 } else { 1 }; let start_index = if is_dirname { 0 } else { 1 };
&ESCAPED_CHARS[start_index..] &ESCAPED_CHARS[start_index..]
} }
@ -308,41 +376,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
/// ///
/// This inner function provides an additional flag `dirname` which /// This inner function provides an additional flag `dirname` which
/// is meant for ls' directory name display. /// is meant for ls' directory name display.
fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String { fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
match style { match style {
QuotingStyle::Literal { show_control } => { QuotingStyle::Literal { show_control } => {
if *show_control { if *show_control {
name.to_string_lossy().into_owned() name.to_owned()
} else { } else {
name.to_string_lossy() name.utf8_chunks()
.chars() .map(|s| {
.flat_map(|c| EscapedChar::new_literal(c).hide_control()) let valid: String = s
.collect() .valid()
.chars()
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
.collect();
let invalid = "?".repeat(s.invalid().len());
valid + &invalid
})
.collect::<String>()
.into()
} }
} }
QuotingStyle::C { quotes } => { QuotingStyle::C { quotes } => {
let escaped_str: String = name let escaped_str: String = name
.to_string_lossy() .utf8_chunks()
.chars() .flat_map(|s| {
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname)) let valid = s
.collect(); .valid()
.chars()
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
valid.chain(invalid)
})
.collect::<String>();
match quotes { match quotes {
Quotes::Single => format!("'{escaped_str}'"), Quotes::Single => format!("'{escaped_str}'"),
Quotes::Double => format!("\"{escaped_str}\""), Quotes::Double => format!("\"{escaped_str}\""),
Quotes::None => escaped_str, Quotes::None => escaped_str,
} }
.into()
} }
QuotingStyle::Shell { QuotingStyle::Shell {
escape, escape,
always_quote, always_quote,
show_control, show_control,
} => { } => {
let name = name.to_string_lossy(); let (quotes, must_quote) = if name
.iter()
let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) { .any(|c| shell_escaped_char_set(dirname).contains(c))
{
(Quotes::Single, true) (Quotes::Single, true)
} else if name.contains('\'') { } else if name.contains(&b'\'') {
(Quotes::Double, true) (Quotes::Double, true)
} else if *always_quote { } else if *always_quote {
(Quotes::Single, true) (Quotes::Single, true)
@ -351,30 +435,43 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
}; };
let (escaped_str, contains_quote_chars) = if *escape { let (escaped_str, contains_quote_chars) = if *escape {
shell_with_escape(&name, quotes) shell_with_escape(name, quotes)
} else { } else {
shell_without_escape(&name, quotes, *show_control) shell_without_escape(name, quotes, *show_control)
}; };
match (must_quote | contains_quote_chars, quotes) { if must_quote | contains_quote_chars && quotes != Quotes::None {
(true, Quotes::Single) => format!("'{escaped_str}'"), let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
(true, Quotes::Double) => format!("\"{escaped_str}\""), let quote = if quotes == Quotes::Single {
_ => escaped_str, b'\''
} else {
b'"'
};
quoted_str.push(quote);
quoted_str.extend(escaped_str);
quoted_str.push(quote);
quoted_str
} else {
escaped_str
} }
} }
} }
} }
/// Escape a filename with respect to the given style. /// Escape a filename with respect to the given style.
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
escape_name_inner(name, style, false) let name = crate::os_str_as_bytes_lossy(name);
crate::os_string_from_vec(escape_name_inner(&name, style, false))
.expect("all byte sequences should be valid for platform, or already replaced in name")
} }
/// Escape a directory name with respect to the given style. /// Escape a directory name with respect to the given style.
/// This is mainly meant to be used for ls' directory name printing and is not /// This is mainly meant to be used for ls' directory name printing and is not
/// likely to be used elsewhere. /// likely to be used elsewhere.
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String { pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
escape_name_inner(dir_name, style, true) let name = crate::os_str_as_bytes_lossy(dir_name);
crate::os_string_from_vec(escape_name_inner(&name, style, true))
.expect("all byte sequences should be valid for platform, or already replaced in name")
} }
impl fmt::Display for QuotingStyle { impl fmt::Display for QuotingStyle {
@ -415,7 +512,7 @@ impl fmt::Display for Quotes {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::quoting_style::{escape_name, Quotes, QuotingStyle}; use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle};
// spell-checker:ignore (tests/words) one\'two one'two // spell-checker:ignore (tests/words) one\'two one'two
@ -465,14 +562,31 @@ mod tests {
} }
} }
/// Escapes `name` once for every entry of `map` and returns the results in order.
///
/// Only the second element of each pair (a style name resolved through
/// `get_style`) is used; the first element is ignored here so that callers
/// can store their expected output there in whatever type suits them.
/// The `dirname` flag of `escape_name_inner` is always `false`.
fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
    let mut escaped_names = Vec::with_capacity(map.len());
    for (_, style_name) in map {
        let style = get_style(style_name);
        escaped_names.push(escape_name_inner(name, &style, false));
    }
    escaped_names
}
fn check_names(name: &str, map: &[(&str, &str)]) { fn check_names(name: &str, map: &[(&str, &str)]) {
assert_eq!( assert_eq!(
map.iter() map.iter()
.map(|(_, style)| escape_name(name.as_ref(), &get_style(style))) .map(|(correct, _)| *correct)
.collect::<Vec<String>>(), .collect::<Vec<&str>>(),
check_names_inner(name.as_bytes(), map)
.iter()
.map(|bytes| std::str::from_utf8(bytes)
.expect("valid str goes in, valid str comes out"))
.collect::<Vec<&str>>()
);
}
fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) {
assert_eq!(
map.iter() map.iter()
.map(|(correct, _)| correct.to_string()) .map(|(correct, _)| *correct)
.collect::<Vec<String>>() .collect::<Vec<&[u8]>>(),
check_names_inner(name, map)
); );
} }
@ -487,10 +601,10 @@ mod tests {
("\"one_two\"", "c"), ("\"one_two\"", "c"),
("one_two", "shell"), ("one_two", "shell"),
("one_two", "shell-show"), ("one_two", "shell-show"),
("\'one_two\'", "shell-always"), ("'one_two'", "shell-always"),
("\'one_two\'", "shell-always-show"), ("'one_two'", "shell-always-show"),
("one_two", "shell-escape"), ("one_two", "shell-escape"),
("\'one_two\'", "shell-escape-always"), ("'one_two'", "shell-escape-always"),
], ],
); );
} }
@ -504,12 +618,12 @@ mod tests {
("one two", "literal-show"), ("one two", "literal-show"),
("one\\ two", "escape"), ("one\\ two", "escape"),
("\"one two\"", "c"), ("\"one two\"", "c"),
("\'one two\'", "shell"), ("'one two'", "shell"),
("\'one two\'", "shell-show"), ("'one two'", "shell-show"),
("\'one two\'", "shell-always"), ("'one two'", "shell-always"),
("\'one two\'", "shell-always-show"), ("'one two'", "shell-always-show"),
("\'one two\'", "shell-escape"), ("'one two'", "shell-escape"),
("\'one two\'", "shell-escape-always"), ("'one two'", "shell-escape-always"),
], ],
); );
@ -551,7 +665,7 @@ mod tests {
// One single quote // One single quote
check_names( check_names(
"one\'two", "one'two",
&[ &[
("one'two", "literal"), ("one'two", "literal"),
("one'two", "literal-show"), ("one'two", "literal-show"),
@ -637,7 +751,7 @@ mod tests {
], ],
); );
// The first 16 control characters. NUL is also included, even though it is of // The first 16 ASCII control characters. NUL is also included, even though it is of
// no importance for file names. // no importance for file names.
check_names( check_names(
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
@ -676,7 +790,7 @@ mod tests {
], ],
); );
// The last 16 control characters. // The last 16 ASCII control characters.
check_names( check_names(
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
&[ &[
@ -730,6 +844,265 @@ mod tests {
("''$'\\177'", "shell-escape-always"), ("''$'\\177'", "shell-escape-always"),
], ],
); );
// The first 16 Unicode control characters.
let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap();
check_names(
test_str,
&[
("????????????????", "literal"),
(test_str, "literal-show"),
("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"),
("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"),
("????????????????", "shell"),
(test_str, "shell-show"),
("'????????????????'", "shell-always"),
(&format!("'{}'", test_str), "shell-always-show"),
("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"),
("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"),
],
);
// The last 16 Unicode control characters.
let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap();
check_names(
test_str,
&[
("????????????????", "literal"),
(test_str, "literal-show"),
("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"),
("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"),
("????????????????", "shell"),
(test_str, "shell-show"),
("'????????????????'", "shell-always"),
(&format!("'{}'", test_str), "shell-always-show"),
("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"),
("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"),
],
);
}
#[test]
fn test_non_unicode_bytes() {
let ascii = b'_';
let continuation = b'\xA7';
let first2byte = b'\xC2';
let first3byte = b'\xE0';
let first4byte = b'\xF0';
let invalid = b'\xC0';
// a single byte value invalid outside of additional context in UTF-8
check_names_raw(
&[continuation],
&[
(b"?", "literal"),
(b"\xA7", "literal-show"),
(b"\\247", "escape"),
(b"\"\\247\"", "c"),
(b"?", "shell"),
(b"\xA7", "shell-show"),
(b"'?'", "shell-always"),
(b"'\xA7'", "shell-always-show"),
(b"''$'\\247'", "shell-escape"),
(b"''$'\\247'", "shell-escape-always"),
],
);
// ...but the byte becomes valid with appropriate context
// (this is just the § character in UTF-8, written as bytes)
check_names_raw(
&[first2byte, continuation],
&[
(b"\xC2\xA7", "literal"),
(b"\xC2\xA7", "literal-show"),
(b"\xC2\xA7", "escape"),
(b"\"\xC2\xA7\"", "c"),
(b"\xC2\xA7", "shell"),
(b"\xC2\xA7", "shell-show"),
(b"'\xC2\xA7'", "shell-always"),
(b"'\xC2\xA7'", "shell-always-show"),
(b"\xC2\xA7", "shell-escape"),
(b"'\xC2\xA7'", "shell-escape-always"),
],
);
// mixed with valid characters
check_names_raw(
&[continuation, ascii],
&[
(b"?_", "literal"),
(b"\xA7_", "literal-show"),
(b"\\247_", "escape"),
(b"\"\\247_\"", "c"),
(b"?_", "shell"),
(b"\xA7_", "shell-show"),
(b"'?_'", "shell-always"),
(b"'\xA7_'", "shell-always-show"),
(b"''$'\\247''_'", "shell-escape"),
(b"''$'\\247''_'", "shell-escape-always"),
],
);
check_names_raw(
&[ascii, continuation],
&[
(b"_?", "literal"),
(b"_\xA7", "literal-show"),
(b"_\\247", "escape"),
(b"\"_\\247\"", "c"),
(b"_?", "shell"),
(b"_\xA7", "shell-show"),
(b"'_?'", "shell-always"),
(b"'_\xA7'", "shell-always-show"),
(b"'_'$'\\247'", "shell-escape"),
(b"'_'$'\\247'", "shell-escape-always"),
],
);
check_names_raw(
&[ascii, continuation, ascii],
&[
(b"_?_", "literal"),
(b"_\xA7_", "literal-show"),
(b"_\\247_", "escape"),
(b"\"_\\247_\"", "c"),
(b"_?_", "shell"),
(b"_\xA7_", "shell-show"),
(b"'_?_'", "shell-always"),
(b"'_\xA7_'", "shell-always-show"),
(b"'_'$'\\247''_'", "shell-escape"),
(b"'_'$'\\247''_'", "shell-escape-always"),
],
);
check_names_raw(
&[continuation, ascii, continuation],
&[
(b"?_?", "literal"),
(b"\xA7_\xA7", "literal-show"),
(b"\\247_\\247", "escape"),
(b"\"\\247_\\247\"", "c"),
(b"?_?", "shell"),
(b"\xA7_\xA7", "shell-show"),
(b"'?_?'", "shell-always"),
(b"'\xA7_\xA7'", "shell-always-show"),
(b"''$'\\247''_'$'\\247'", "shell-escape"),
(b"''$'\\247''_'$'\\247'", "shell-escape-always"),
],
);
// contiguous invalid bytes
check_names_raw(
&[
ascii,
invalid,
ascii,
continuation,
continuation,
ascii,
continuation,
continuation,
continuation,
ascii,
continuation,
continuation,
continuation,
continuation,
ascii,
],
&[
(b"_?_??_???_????_", "literal"),
(
b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
"literal-show",
),
(
b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_",
"escape",
),
(
b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"",
"c",
),
(b"_?_??_???_????_", "shell"),
(
b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
"shell-show",
),
(b"'_?_??_???_????_'", "shell-always"),
(
b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'",
"shell-always-show",
),
(
b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
"shell-escape",
),
(
b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
"shell-escape-always",
),
],
);
// invalid multi-byte sequences that start valid
check_names_raw(
&[first2byte, ascii],
&[
(b"?_", "literal"),
(b"\xC2_", "literal-show"),
(b"\\302_", "escape"),
(b"\"\\302_\"", "c"),
(b"?_", "shell"),
(b"\xC2_", "shell-show"),
(b"'?_'", "shell-always"),
(b"'\xC2_'", "shell-always-show"),
(b"''$'\\302''_'", "shell-escape"),
(b"''$'\\302''_'", "shell-escape-always"),
],
);
check_names_raw(
&[first2byte, first2byte, continuation],
&[
(b"?\xC2\xA7", "literal"),
(b"\xC2\xC2\xA7", "literal-show"),
(b"\\302\xC2\xA7", "escape"),
(b"\"\\302\xC2\xA7\"", "c"),
(b"?\xC2\xA7", "shell"),
(b"\xC2\xC2\xA7", "shell-show"),
(b"'?\xC2\xA7'", "shell-always"),
(b"'\xC2\xC2\xA7'", "shell-always-show"),
(b"''$'\\302''\xC2\xA7'", "shell-escape"),
(b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
],
);
check_names_raw(
&[first3byte, continuation, ascii],
&[
(b"??_", "literal"),
(b"\xE0\xA7_", "literal-show"),
(b"\\340\\247_", "escape"),
(b"\"\\340\\247_\"", "c"),
(b"??_", "shell"),
(b"\xE0\xA7_", "shell-show"),
(b"'??_'", "shell-always"),
(b"'\xE0\xA7_'", "shell-always-show"),
(b"''$'\\340\\247''_'", "shell-escape"),
(b"''$'\\340\\247''_'", "shell-escape-always"),
],
);
check_names_raw(
&[first4byte, continuation, continuation, ascii],
&[
(b"???_", "literal"),
(b"\xF0\xA7\xA7_", "literal-show"),
(b"\\360\\247\\247_", "escape"),
(b"\"\\360\\247\\247_\"", "c"),
(b"???_", "shell"),
(b"\xF0\xA7\xA7_", "shell-show"),
(b"'???_'", "shell-always"),
(b"'\xF0\xA7\xA7_'", "shell-always-show"),
(b"''$'\\360\\247\\247''_'", "shell-escape"),
(b"''$'\\360\\247\\247''_'", "shell-escape-always"),
],
);
} }
#[test] #[test]
@ -765,7 +1138,7 @@ mod tests {
("one\\\\two", "escape"), ("one\\\\two", "escape"),
("\"one\\\\two\"", "c"), ("\"one\\\\two\"", "c"),
("'one\\two'", "shell"), ("'one\\two'", "shell"),
("\'one\\two\'", "shell-always"), ("'one\\two'", "shell-always"),
("'one\\two'", "shell-escape"), ("'one\\two'", "shell-escape"),
("'one\\two'", "shell-escape-always"), ("'one\\two'", "shell-escape-always"),
], ],

View file

@ -91,7 +91,7 @@ impl Range {
Ok(Self::merge(ranges)) Ok(Self::merge(ranges))
} }
/// Merge any overlapping ranges /// Merge any overlapping ranges. Adjacent ranges are *NOT* merged.
/// ///
/// Is guaranteed to return only disjoint ranges in a sorted order. /// Is guaranteed to return only disjoint ranges in a sorted order.
fn merge(mut ranges: Vec<Self>) -> Vec<Self> { fn merge(mut ranges: Vec<Self>) -> Vec<Self> {
@ -101,10 +101,7 @@ impl Range {
for i in 0..ranges.len() { for i in 0..ranges.len() {
let j = i + 1; let j = i + 1;
// The +1 is a small optimization, because we can merge adjacent Ranges. while j < ranges.len() && ranges[j].low <= ranges[i].high {
// For example (1,3) and (4,6), because in the integers, there are no
// possible values between 3 and 4, this is equivalent to (1,6).
while j < ranges.len() && ranges[j].low <= ranges[i].high + 1 {
let j_high = ranges.remove(j).high; let j_high = ranges.remove(j).high;
ranges[i].high = max(ranges[i].high, j_high); ranges[i].high = max(ranges[i].high, j_high);
} }
@ -216,8 +213,8 @@ mod test {
&[r(10, 40), r(50, 60)], &[r(10, 40), r(50, 60)],
); );
// Merge adjacent ranges // Don't merge adjacent ranges
m(vec![r(1, 3), r(4, 6)], &[r(1, 6)]); m(vec![r(1, 3), r(4, 6)], &[r(1, 3), r(4, 6)]);
} }
#[test] #[test]

View file

@ -207,13 +207,6 @@ impl Digest for CRC {
} }
} }
/// Returns `a / b` rounded up to the nearest integer.
///
/// Delegates to the standard library's `usize::div_ceil`. Unlike the
/// `(a + b - 1) / b` formulation, this never computes `a + b`, so it stays
/// correct for values of `a` close to `usize::MAX` instead of overflowing.
///
/// # Panics
///
/// Panics if `b` is zero.
pub fn div_ceil(a: usize, b: usize) -> usize {
    // Method-call syntax resolves to the inherent `usize::div_ceil`, not to
    // this free function, so there is no recursion here.
    a.div_ceil(b)
}
pub struct BSD { pub struct BSD {
state: u16, state: u16,
} }
@ -410,7 +403,7 @@ impl<'a> DigestWriter<'a> {
} }
} }
impl<'a> Write for DigestWriter<'a> { impl Write for DigestWriter<'_> {
#[cfg(not(windows))] #[cfg(not(windows))]
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.digest.hash_update(buf); self.digest.hash_update(buf);

View file

@ -70,11 +70,13 @@ pub use crate::features::version_cmp;
#[cfg(all(not(windows), feature = "mode"))] #[cfg(all(not(windows), feature = "mode"))]
pub use crate::features::mode; pub use crate::features::mode;
// ** unix-only // ** unix-only
#[cfg(all(any(target_os = "linux", target_os = "android"), feature = "buf-copy"))]
pub use crate::features::buf_copy;
#[cfg(all(unix, feature = "entries"))] #[cfg(all(unix, feature = "entries"))]
pub use crate::features::entries; pub use crate::features::entries;
#[cfg(all(unix, feature = "perms"))] #[cfg(all(unix, feature = "perms"))]
pub use crate::features::perms; pub use crate::features::perms;
#[cfg(all(unix, feature = "pipes"))] #[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))]
pub use crate::features::pipes; pub use crate::features::pipes;
#[cfg(all(unix, feature = "process"))] #[cfg(all(unix, feature = "process"))]
pub use crate::features::process; pub use crate::features::process;
@ -97,7 +99,7 @@ pub use crate::features::wide;
#[cfg(feature = "fsext")] #[cfg(feature = "fsext")]
pub use crate::features::fsext; pub use crate::features::fsext;
#[cfg(all(unix, not(target_os = "macos"), feature = "fsxattr"))] #[cfg(all(unix, feature = "fsxattr"))]
pub use crate::features::fsxattr; pub use crate::features::fsxattr;
//## core functions //## core functions
@ -253,9 +255,10 @@ pub fn read_yes() -> bool {
} }
} }
/// Helper function for processing delimiter values (which could be non UTF-8) /// Converts an `OsStr` to a UTF-8 `&[u8]`.
/// It converts OsString to &[u8] for unix targets only ///
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 /// This always succeeds on unix platforms,
/// and fails on other platforms if the string can't be coerced to UTF-8.
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
#[cfg(unix)] #[cfg(unix)]
let bytes = os_string.as_bytes(); let bytes = os_string.as_bytes();
@ -271,13 +274,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
Ok(bytes) Ok(bytes)
} }
/// Helper function for converting a slice of bytes into an &OsStr /// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
/// or OsString in non-unix targets.
/// ///
/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only. /// This is always lossless on unix platforms,
/// On non-unix (i.e. Windows), the conversion goes through the String type /// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
/// and thus undergo UTF-8 validation, making it fail if the stream contains pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
/// non-UTF-8 characters. #[cfg(unix)]
let bytes = Cow::from(os_string.as_bytes());
#[cfg(not(unix))]
let bytes = match os_string.to_string_lossy() {
Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
Cow::Owned(owned) => Cow::from(owned.into_bytes()),
};
bytes
}
/// Converts a `&[u8]` to an `&OsStr`,
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> { pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
#[cfg(unix)] #[cfg(unix)]
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes)); let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
@ -289,9 +307,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
Ok(os_str) Ok(os_str)
} }
/// Helper function for making an `OsString` from a byte field /// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
/// It converts `Vec<u8>` to `OsString` for unix targets only. ///
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8 /// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> { pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
#[cfg(unix)] #[cfg(unix)]
let s = OsString::from_vec(vec); let s = OsString::from_vec(vec);

View file

@ -73,7 +73,7 @@ pub fn parse_usage(content: &str) -> String {
pub fn parse_section(section: &str, content: &str) -> Option<String> { pub fn parse_section(section: &str, content: &str) -> Option<String> {
fn is_section_header(line: &str, section: &str) -> bool { fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##") line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section) .is_some_and(|l| l.trim().to_lowercase() == section)
} }
let section = &section.to_lowercase(); let section = &section.to_lowercase();

View file

@ -40,6 +40,28 @@ fn test_encode_repeat_flags_later_wrap_15() {
.stdout_only("aGVsbG8sIHdvcmx\nkIQ==\n"); // spell-checker:disable-line .stdout_only("aGVsbG8sIHdvcmx\nkIQ==\n"); // spell-checker:disable-line
} }
/// Decoding must succeed when the trailing `=` padding has been left off.
#[test]
fn test_decode_short() {
    // "aQ" is the unpadded form of "aQ==", the base64 encoding of "i".
    let unpadded = "aQ";
    let decoded = "i";
    new_ucmd!()
        .args(&["--decode"])
        .pipe_in(unpadded)
        .succeeds()
        .stdout_only(decoded);
}
/// Newlines inside or after the encoded data must not affect decoding,
/// whether or not the `=` padding is present.
#[test]
fn test_multi_lines() {
    let decoded = "i";
    for encoded in ["aQ\n\n\n", "a\nQ==\n\n\n"] {
        new_ucmd!()
            .args(&["--decode"])
            .pipe_in(encoded)
            .succeeds()
            .stdout_only(decoded);
    }
}
#[test] #[test]
fn test_base64_encode_file() { fn test_base64_encode_file() {
new_ucmd!() new_ucmd!()
@ -105,6 +127,17 @@ fn test_wrap() {
// spell-checker:disable-next-line // spell-checker:disable-next-line
.stdout_only("VGhlIHF1aWNrIGJyb3du\nIGZveCBqdW1wcyBvdmVy\nIHRoZSBsYXp5IGRvZy4=\n"); .stdout_only("VGhlIHF1aWNrIGJyb3du\nIGZveCBqdW1wcyBvdmVy\nIHRoZSBsYXp5IGRvZy4=\n");
} }
let input = "hello, world";
new_ucmd!()
.args(&["--wrap", "0"])
.pipe_in(input)
.succeeds()
.stdout_only("aGVsbG8sIHdvcmxk"); // spell-checker:disable-line
new_ucmd!()
.args(&["--wrap", "30"])
.pipe_in(input)
.succeeds()
.stdout_only("aGVsbG8sIHdvcmxk\n"); // spell-checker:disable-line
} }
#[test] #[test]

View file

@ -130,6 +130,24 @@ fn test_base16_decode() {
.stdout_only("Hello, World!"); .stdout_only("Hello, World!");
} }
/// Base16 decoding must accept lowercase hexadecimal digits.
#[test]
fn test_base16_decode_lowercase() {
    // Lowercase hex spelling of the bytes of "Hello, World!".
    let lowercase_hex = "48656c6c6f2c20576f726c6421";
    let decoded = "Hello, World!";
    new_ucmd!()
        .args(&["--base16", "-d"])
        .pipe_in(lowercase_hex)
        .succeeds()
        .stdout_only(decoded);
}
/// With `-i` (ignore garbage), lowercase hexadecimal digits must still be
/// decoded as data.
#[test]
fn test_base16_decode_and_ignore_garbage_lowercase() {
    // Lowercase hex spelling of the bytes of "Hello, World!".
    let lowercase_hex = "48656c6c6f2c20576f726c6421";
    let decoded = "Hello, World!";
    new_ucmd!()
        .args(&["--base16", "-d", "-i"])
        .pipe_in(lowercase_hex)
        .succeeds()
        .stdout_only(decoded);
}
#[test] #[test]
fn test_base2msbf() { fn test_base2msbf() {
new_ucmd!() new_ucmd!()

Some files were not shown because too many files have changed in this diff Show more