1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 12:07:46 +00:00

Merge branch 'main' into cp-lb

This commit is contained in:
Sylvestre Ledru 2022-02-20 10:31:13 +01:00 committed by GitHub
commit 6bf575ad56
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
192 changed files with 6336 additions and 3125 deletions

View file

@ -1,2 +1,11 @@
[target.x86_64-unknown-redox] [target.x86_64-unknown-redox]
linker = "x86_64-unknown-redox-gcc" linker = "x86_64-unknown-redox-gcc"
[target.'cfg(feature = "cargo-clippy")']
rustflags = [
"-Wclippy::use_self",
"-Wclippy::needless_pass_by_value",
"-Wclippy::semicolon_if_nothing_returned",
"-Wclippy::single_char_pattern",
"-Wclippy::explicit_iter_loop",
]

View file

@ -1,4 +1,4 @@
# EditorConfig (is awesome): http://EditorConfig.org # EditorConfig (is awesome!; ref: http://EditorConfig.org; v2022.02.11 [rivy])
# * top-most EditorConfig file # * top-most EditorConfig file
root = true root = true
@ -13,27 +13,49 @@ insert_final_newline = true
max_line_length = 100 max_line_length = 100
trim_trailing_whitespace = true trim_trailing_whitespace = true
[[Mm]akefile{,.*}, *.{mk,[Mm][Kk]}] [{[Mm]akefile{,.*},*.{mak,mk,[Mm][Aa][Kk],[Mm][Kk]},[Gg][Nn][Uu]makefile}]
# makefiles ~ TAB-style indentation # makefiles ~ TAB-style indentation
indent_style = tab indent_style = tab
[*.bash]
# `bash` shell scripts
indent_size = 4
indent_style = space
# * ref: <https://github.com/foxundermoon/vs-shell-format/blob/bc56a8e367b04bbf7d9947b767dc82516a6155b7/src/shFormat.ts>
# shell_variant = bash ## allow `shellcheck` to decide via script hash-bang/sha-bang line
switch_case_indent = true
[*.{bat,cmd,[Bb][Aa][Tt],[Cc][Mm][Dd]}] [*.{bat,cmd,[Bb][Aa][Tt],[Cc][Mm][Dd]}]
# BAT/CMD ~ DOS/Win requires BAT/CMD files to have CRLF EOLNs # BAT/CMD ~ DOS/Win requires BAT/CMD files to have CRLF EOLNs
end_of_line = crlf end_of_line = crlf
[*.{cjs,cjx,cts,ctx,js,jsx,mjs,mts,mtx,ts,tsx,json,jsonc}]
# js/ts/json ~ Prettier/XO-style == TAB indentation + SPACE alignment
indent_size = 2
indent_style = tab
[*.go] [*.go]
# go ~ TAB-style indentation (SPACE-style alignment); ref: <https://blog.golang.org/gofmt>@@<https://archive.is/wip/9B6FC> # go ~ TAB-style indentation (SPACE-style alignment); ref: <https://blog.golang.org/gofmt>@@<https://archive.is/wip/9B6FC>
indent_style = tab indent_style = tab
[*.{cjs,js,json,mjs,ts}]
# js/ts
indent_size = 2
[*.{markdown,md,mkd,[Mm][Dd],[Mm][Kk][Dd],[Mm][Dd][Oo][Ww][Nn],[Mm][Kk][Dd][Oo][Ww][Nn],[Mm][Aa][Rr][Kk][Dd][Oo][Ww][Nn]}] [*.{markdown,md,mkd,[Mm][Dd],[Mm][Kk][Dd],[Mm][Dd][Oo][Ww][Nn],[Mm][Kk][Dd][Oo][Ww][Nn],[Mm][Aa][Rr][Kk][Dd][Oo][Ww][Nn]}]
# markdown # markdown
indent_size = 2 indent_size = 2
indent_style = space indent_style = space
[*.sh]
# POSIX shell scripts
indent_size = 4
indent_style = space
# * ref: <https://github.com/foxundermoon/vs-shell-format/blob/bc56a8e367b04bbf7d9947b767dc82516a6155b7/src/shFormat.ts>
# shell_variant = posix ## allow `shellcheck` to decide via script hash-bang/sha-bang line
switch_case_indent = true
[*.{sln,vc{,x}proj{,.*},[Ss][Ln][Nn],[Vv][Cc]{,[Xx]}[Pp][Rr][Oo][Jj]{,.*}}]
# MSVC sln/vcproj/vcxproj files, when used, will persistently revert to CRLF EOLNs and eat final EOLs
end_of_line = crlf
insert_final_newline = false
[*.{yaml,yml,[Yy][Mm][Ll],[Yy][Aa][Mm][Ll]}] [*.{yaml,yml,[Yy][Mm][Ll],[Yy][Aa][Mm][Ll]}]
# YAML # YAML
indent_size = 2 indent_size = 2

View file

@ -1,10 +1,10 @@
name: CICD name: CICD
# spell-checker:ignore (acronyms) CICD MSVC musl # spell-checker:ignore (acronyms) CICD MSVC musl
# spell-checker:ignore (env/flags) Awarnings Ccodegen Coverflow Cpanic RUSTDOCFLAGS RUSTFLAGS Zpanic # spell-checker:ignore (env/flags) Awarnings Ccodegen Coverflow Cpanic Dwarnings RUSTDOCFLAGS RUSTFLAGS Zpanic
# spell-checker:ignore (jargon) SHAs deps dequote softprops subshell toolchain # spell-checker:ignore (jargon) SHAs deps dequote softprops subshell toolchain
# spell-checker:ignore (names) CodeCOV MacOS MinGW Peltoche rivy # spell-checker:ignore (names) CodeCOV MacOS MinGW Peltoche rivy
# spell-checker:ignore (shell/tools) choco clippy dmake dpkg esac fakeroot gmake grcov halium lcov libssl mkdir popd printf pushd rustc rustfmt rustup shopt xargs # spell-checker:ignore (shell/tools) choco clippy dmake dpkg esac fakeroot gmake grcov halium lcov libssl mkdir popd printf pushd rsync rustc rustfmt rustup shopt xargs
# spell-checker:ignore (misc) aarch alnum armhf bindir busytest coreutils gnueabihf issuecomment maint nullglob onexitbegin onexitend pell runtest tempfile testsuite uutils DESTDIR sizemulti # spell-checker:ignore (misc) aarch alnum armhf bindir busytest coreutils gnueabihf issuecomment maint nullglob onexitbegin onexitend pell runtest tempfile testsuite uutils DESTDIR sizemulti
# ToDO: [2021-06; rivy] change from `cargo-tree` to `cargo tree` once MSRV is >= 1.45 # ToDO: [2021-06; rivy] change from `cargo-tree` to `cargo tree` once MSRV is >= 1.45
@ -340,6 +340,13 @@ jobs:
## Confirm MinSRV compatible 'Cargo.lock' ## Confirm MinSRV compatible 'Cargo.lock'
# * 'Cargo.lock' is required to be in a format that `cargo` of MinSRV can interpret (eg, v1-format for MinSRV < v1.38) # * 'Cargo.lock' is required to be in a format that `cargo` of MinSRV can interpret (eg, v1-format for MinSRV < v1.38)
cargo fetch --locked --quiet || { echo "::error file=Cargo.lock::Incompatible (or out-of-date) 'Cargo.lock' file; update using \`cargo +${{ env.RUST_MIN_SRV }} update\`" ; exit 1 ; } cargo fetch --locked --quiet || { echo "::error file=Cargo.lock::Incompatible (or out-of-date) 'Cargo.lock' file; update using \`cargo +${{ env.RUST_MIN_SRV }} update\`" ; exit 1 ; }
- name: Confirm MinSRV equivalence for '.clippy.toml'
shell: bash
run: |
## Confirm MinSRV equivalence for '.clippy.toml'
# * ensure '.clippy.toml' MSRV configuration setting is equal to ${{ env.RUST_MIN_SRV }}
CLIPPY_MSRV=$(grep -P "(?i)^\s*msrv\s*=\s*" .clippy.toml | grep -oP "\d+([.]\d+)+")
if [ "${CLIPPY_MSRV}" != "${{ env.RUST_MIN_SRV }}" ]; then { echo "::error file=.clippy.toml::Incorrect MSRV configuration for clippy (found '${CLIPPY_MSRV}'; should be '${{ env.RUST_MIN_SRV }}'); update '.clippy.toml' with 'msrv = \"${{ env.RUST_MIN_SRV }}\"'" ; exit 1 ; } ; fi
- name: Info - name: Info
shell: bash shell: bash
run: | run: |

View file

@ -1,6 +1,6 @@
name: GnuTests name: GnuTests
# spell-checker:ignore (names) gnulib ; (utils) autopoint gperf pyinotify texinfo ; (vars) XPASS # spell-checker:ignore (names) gnulib ; (jargon) submodules ; (people) Dawid Dziurla * dawidd ; (utils) autopoint chksum gperf pyinotify shopt texinfo ; (vars) FILESET XPASS
on: [push, pull_request] on: [push, pull_request]
@ -9,23 +9,52 @@ jobs:
name: Run GNU tests name: Run GNU tests
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout code uutil - name: Initialize workflow variables
id: vars
shell: bash
run: |
## VARs setup
outputs() { step_id="vars"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo ::set-output name=${var}::${!var}; done; }
# * config
path_GNU="gnu"
path_GNU_tests="${path_GNU}/tests"
path_UUTILS="uutils"
path_reference="reference"
outputs path_GNU path_GNU_tests path_reference path_UUTILS
#
repo_default_branch="${{ github.event.repository.default_branch }}"
repo_GNU_ref="v9.0"
repo_reference_branch="${{ github.event.repository.default_branch }}"
outputs repo_default_branch repo_GNU_ref repo_reference_branch
#
SUITE_LOG_FILE="${path_GNU_tests}/test-suite.log"
TEST_LOGS_GLOB="${path_GNU_tests}/**/*.log" ## note: not usable at bash CLI; [why] double globstar not enabled by default b/c MacOS includes only bash v3 which doesn't have double globstar support
TEST_FILESET_PREFIX='test-fileset-IDs.sha1#'
TEST_FILESET_SUFFIX='.txt'
TEST_SUMMARY_FILE='gnu-result.json'
TEST_FULL_SUMMARY_FILE='gnu-full-result.json'
outputs SUITE_LOG_FILE TEST_FILESET_PREFIX TEST_FILESET_SUFFIX TEST_LOGS_GLOB TEST_SUMMARY_FILE TEST_FULL_SUMMARY_FILE
- name: Checkout code (uutil)
uses: actions/checkout@v2 uses: actions/checkout@v2
with: with:
path: 'uutils' path: '${{ steps.vars.outputs.path_UUTILS }}'
- name: Checkout GNU coreutils - name: Checkout code (GNU coreutils)
uses: actions/checkout@v2 uses: actions/checkout@v2
with: with:
repository: 'coreutils/coreutils' repository: 'coreutils/coreutils'
path: 'gnu' path: '${{ steps.vars.outputs.path_GNU }}'
ref: v9.0 ref: ${{ steps.vars.outputs.repo_GNU_ref }}
- name: Checkout GNU coreutils library (gnulib) submodules: recursive
uses: actions/checkout@v2 - name: Retrieve reference artifacts
uses: dawidd6/action-download-artifact@v2
# ref: <https://github.com/dawidd6/action-download-artifact>
continue-on-error: true ## don't break the build for missing reference artifacts (may be expired or just not generated yet)
with: with:
repository: 'coreutils/gnulib' workflow: GnuTests.yml
path: 'gnulib' branch: "${{ steps.vars.outputs.repo_reference_branch }}"
ref: 8e99f24c0931a38880c6ee9b8287c7da80b0036b # workflow_conclusion: success ## (default); * but, if commit with failed GnuTests is merged into the default branch, future commits will all show regression errors in GnuTests CI until o/w fixed
fetch-depth: 0 # gnu gets upset if gnulib is a shallow checkout workflow_conclusion: completed ## continually recalibrates to last commit of default branch with a completed GnuTests run, whether it passed or failed (ie, "self-heals" from GnuTest regressions, but needs more supervision for/of regressions)
path: "${{ steps.vars.outputs.path_reference }}"
- name: Install `rust` toolchain - name: Install `rust` toolchain
uses: actions-rs/toolchain@v1 uses: actions-rs/toolchain@v1
with: with:
@ -39,31 +68,54 @@ jobs:
## Install dependencies ## Install dependencies
sudo apt-get update sudo apt-get update
sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify jq sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify jq
- name: Add various locales
shell: bash
run: |
echo "Before:"
locale -a
## Some tests fail with 'cannot change locale (en_US.ISO-8859-1): No such file or directory'
## Some others need a French locale
sudo locale-gen
sudo locale-gen fr_FR
sudo locale-gen fr_FR.UTF-8
sudo update-locale
echo "After:"
locale -a
- name: Build binaries - name: Build binaries
shell: bash shell: bash
run: | run: |
## Build binaries ## Build binaries
cd uutils cd '${{ steps.vars.outputs.path_UUTILS }}'
bash util/build-gnu.sh bash util/build-gnu.sh
- name: Run GNU tests - name: Run GNU tests
shell: bash shell: bash
run: | run: |
bash uutils/util/run-gnu-test.sh path_GNU='${{ steps.vars.outputs.path_GNU }}'
- name: Extract testing info path_UUTILS='${{ steps.vars.outputs.path_UUTILS }}'
bash "${path_UUTILS}/util/run-gnu-test.sh"
- name: Extract testing info into JSON
shell: bash
run : |
path_UUTILS='${{ steps.vars.outputs.path_UUTILS }}'
python ${path_UUTILS}/util/gnu-json-result.py ${{ steps.vars.outputs.path_GNU_tests }} > ${{ steps.vars.outputs.TEST_FULL_SUMMARY_FILE }}
- name: Extract/summarize testing info
id: summary
shell: bash shell: bash
run: | run: |
## Extract testing info ## Extract/summarize testing info
LOG_FILE=gnu/tests/test-suite.log outputs() { step_id="summary"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo ::set-output name=${var}::${!var}; done; }
if test -f "$LOG_FILE" #
SUITE_LOG_FILE='${{ steps.vars.outputs.SUITE_LOG_FILE }}'
if test -f "${SUITE_LOG_FILE}"
then then
TOTAL=$(sed -n "s/.*# TOTAL: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) TOTAL=$(sed -n "s/.*# TOTAL: \(.*\)/\1/p" "${SUITE_LOG_FILE}" | tr -d '\r' | head -n1)
PASS=$(sed -n "s/.*# PASS: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) PASS=$(sed -n "s/.*# PASS: \(.*\)/\1/p" "${SUITE_LOG_FILE}" | tr -d '\r' | head -n1)
SKIP=$(sed -n "s/.*# SKIP: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) SKIP=$(sed -n "s/.*# SKIP: \(.*\)/\1/p" "${SUITE_LOG_FILE}" | tr -d '\r' | head -n1)
FAIL=$(sed -n "s/.*# FAIL: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) FAIL=$(sed -n "s/.*# FAIL: \(.*\)/\1/p" "${SUITE_LOG_FILE}" | tr -d '\r' | head -n1)
XPASS=$(sed -n "s/.*# XPASS: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) XPASS=$(sed -n "s/.*# XPASS: \(.*\)/\1/p" "${SUITE_LOG_FILE}" | tr -d '\r' | head -n1)
ERROR=$(sed -n "s/.*# ERROR: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) ERROR=$(sed -n "s/.*# ERROR: \(.*\)/\1/p" "${SUITE_LOG_FILE}" | tr -d '\r' | head -n1)
if [[ "$TOTAL" -eq 0 || "$TOTAL" -eq 1 ]]; then if [[ "$TOTAL" -eq 0 || "$TOTAL" -eq 1 ]]; then
echo "Error in the execution, failing early" echo "::error ::Failed to parse test results from '${SUITE_LOG_FILE}'; failing early"
exit 1 exit 1
fi fi
output="GNU tests summary = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR" output="GNU tests summary = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR"
@ -78,54 +130,70 @@ jobs:
--arg fail "$FAIL" \ --arg fail "$FAIL" \
--arg xpass "$XPASS" \ --arg xpass "$XPASS" \
--arg error "$ERROR" \ --arg error "$ERROR" \
'{($date): { sha: $sha, total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > gnu-result.json '{($date): { sha: $sha, total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > '${{ steps.vars.outputs.TEST_SUMMARY_FILE }}'
HASH=$(sha1sum '${{ steps.vars.outputs.TEST_SUMMARY_FILE }}' | cut --delim=" " -f 1)
outputs HASH
else else
echo "::error ::Failed to get summary of test results" echo "::error ::Failed to find summary of test results (missing '${SUITE_LOG_FILE}'); failing early"
exit 1
fi fi
- uses: actions/upload-artifact@v2 - name: Reserve SHA1/ID of 'test-summary'
uses: actions/upload-artifact@v2
with: with:
name: test-report name: "${{ steps.summary.outputs.HASH }}"
path: gnu/tests/**/*.log path: "${{ steps.vars.outputs.TEST_SUMMARY_FILE }}"
- uses: actions/upload-artifact@v2 - name: Reserve test results summary
uses: actions/upload-artifact@v2
with: with:
name: gnu-result name: test-summary
path: gnu-result.json path: "${{ steps.vars.outputs.TEST_SUMMARY_FILE }}"
- name: Download the result - name: Reserve test logs
uses: dawidd6/action-download-artifact@v2 uses: actions/upload-artifact@v2
with: with:
workflow: GnuTests.yml name: test-logs
name: gnu-result path: "${{ steps.vars.outputs.TEST_LOGS_GLOB }}"
repo: uutils/coreutils - name: Upload full json results
branch: main uses: actions/upload-artifact@v2
path: dl
- name: Download the log
uses: dawidd6/action-download-artifact@v2
with: with:
workflow: GnuTests.yml name: gnu-full-result.json
name: test-report path: ${{ steps.vars.outputs.TEST_FULL_SUMMARY_FILE }}
repo: uutils/coreutils - name: Compare test failures VS reference
branch: main
path: dl
- name: Compare failing tests against main
shell: bash shell: bash
run: | run: |
OLD_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" dl/test-suite.log | sort) have_new_failures=""
NEW_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" gnu/tests/test-suite.log | sort) REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/test-suite.log'
for LINE in $OLD_FAILING REF_SUMMARY_FILE='${{ steps.vars.outputs.path_reference }}/test-summary/gnu-result.json'
if test -f "${REF_LOG_FILE}"; then
echo "Reference SHA1/ID: $(sha1sum -- "${REF_SUMMARY_FILE}")"
REF_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${REF_LOG_FILE}" | sort)
NEW_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" '${{ steps.vars.outputs.path_GNU_tests }}/test-suite.log' | sort)
for LINE in ${REF_FAILING}
do do
if ! grep -Fxq $LINE<<<"$NEW_FAILING"; then if ! grep -Fxq ${LINE}<<<"${NEW_FAILING}"; then
echo "::warning ::Congrats! The gnu test $LINE is now passing!" echo "::warning ::Congrats! The gnu test ${LINE} is now passing!"
fi fi
done done
for LINE in $NEW_FAILING for LINE in ${NEW_FAILING}
do do
if ! grep -Fxq $LINE<<<"$OLD_FAILING" if ! grep -Fxq ${LINE}<<<"${REF_FAILING}"
then then
echo "::error ::GNU test failed: $LINE. $LINE is passing on 'main'. Maybe you have to rebase?" echo "::error ::GNU test failed: ${LINE}. ${LINE} is passing on '${{ steps.vars.outputs.repo_default_branch }}'. Maybe you have to rebase?"
have_new_failures="true"
fi fi
done done
- name: Compare against main results else
echo "::warning ::Skipping test failure comparison; no prior reference test logs are available."
fi
if test -n "${have_new_failures}" ; then exit -1 ; fi
- name: Compare test summary VS reference
if: success() || failure() # run regardless of prior step success/failure
shell: bash shell: bash
run: | run: |
mv dl/gnu-result.json main-gnu-result.json REF_SUMMARY_FILE='${{ steps.vars.outputs.path_reference }}/test-summary/gnu-result.json'
if test -f "${REF_SUMMARY_FILE}"; then
echo "Reference SHA1/ID: $(sha1sum -- "${REF_SUMMARY_FILE}")"
mv "${REF_SUMMARY_FILE}" main-gnu-result.json
python uutils/util/compare_gnu_result.py python uutils/util/compare_gnu_result.py
else
echo "::warning ::Skipping test summary comparison; no prior reference summary is available."
fi

1
.rustfmt.toml Normal file
View file

@ -0,0 +1 @@
# * using all default `cargo fmt`/`rustfmt` options

View file

@ -1,4 +1,4 @@
#!/bin/bash #!/bin/sh
rustup target add x86_64-unknown-redox rustup target add x86_64-unknown-redox
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys AA12E97F0881517F sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys AA12E97F0881517F

2
.vscode/.gitattributes vendored Normal file
View file

@ -0,0 +1,2 @@
# Configure GitHub to not mark comments in configuration files as errors
*.json linguist-language=jsonc

26
.vscode/cSpell.json vendored
View file

@ -1,7 +1,12 @@
// `cspell` settings // `cspell` settings
{ {
"version": "0.1", // Version of the setting file. Always 0.1 // version of the setting file
"language": "en", // language - current active spelling language "version": "0.2",
// spelling language
"language": "en",
// custom dictionaries
"dictionaries": ["acronyms+names", "jargon", "people", "shell", "workspace"], "dictionaries": ["acronyms+names", "jargon", "people", "shell", "workspace"],
"dictionaryDefinitions": [ "dictionaryDefinitions": [
{ "name": "acronyms+names", "path": "./cspell.dictionaries/acronyms+names.wordlist.txt" }, { "name": "acronyms+names", "path": "./cspell.dictionaries/acronyms+names.wordlist.txt" },
@ -10,10 +15,19 @@
{ "name": "shell", "path": "./cspell.dictionaries/shell.wordlist.txt" }, { "name": "shell", "path": "./cspell.dictionaries/shell.wordlist.txt" },
{ "name": "workspace", "path": "./cspell.dictionaries/workspace.wordlist.txt" } { "name": "workspace", "path": "./cspell.dictionaries/workspace.wordlist.txt" }
], ],
// ignorePaths - a list of globs to specify which files are to be ignored
"ignorePaths": ["Cargo.lock", "target/**", "tests/**/fixtures/**", "src/uu/dd/test-resources/**", "vendor/**"], // files to ignore (globs supported)
// ignoreWords - a list of words to be ignored (even if they are in the flagWords) "ignorePaths": [
"Cargo.lock",
"target/**",
"tests/**/fixtures/**",
"src/uu/dd/test-resources/**",
"vendor/**"
],
// words to ignore (even if they are in the flagWords)
"ignoreWords": [], "ignoreWords": [],
// words - list of words to be always considered correct
// words to always consider correct
"words": [] "words": []
} }

View file

@ -44,6 +44,7 @@ termsize
termwidth termwidth
textwrap textwrap
thiserror thiserror
ureq
walkdir walkdir
winapi winapi
xattr xattr

View file

@ -1,10 +1,13 @@
// spell-checker:ignore (misc) matklad // spell-checker:ignore (misc) matklad
// see <http://go.microsoft.com/fwlink/?LinkId=827846> for the documentation about the extensions.json format // see <http://go.microsoft.com/fwlink/?LinkId=827846> for the documentation about the extensions.json format
// *
// "foxundermoon.shell-format" ~ shell script formatting ; note: ENABLE "Use EditorConfig"
// "matklad.rust-analyzer" ~ `rust` language support
// "streetsidesoftware.code-spell-checker" ~ `cspell` spell-checker support
{ {
"recommendations": [ "recommendations": [
// Rust language support.
"matklad.rust-analyzer", "matklad.rust-analyzer",
// `cspell` spell-checker support "streetsidesoftware.code-spell-checker",
"streetsidesoftware.code-spell-checker" "foxundermoon.shell-format"
] ]
} }

1
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1 @@
{ "cSpell.import": [".vscode/cspell.json"] }

355
Cargo.lock generated
View file

@ -8,6 +8,12 @@ version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]] [[package]]
name = "ahash" name = "ahash"
version = "0.4.7" version = "0.4.7"
@ -50,6 +56,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.14" version = "0.2.14"
@ -67,6 +79,12 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "base64"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]] [[package]]
name = "bigdecimal" name = "bigdecimal"
version = "0.3.0" version = "0.3.0"
@ -123,10 +141,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
dependencies = [ dependencies = [
"arrayref", "arrayref",
"arrayvec", "arrayvec 0.5.2",
"constant_time_eq", "constant_time_eq",
] ]
[[package]]
name = "blake3"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f"
dependencies = [
"arrayref",
"arrayvec 0.7.2",
"cc",
"cfg-if 1.0.0",
"constant_time_eq",
"digest",
]
[[package]] [[package]]
name = "block-buffer" name = "block-buffer"
version = "0.10.0" version = "0.10.0"
@ -147,6 +179,12 @@ dependencies = [
"regex-automata", "regex-automata",
] ]
[[package]]
name = "bumpalo"
version = "3.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a45a46ab1f2412e53d3a0ade76ffad2025804294569aae387231a0cd6e0899"
[[package]] [[package]]
name = "byte-unit" name = "byte-unit"
version = "4.0.13" version = "4.0.13"
@ -208,6 +246,12 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "chunked_transfer"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e"
[[package]] [[package]]
name = "clang-sys" name = "clang-sys"
version = "1.3.0" version = "1.3.0"
@ -292,6 +336,7 @@ dependencies = [
"conv", "conv",
"filetime", "filetime",
"glob", "glob",
"hex-literal",
"lazy_static", "lazy_static",
"libc", "libc",
"nix 0.23.1", "nix 0.23.1",
@ -308,6 +353,7 @@ dependencies = [
"time", "time",
"unindent", "unindent",
"unix_socket", "unix_socket",
"ureq",
"users", "users",
"uu_arch", "uu_arch",
"uu_base32", "uu_base32",
@ -411,6 +457,7 @@ dependencies = [
"uu_yes", "uu_yes",
"uucore", "uucore",
"walkdir", "walkdir",
"zip",
] ]
[[package]] [[package]]
@ -525,6 +572,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if 1.0.0",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.2" version = "0.5.2"
@ -670,6 +726,7 @@ dependencies = [
"block-buffer", "block-buffer",
"crypto-common", "crypto-common",
"generic-array", "generic-array",
"subtle",
] ]
[[package]] [[package]]
@ -770,12 +827,34 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "flate2"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f"
dependencies = [
"cfg-if 1.0.0",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191"
dependencies = [
"matches",
"percent-encoding",
]
[[package]] [[package]]
name = "fs_extra" name = "fs_extra"
version = "1.2.0" version = "1.2.0"
@ -882,6 +961,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa" checksum = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa"
[[package]]
name = "hex-literal"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0"
[[package]] [[package]]
name = "hostname" name = "hostname"
version = "0.3.1" version = "0.3.1"
@ -899,6 +984,17 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "idna"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]
[[package]] [[package]]
name = "if_rust_version" name = "if_rust_version"
version = "1.0.0" version = "1.0.0"
@ -939,6 +1035,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "js-sys"
version = "0.3.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04"
dependencies = [
"wasm-bindgen",
]
[[package]] [[package]]
name = "keccak" name = "keccak"
version = "0.1.0" version = "0.1.0"
@ -1016,6 +1121,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
[[package]]
name = "matches"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
[[package]] [[package]]
name = "md5" name = "md5"
version = "0.3.8" version = "0.3.8"
@ -1061,6 +1172,16 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b"
dependencies = [
"adler",
"autocfg",
]
[[package]] [[package]]
name = "mio" name = "mio"
version = "0.7.14" version = "0.7.14"
@ -1347,6 +1468,12 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "percent-encoding"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.10.1" version = "0.10.1"
@ -1627,6 +1754,21 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53552c6c49e1e13f1a203ef0080ab3bbef0beb570a528993e83df057a9d9bba1" checksum = "53552c6c49e1e13f1a203ef0080ab3bbef0beb570a528993e83df057a9d9bba1"
[[package]]
name = "ring"
version = "0.16.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
dependencies = [
"cc",
"libc",
"once_cell",
"spin",
"untrusted",
"web-sys",
"winapi 0.3.9",
]
[[package]] [[package]]
name = "rlimit" name = "rlimit"
version = "0.4.0" version = "0.4.0"
@ -1653,6 +1795,18 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustls"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b323592e3164322f5b193dc4302e4e36cd8d37158a712d664efae1a5c2791700"
dependencies = [
"log",
"ring",
"sct",
"webpki",
]
[[package]] [[package]]
name = "same-file" name = "same-file"
version = "1.0.6" version = "1.0.6"
@ -1668,6 +1822,16 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "sct"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
dependencies = [
"ring",
"untrusted",
]
[[package]] [[package]]
name = "selinux" name = "selinux"
version = "0.2.5" version = "0.2.5"
@ -1716,19 +1880,15 @@ dependencies = [
[[package]] [[package]]
name = "sha1" name = "sha1"
version = "0.6.1" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1da05c97445caa12d05e848c4a4fcbbea29e748ac28f7e80e9b010392063770" checksum = "04cc229fb94bcb689ffc39bd4ded842f6ff76885efede7c6d1ffb62582878bea"
dependencies = [ dependencies = [
"sha1_smol", "cfg-if 1.0.0",
"cpufeatures",
"digest",
] ]
[[package]]
name = "sha1_smol"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.10.1" version = "0.10.1"
@ -1814,6 +1974,12 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "spin"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]] [[package]]
name = "stable_deref_trait" name = "stable_deref_trait"
version = "1.2.0" version = "1.2.0"
@ -1850,6 +2016,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "subtle"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.86" version = "1.0.86"
@ -1979,6 +2151,21 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "tinyvec"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]] [[package]]
name = "toml" name = "toml"
version = "0.5.8" version = "0.5.8"
@ -1994,6 +2181,12 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "unicode-bidi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f"
[[package]] [[package]]
name = "unicode-linebreak" name = "unicode-linebreak"
version = "0.1.2" version = "0.1.2"
@ -2003,6 +2196,15 @@ dependencies = [
"regex", "regex",
] ]
[[package]]
name = "unicode-normalization"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
dependencies = [
"tinyvec",
]
[[package]] [[package]]
name = "unicode-segmentation" name = "unicode-segmentation"
version = "1.8.0" version = "1.8.0"
@ -2043,6 +2245,41 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "untrusted"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "ureq"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9399fa2f927a3d327187cbd201480cee55bee6ac5d3c77dd27f0c6814cff16d5"
dependencies = [
"base64",
"chunked_transfer",
"flate2",
"log",
"once_cell",
"rustls",
"url",
"webpki",
"webpki-roots",
]
[[package]]
name = "url"
version = "2.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c"
dependencies = [
"form_urlencoded",
"idna",
"matches",
"percent-encoding",
]
[[package]] [[package]]
name = "users" name = "users"
version = "0.10.0" version = "0.10.0"
@ -2375,6 +2612,7 @@ name = "uu_hashsum"
version = "0.0.12" version = "0.0.12"
dependencies = [ dependencies = [
"blake2b_simd", "blake2b_simd",
"blake3",
"clap 3.0.10", "clap 3.0.10",
"digest", "digest",
"hex", "hex",
@ -2444,6 +2682,7 @@ name = "uu_join"
version = "0.0.12" version = "0.0.12"
dependencies = [ dependencies = [
"clap 3.0.10", "clap 3.0.10",
"memchr 2.4.1",
"uucore", "uucore",
] ]
@ -2821,6 +3060,7 @@ name = "uu_split"
version = "0.0.12" version = "0.0.12"
dependencies = [ dependencies = [
"clap 3.0.10", "clap 3.0.10",
"memchr 2.4.1",
"uucore", "uucore",
] ]
@ -3138,6 +3378,89 @@ version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "wasm-bindgen"
version = "0.2.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06"
dependencies = [
"cfg-if 1.0.0",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca"
dependencies = [
"bumpalo",
"lazy_static",
"log",
"proc-macro2",
"quote 1.0.14",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01"
dependencies = [
"quote 1.0.14",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc"
dependencies = [
"proc-macro2",
"quote 1.0.14",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2"
[[package]]
name = "web-sys"
version = "0.3.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "webpki-roots"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552ceb903e957524388c4d3475725ff2c8b7960922063af6ce53c9a43da07449"
dependencies = [
"webpki",
]
[[package]] [[package]]
name = "which" name = "which"
version = "4.2.2" version = "4.2.2"
@ -3215,3 +3538,15 @@ name = "z85"
version = "3.0.4" version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af896e93db81340b74b65f74276a99b210c086f3d34ed0abf433182a462af856" checksum = "af896e93db81340b74b65f74276a99b210c086f3d34ed0abf433182a462af856"
[[package]]
name = "zip"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815"
dependencies = [
"byteorder",
"crc32fast",
"flate2",
"thiserror",
]

View file

@ -252,6 +252,8 @@ lazy_static = { version="1.3" }
textwrap = { version="0.14", features=["terminal_size"] } textwrap = { version="0.14", features=["terminal_size"] }
uucore = { version=">=0.0.11", package="uucore", path="src/uucore" } uucore = { version=">=0.0.11", package="uucore", path="src/uucore" }
selinux = { version="0.2", optional = true } selinux = { version="0.2", optional = true }
ureq = "2.4.0"
zip = { version = "0.5.13", default_features=false, features=["deflate"] }
# * uutils # * uutils
uu_test = { optional=true, version="0.0.12", package="uu_test", path="src/uu/test" } uu_test = { optional=true, version="0.0.12", package="uu_test", path="src/uu/test" }
# #
@ -372,13 +374,14 @@ libc = "0.2"
pretty_assertions = "1" pretty_assertions = "1"
rand = "0.8" rand = "0.8"
regex = "1.0" regex = "1.0"
sha1 = { version="0.6", features=["std"] } sha1 = { version="0.10", features=["std"] }
tempfile = "3.2.0" tempfile = "3.2.0"
time = "0.1" time = "0.1"
unindent = "0.1" unindent = "0.1"
uucore = { version=">=0.0.11", package="uucore", path="src/uucore", features=["entries", "process"] } uucore = { version=">=0.0.11", package="uucore", path="src/uucore", features=["entries", "process"] }
walkdir = "2.2" walkdir = "2.2"
atty = "0.2" atty = "0.2"
hex-literal = "0.3.1"
[target.'cfg(target_os = "linux")'.dev-dependencies] [target.'cfg(target_os = "linux")'.dev-dependencies]
rlimit = "0.4.0" rlimit = "0.4.0"

View file

@ -62,6 +62,7 @@ PROGS := \
csplit \ csplit \
cut \ cut \
date \ date \
dd \
df \ df \
dircolors \ dircolors \
dirname \ dirname \

View file

@ -349,6 +349,10 @@ $ make UTILS='UTILITY_1 UTILITY_2' RUNTEST_ARGS='-v' busytest
### Comparing with GNU ### Comparing with GNU
Below is the evolution of how many GNU tests uutils passes. A more detailed
breakdown of the GNU test results of the main branch can be found
[in the user manual](https://uutils.github.io/coreutils-docs/user/test_coverage.html).
![Evolution over time](https://github.com/uutils/coreutils-tracking/blob/main/gnu-results.png?raw=true) ![Evolution over time](https://github.com/uutils/coreutils-tracking/blob/main/gnu-results.png?raw=true)
To run locally: To run locally:

View file

@ -116,6 +116,8 @@ pub fn main() {
phf_map.entry("sha3-512sum", &map_value); phf_map.entry("sha3-512sum", &map_value);
phf_map.entry("shake128sum", &map_value); phf_map.entry("shake128sum", &map_value);
phf_map.entry("shake256sum", &map_value); phf_map.entry("shake256sum", &map_value);
phf_map.entry("b2sum", &map_value);
phf_map.entry("b3sum", &map_value);
tf.write_all( tf.write_all(
format!( format!(
"#[path=\"{dir}/test_{krate}.rs\"]\nmod test_{krate};\n", "#[path=\"{dir}/test_{krate}.rs\"]\nmod test_{krate};\n",

View file

@ -1,5 +1,4 @@
UseGNU=gmake $* clean:
all: rm -rf book
@$(UseGNU) rm -f src/SUMMARY.md
.DEFAULT: rm -f src/utils/*
@$(UseGNU)

View file

@ -0,0 +1,46 @@
/* Styles for the GNU test coverage page. */

/* One colour per test status; the custom property names equal the status
   strings so a rule or script can select a colour with var(--STATUS).
   ERROR and FAIL share the same red. */
:root {
--PASS: #44AF69;
--ERROR: #F8333C;
--FAIL: #F8333C;
--SKIP: #d3c994;
}
/* Text colour helpers, one class per status. */
.PASS {
color: var(--PASS);
}
.ERROR {
color: var(--ERROR);
}
.FAIL {
color: var(--FAIL);
}
.SKIP {
color: var(--SKIP);
}
/* Summary row: children are spread to the two ends of the row. */
.testSummary {
display: inline-flex;
align-items: center;
justify-content: space-between;
width: 90%;
}
/* Container holding the counts and the bar, aligned to the right. */
.progress {
width: 80%;
display: flex;
justify-content: right;
align-items: center;
}
/* The bar itself; the 15ch subtracted from the width presumably leaves room
   for the counts next to it (TODO confirm). */
.progress-bar {
height: 10px;
width: calc(100% - 15ch);
border-radius: 5px;
}
/* Fixed-width, bold status label so that the text following it lines up. */
.result {
font-weight: bold;
width: 7ch;
display: inline-block;
}
/* One line per individual test result. */
.result-line {
margin: 8px;
}
/* Gap between the numeric counts and whatever follows them. */
.counts {
margin-right: 10px;
}

82
docs/src/test_coverage.js Normal file
View file

@ -0,0 +1,82 @@
// spell-checker:ignore hljs
/**
 * Build the progress widget for one category: the PASS / SKIP / FAIL counts
 * followed by a bar coloured proportionally to those counts.
 *
 * @param {Object<string, number>} totals - number of tests per status; the
 *     PASS, SKIP, FAIL and ERROR entries are read, every entry is counted
 *     towards the total.
 * @returns {HTMLDivElement} a `div.progress` holding the counts and the bar.
 */
function progressBar(totals) {
    const bar = document.createElement("div");
    bar.className = "progress-bar";

    let totalTests = 0;
    for (const count of Object.values(totals)) {
        totalTests += count;
    }

    // A category without any tests must not divide by zero: that would put
    // "NaN%" stops into the gradient and invalidate the whole declaration.
    const percentage = (count) =>
        totalTests === 0 ? 0 : Math.round(100 * count / totalTests);
    const passPercentage = percentage(totals["PASS"]);
    const skipPercentage = passPercentage + percentage(totals["SKIP"]);

    // The ternary expressions are used for some edge-cases where there are no failing test,
    // but still a red (or beige) line shows up because of how CSS draws gradients.
    bar.style = `background: linear-gradient(
        to right,
        var(--PASS) ${passPercentage}%`
        + (passPercentage === 100 ? ", var(--PASS)" :
            `, var(--SKIP) ${passPercentage}%,
            var(--SKIP) ${skipPercentage}%`
        )
        + (skipPercentage === 100 ? ")" : ", var(--FAIL) 0)");

    const progress = document.createElement("div");
    progress.className = "progress";
    // Failures and errors are shown as a single number.
    progress.innerHTML = `
        <span class="counts">
        <span class="PASS">${totals["PASS"]}</span>
        /
        <span class="SKIP">${totals["SKIP"]}</span>
        /
        <span class="FAIL">${totals["FAIL"] + totals["ERROR"]}</span>
        </span>
    `;
    progress.appendChild(bar);
    return progress;
}
/**
 * Render a (possibly nested) result object into `parent` and count the
 * results per status.
 *
 * String values are individual test results (`name -> status`) and become one
 * `p.result-line` each. Any other value is treated as a sub-category: it is
 * rendered recursively into a `<details>` element whose `<summary>` shows the
 * category name and a progress bar for that category.
 *
 * @param {Node} parent - element the rendered results are appended to.
 * @param {Object} obj - result tree to render.
 * @returns {Object<string, number>} number of tests per status, including
 *     all sub-categories. PASS, SKIP, FAIL and ERROR are always present.
 */
function parse_result(parent, obj) {
    const totals = {
        PASS: 0,
        SKIP: 0,
        FAIL: 0,
        ERROR: 0,
    };
    for (const [category, content] of Object.entries(obj)) {
        if (typeof content === "string") {
            // A status that is not one of the four known keys must start at
            // zero; incrementing `undefined` would yield NaN and poison every
            // total above this category.
            totals[content] = (totals[content] || 0) + 1;

            // The names and statuses come from a downloaded file, so they are
            // inserted as text and never parsed as markup.
            const result = document.createElement("span");
            result.className = "result";
            result.style = `color: var(--${content})`;
            result.textContent = content;

            const p = document.createElement("p");
            p.className = "result-line";
            p.appendChild(result);
            p.appendChild(document.createTextNode(` ${category}`));
            parent.appendChild(p);
        } else {
            const categoryName = document.createElement("code");
            categoryName.textContent = category;
            categoryName.className = "hljs";

            const details = document.createElement("details");
            const subtotals = parse_result(details, content);
            for (const [subtotal, count] of Object.entries(subtotals)) {
                totals[subtotal] = (totals[subtotal] || 0) + count;
            }

            const summaryDiv = document.createElement("div");
            summaryDiv.className = "testSummary";
            summaryDiv.appendChild(categoryName);
            summaryDiv.appendChild(progressBar(subtotals));

            const summary = document.createElement("summary");
            summary.appendChild(summaryDiv);
            details.appendChild(summary);
            parent.appendChild(details);
        }
    }
    return totals;
}
// Download the latest GNU test results and render them into #test-cov.
fetch("https://raw.githubusercontent.com/uutils/coreutils-tracking/main/gnu-full-result.json")
    .then((r) => {
        // fetch() only rejects on network errors; an HTTP error status still
        // resolves, so it has to be turned into a failure explicitly.
        if (!r.ok) {
            throw new Error(`HTTP ${r.status}`);
        }
        return r.json();
    })
    .then((obj) => {
        const parent = document.getElementById("test-cov");
        parse_result(parent, obj);
    })
    .catch((err) => {
        // Tell the reader what happened instead of leaving the page empty.
        console.error("Could not load the GNU test results:", err);
        const parent = document.getElementById("test-cov");
        if (parent) {
            parent.textContent = "The test results could not be loaded.";
        }
    });

19
docs/src/test_coverage.md Normal file
View file

@ -0,0 +1,19 @@
# GNU Test Coverage
uutils is actively tested against the GNU coreutils test suite. The results
below are automatically updated every day.
## Coverage per category
Click on the categories to see the names of the tests. Green indicates a passing
test, yellow indicates a skipped test and red means that the test either failed
or resulted in an error.
<link rel="stylesheet" href="test_coverage.css">
<script src="test_coverage.js"></script>
<div id="test-cov"></div>
## Progress over time
<img src="https://github.com/uutils/coreutils-tracking/blob/main/gnu-results.png?raw=true" alt="Evolution of the GNU test results over time">

View file

@ -87,7 +87,7 @@ fn main() {
}; };
if util == "completion" { if util == "completion" {
gen_completions(args, utils); gen_completions(args, &utils);
} }
match utils.get(util) { match utils.get(util) {
@ -132,7 +132,7 @@ fn main() {
/// Prints completions for the utility in the first parameter for the shell in the second parameter to stdout /// Prints completions for the utility in the first parameter for the shell in the second parameter to stdout
fn gen_completions<T: uucore::Args>( fn gen_completions<T: uucore::Args>(
args: impl Iterator<Item = OsString>, args: impl Iterator<Item = OsString>,
util_map: UtilityMap<T>, util_map: &UtilityMap<T>,
) -> ! { ) -> ! {
let all_utilities: Vec<_> = std::iter::once("coreutils") let all_utilities: Vec<_> = std::iter::once("coreutils")
.chain(util_map.keys().copied()) .chain(util_map.keys().copied())
@ -168,9 +168,9 @@ fn gen_completions<T: uucore::Args>(
process::exit(0); process::exit(0);
} }
fn gen_coreutils_app<T: uucore::Args>(util_map: UtilityMap<T>) -> App<'static> { fn gen_coreutils_app<T: uucore::Args>(util_map: &UtilityMap<T>) -> App<'static> {
let mut app = App::new("coreutils"); let mut app = App::new("coreutils");
for (_, (_, sub_app)) in &util_map { for (_, (_, sub_app)) in util_map {
app = app.subcommand(sub_app()); app = app.subcommand(sub_app());
} }
app app

View file

@ -2,15 +2,27 @@
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore tldr
use clap::App; use clap::App;
use std::ffi::OsString; use std::ffi::OsString;
use std::fs::File; use std::fs::File;
use std::io::{self, Write}; use std::io::Cursor;
use std::io::{self, Read, Seek, Write};
use zip::ZipArchive;
include!(concat!(env!("OUT_DIR"), "/uutils_map.rs")); include!(concat!(env!("OUT_DIR"), "/uutils_map.rs"));
fn main() -> io::Result<()> { fn main() -> io::Result<()> {
println!("Downloading tldr archive");
let mut zip_reader = ureq::get("https://tldr.sh/assets/tldr.zip")
.call()
.unwrap()
.into_reader();
let mut buffer = Vec::new();
zip_reader.read_to_end(&mut buffer).unwrap();
let mut tldr_zip = ZipArchive::new(Cursor::new(buffer)).unwrap();
let utils = util_map::<Box<dyn Iterator<Item = OsString>>>(); let utils = util_map::<Box<dyn Iterator<Item = OsString>>>();
match std::fs::create_dir("docs/src/utils/") { match std::fs::create_dir("docs/src/utils/") {
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => Ok(()), Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => Ok(()),
@ -26,6 +38,7 @@ fn main() -> io::Result<()> {
[Introduction](index.md)\n\ [Introduction](index.md)\n\
* [Installation](installation.md)\n\ * [Installation](installation.md)\n\
* [Contributing](contributing.md)\n\ * [Contributing](contributing.md)\n\
* [GNU test coverage](test_coverage.md)\n\
\n\ \n\
# Reference\n\ # Reference\n\
* [Multi-call binary](multicall.md)\n", * [Multi-call binary](multicall.md)\n",
@ -39,7 +52,7 @@ fn main() -> io::Result<()> {
} }
let p = format!("docs/src/utils/{}.md", name); let p = format!("docs/src/utils/{}.md", name);
if let Ok(f) = File::create(&p) { if let Ok(f) = File::create(&p) {
write_markdown(f, &mut app(), name)?; write_markdown(f, &mut app(), name, &mut tldr_zip)?;
println!("Wrote to '{}'", p); println!("Wrote to '{}'", p);
} else { } else {
println!("Error writing to {}", p); println!("Error writing to {}", p);
@ -49,12 +62,18 @@ fn main() -> io::Result<()> {
Ok(()) Ok(())
} }
fn write_markdown(mut w: impl Write, app: &mut App, name: &str) -> io::Result<()> { fn write_markdown(
mut w: impl Write,
app: &mut App,
name: &str,
tldr_zip: &mut zip::ZipArchive<impl Read + Seek>,
) -> io::Result<()> {
write!(w, "# {}\n\n", name)?; write!(w, "# {}\n\n", name)?;
write_version(&mut w, app)?; write_version(&mut w, app)?;
write_usage(&mut w, app, name)?; write_usage(&mut w, app, name)?;
write_description(&mut w, app)?; write_description(&mut w, app)?;
write_options(&mut w, app) write_options(&mut w, app)?;
write_examples(&mut w, name, tldr_zip)
} }
fn write_version(w: &mut impl Write, app: &App) -> io::Result<()> { fn write_version(w: &mut impl Write, app: &App) -> io::Result<()> {
@ -67,7 +86,14 @@ fn write_version(w: &mut impl Write, app: &App) -> io::Result<()> {
fn write_usage(w: &mut impl Write, app: &mut App, name: &str) -> io::Result<()> { fn write_usage(w: &mut impl Write, app: &mut App, name: &str) -> io::Result<()> {
writeln!(w, "\n```")?; writeln!(w, "\n```")?;
let mut usage: String = app.render_usage().lines().nth(1).unwrap().trim().into(); let mut usage: String = app
.render_usage()
.lines()
.skip(1)
.map(|l| l.trim())
.filter(|l| !l.is_empty())
.collect::<Vec<_>>()
.join("\n");
usage = usage.replace(app.get_name(), name); usage = usage.replace(app.get_name(), name);
writeln!(w, "{}", usage)?; writeln!(w, "{}", usage)?;
writeln!(w, "```") writeln!(w, "```")
@ -81,6 +107,51 @@ fn write_description(w: &mut impl Write, app: &App) -> io::Result<()> {
} }
} }
/// Appends an "Examples" section for the utility `name` to `w`, using the
/// matching page of the tldr archive.
///
/// The page is looked up as `pages/common/{name}.md` first and as
/// `pages/linux/{name}.md` second. If `get_zip_content` yields nothing for
/// both, no output is produced and `Ok(())` is returned.
///
/// Everything before the first line starting with `-` is skipped. From there
/// on, `- ` lines are written without the marker, lines starting with a
/// backtick are turned into fenced `shell` blocks, empty lines are kept and
/// any other line is reported on stdout and left out. The section ends with an
/// attribution note for the tldr-pages project.
///
/// # Errors
///
/// Returns any error raised while writing to `w`.
fn write_examples(
    w: &mut impl Write,
    name: &str,
    tldr_zip: &mut zip::ZipArchive<impl Read + Seek>,
) -> io::Result<()> {
    // The linux page is only looked up when there is no common page.
    let page = match get_zip_content(tldr_zip, &format!("pages/common/{}.md", name))
        .or_else(|| get_zip_content(tldr_zip, &format!("pages/linux/{}.md", name)))
    {
        Some(text) => text,
        None => return Ok(()),
    };

    writeln!(w, "## Examples")?;
    writeln!(w)?;

    let examples = page.lines().skip_while(|l| !l.starts_with('-'));
    for line in examples {
        match line.strip_prefix("- ") {
            Some(description) => writeln!(w, "{}", description)?,
            None if line.starts_with('`') => {
                writeln!(w, "```shell\n{}\n```", line.trim_matches('`'))?;
            }
            None if line.is_empty() => writeln!(w)?,
            None => {
                println!("Not sure what to do with this line:");
                println!("{}", line);
            }
        }
    }

    writeln!(w)?;
    writeln!(
        w,
        "> The examples are provided by the [tldr-pages project](https://tldr.sh) under the [CC BY 4.0 License](https://github.com/tldr-pages/tldr/blob/main/LICENSE.md)."
    )?;
    writeln!(w, ">")?;
    writeln!(
        w,
        "> Please note that, as uutils is a work in progress, some examples might fail."
    )
}
/// Returns the content of the archive entry `name` as a `String`.
///
/// Yields `None` when `by_name` does not return the entry.
///
/// # Panics
///
/// Panics if the entry cannot be read into a `String`.
fn get_zip_content(archive: &mut ZipArchive<impl Read + Seek>, name: &str) -> Option<String> {
    let mut entry = archive.by_name(name).ok()?;
    let mut text = String::new();
    entry.read_to_string(&mut text).unwrap();
    Some(text)
}
fn write_options(w: &mut impl Write, app: &App) -> io::Result<()> { fn write_options(w: &mut impl Write, app: &App) -> io::Result<()> {
writeln!(w, "<h2>Options</h2>")?; writeln!(w, "<h2>Options</h2>")?;
write!(w, "<dl>")?; write!(w, "<dl>")?;
@ -130,7 +201,11 @@ fn write_options(w: &mut impl Write, app: &App) -> io::Result<()> {
write!(w, "</code>")?; write!(w, "</code>")?;
} }
writeln!(w, "</dt>")?; writeln!(w, "</dt>")?;
writeln!(w, "<dd>\n\n{}\n\n</dd>", arg.get_help().unwrap_or_default())?; writeln!(
w,
"<dd>\n\n{}\n\n</dd>",
arg.get_help().unwrap_or_default().replace("\n", "<br />")
)?;
} }
writeln!(w, "</dl>") writeln!(w, "</dl>\n")
} }

View file

@ -12,14 +12,14 @@ use uucore::{encoding::Format, error::UResult};
pub mod base_common; pub mod base_common;
static ABOUT: &str = " static ABOUT: &str = "\
With no FILE, or when FILE is -, read standard input. With no FILE, or when FILE is -, read standard input.
The data are encoded as described for the base32 alphabet in RFC The data are encoded as described for the base32 alphabet in RFC
4648. When decoding, the input may contain newlines in addition 4648. When decoding, the input may contain newlines in addition
to the bytes of the formal base32 alphabet. Use --ignore-garbage to the bytes of the formal base32 alphabet. Use --ignore-garbage
to attempt to recover from any other non-alphabet bytes in the to attempt to recover from any other non-alphabet bytes in the
encoded stream. encoded stream.
"; ";
fn usage() -> String { fn usage() -> String {

View file

@ -13,14 +13,14 @@ use uucore::{encoding::Format, error::UResult};
use std::io::{stdin, Read}; use std::io::{stdin, Read};
static ABOUT: &str = " static ABOUT: &str = "\
With no FILE, or when FILE is -, read standard input. With no FILE, or when FILE is -, read standard input.
The data are encoded as described for the base64 alphabet in RFC The data are encoded as described for the base64 alphabet in RFC
3548. When decoding, the input may contain newlines in addition 3548. When decoding, the input may contain newlines in addition
to the bytes of the formal base64 alphabet. Use --ignore-garbage to the bytes of the formal base64 alphabet. Use --ignore-garbage
to attempt to recover from any other non-alphabet bytes in the to attempt to recover from any other non-alphabet bytes in the
encoded stream. encoded stream.
"; ";
fn usage() -> String { fn usage() -> String {

View file

@ -19,12 +19,12 @@ use uucore::{
use std::io::{stdin, Read}; use std::io::{stdin, Read};
static ABOUT: &str = " static ABOUT: &str = "\
With no FILE, or when FILE is -, read standard input. With no FILE, or when FILE is -, read standard input.
When decoding, the input may contain newlines in addition to the bytes of When decoding, the input may contain newlines in addition to the bytes of
the formal alphabet. Use --ignore-garbage to attempt to recover the formal alphabet. Use --ignore-garbage to attempt to recover
from any other non-alphabet bytes in the encoded stream. from any other non-alphabet bytes in the encoded stream.
"; ";
const ENCODINGS: &[(&str, Format)] = &[ const ENCODINGS: &[(&str, Format)] = &[

View file

@ -325,15 +325,15 @@ fn cat_path(
state: &mut OutputState, state: &mut OutputState,
out_info: Option<&FileInformation>, out_info: Option<&FileInformation>,
) -> CatResult<()> { ) -> CatResult<()> {
if path == "-" { match get_input_type(path)? {
InputType::StdIn => {
let stdin = io::stdin(); let stdin = io::stdin();
let mut handle = InputHandle { let mut handle = InputHandle {
reader: stdin, reader: stdin,
is_interactive: atty::is(atty::Stream::Stdin), is_interactive: atty::is(atty::Stream::Stdin),
}; };
return cat_handle(&mut handle, options, state); cat_handle(&mut handle, options, state)
} }
match get_input_type(path)? {
InputType::Directory => Err(CatError::IsDirectory), InputType::Directory => Err(CatError::IsDirectory),
#[cfg(unix)] #[cfg(unix)]
InputType::Socket => { InputType::Socket => {
@ -560,13 +560,12 @@ fn write_tab_to_end<W: Write>(mut in_buf: &[u8], writer: &mut W) -> usize {
{ {
Some(p) => { Some(p) => {
writer.write_all(&in_buf[..p]).unwrap(); writer.write_all(&in_buf[..p]).unwrap();
if in_buf[p] == b'\n' { if in_buf[p] == b'\t' {
return count + p;
} else if in_buf[p] == b'\t' {
writer.write_all(b"^I").unwrap(); writer.write_all(b"^I").unwrap();
in_buf = &in_buf[p + 1..]; in_buf = &in_buf[p + 1..];
count += p + 1; count += p + 1;
} else { } else {
// b'\n' or b'\r'
return count + p; return count + p;
} }
} }
@ -589,10 +588,10 @@ fn write_nonprint_to_end<W: Write>(in_buf: &[u8], writer: &mut W, tab: &[u8]) ->
9 => writer.write_all(tab), 9 => writer.write_all(tab),
0..=8 | 10..=31 => writer.write_all(&[b'^', byte + 64]), 0..=8 | 10..=31 => writer.write_all(&[b'^', byte + 64]),
32..=126 => writer.write_all(&[byte]), 32..=126 => writer.write_all(&[byte]),
127 => writer.write_all(&[b'^', byte - 64]), 127 => writer.write_all(&[b'^', b'?']),
128..=159 => writer.write_all(&[b'M', b'-', b'^', byte - 64]), 128..=159 => writer.write_all(&[b'M', b'-', b'^', byte - 64]),
160..=254 => writer.write_all(&[b'M', b'-', byte - 128]), 160..=254 => writer.write_all(&[b'M', b'-', byte - 128]),
_ => writer.write_all(&[b'M', b'-', b'^', 63]), _ => writer.write_all(&[b'M', b'-', b'^', b'?']),
} }
.unwrap(); .unwrap();
count += 1; count += 1;

View file

@ -201,12 +201,12 @@ fn set_main_group(group: &str) -> UResult<()> {
} }
#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] #[cfg(any(target_vendor = "apple", target_os = "freebsd"))]
fn set_groups(groups: Vec<libc::gid_t>) -> libc::c_int { fn set_groups(groups: &[libc::gid_t]) -> libc::c_int {
unsafe { setgroups(groups.len() as libc::c_int, groups.as_ptr()) } unsafe { setgroups(groups.len() as libc::c_int, groups.as_ptr()) }
} }
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
fn set_groups(groups: Vec<libc::gid_t>) -> libc::c_int { fn set_groups(groups: &[libc::gid_t]) -> libc::c_int {
unsafe { setgroups(groups.len() as libc::size_t, groups.as_ptr()) } unsafe { setgroups(groups.len() as libc::size_t, groups.as_ptr()) }
} }
@ -220,7 +220,7 @@ fn set_groups_from_str(groups: &str) -> UResult<()> {
}; };
groups_vec.push(gid); groups_vec.push(gid);
} }
let err = set_groups(groups_vec); let err = set_groups(&groups_vec);
if err != 0 { if err != 0 {
return Err(ChrootError::SetGroupsFailed(Error::last_os_error()).into()); return Err(ChrootError::SetGroupsFailed(Error::last_os_error()).into());
} }

View file

@ -1025,7 +1025,10 @@ fn copy_directory(
if is_symlink && !options.dereference { if is_symlink && !options.dereference {
copy_link(&path, &local_to_target, symlinked_files)?; copy_link(&path, &local_to_target, symlinked_files)?;
} else if path.is_dir() && !local_to_target.exists() { } else if path.is_dir() && !local_to_target.exists() {
or_continue!(fs::create_dir_all(local_to_target)); if target.is_file() {
return Err("cannot overwrite non-directory with directory".into());
}
fs::create_dir_all(local_to_target)?;
} else if !path.is_dir() { } else if !path.is_dir() {
if preserve_hard_links { if preserve_hard_links {
let mut found_hard_link = false; let mut found_hard_link = false;

View file

@ -7,50 +7,11 @@
// spell-checker:ignore ctable, outfile // spell-checker:ignore ctable, outfile
use std::error::Error; use std::error::Error;
use std::time;
use uucore::error::UError; use uucore::error::UError;
use crate::conversion_tables::*; use crate::conversion_tables::*;
pub struct ProgUpdate {
pub read_stat: ReadStat,
pub write_stat: WriteStat,
pub duration: time::Duration,
}
#[derive(Clone, Copy, Default)]
pub struct ReadStat {
pub reads_complete: u64,
pub reads_partial: u64,
pub records_truncated: u32,
}
impl std::ops::AddAssign for ReadStat {
fn add_assign(&mut self, other: Self) {
*self = Self {
reads_complete: self.reads_complete + other.reads_complete,
reads_partial: self.reads_partial + other.reads_partial,
records_truncated: self.records_truncated + other.records_truncated,
}
}
}
#[derive(Clone, Copy)]
pub struct WriteStat {
pub writes_complete: u64,
pub writes_partial: u64,
pub bytes_total: u128,
}
impl std::ops::AddAssign for WriteStat {
fn add_assign(&mut self, other: Self) {
*self = Self {
writes_complete: self.writes_complete + other.writes_complete,
writes_partial: self.writes_partial + other.writes_partial,
bytes_total: self.bytes_total + other.bytes_total,
}
}
}
type Cbs = usize; type Cbs = usize;
/// Stores all Conv Flags that apply to the input /// Stores all Conv Flags that apply to the input
@ -116,15 +77,6 @@ pub struct OFlags {
pub seek_bytes: bool, pub seek_bytes: bool,
} }
/// The value of the status cl-option.
/// Controls printing of transfer stats
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum StatusLevel {
Progress,
Noxfer,
None,
}
/// The value of count=N /// The value of count=N
/// Defaults to Reads(N) /// Defaults to Reads(N)
/// if iflag=count_bytes /// if iflag=count_bytes

File diff suppressed because it is too large Load diff

View file

@ -1,351 +0,0 @@
// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat
use super::*;
// Declares a test through `make_spec_test!` for the `block` input conversion.
//
// Arguments:
// * `$test_id`   - identifier handed on to `make_spec_test!`
// * `$test_name` - name used to build the path of the output file
// * `$src`       - input source
// * `$block`     - value for the `block` field of the input conversion flags
// * `$spec`      - handed on to `make_spec_test!`; presumably the expected
//                  output (TODO confirm against `make_spec_test!`)
//
// Every other setting is fixed: 512 for both `ibs` and `obs`, no count, no
// print level and default flags. The output goes to
// `./test-resources/FAILED-{test_name}.test`; the same path is handed to
// `make_spec_test!` as its last argument.
#[cfg(unix)]
macro_rules! make_block_test (
( $test_id:ident, $test_name:expr, $src:expr, $block:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
Input {
src: $src,
non_ascii: false,
ibs: 512,
print_level: None,
count: None,
cflags: IConvFlags {
block: $block,
..IConvFlags::default()
},
iflags: IFlags::default(),
},
Output {
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: 512,
cflags: OConvFlags::default(),
},
$spec,
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
);
// Declares a test through `make_spec_test!` for the `unblock` input conversion.
//
// Arguments:
// * `$test_id`   - identifier handed on to `make_spec_test!`
// * `$test_name` - name used to build the path of the output file
// * `$src`       - input source
// * `$unblock`   - value for the `unblock` field of the input conversion flags
// * `$spec`      - handed on to `make_spec_test!`; presumably the expected
//                  output (TODO confirm against `make_spec_test!`)
//
// Every other setting is fixed: 512 for both `ibs` and `obs`, no count, no
// print level and default flags. The output goes to
// `./test-resources/FAILED-{test_name}.test`; the same path is handed to
// `make_spec_test!` as its last argument.
#[cfg(unix)]
macro_rules! make_unblock_test (
( $test_id:ident, $test_name:expr, $src:expr, $unblock:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
Input {
src: $src,
non_ascii: false,
ibs: 512,
print_level: None,
count: None,
cflags: IConvFlags {
unblock: $unblock,
..IConvFlags::default()
},
iflags: IFlags::default(),
},
Output {
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: 512,
cflags: OConvFlags::default(),
},
$spec,
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
);
// Input without a newline whose length equals the cbs of 4: expected as one
// record, unchanged.
#[test]
fn block_test_no_nl() {
let mut rs = ReadStat::default();
let buf = [0u8, 1u8, 2u8, 3u8];
let res = block(&buf, 4, &mut rs);
assert_eq!(res, vec![vec![0u8, 1u8, 2u8, 3u8],]);
}
// Input without a newline that is shorter than the cbs of 8: the record is
// expected to be padded with SPACE up to 8 bytes.
#[test]
fn block_test_no_nl_short_record() {
let mut rs = ReadStat::default();
let buf = [0u8, 1u8, 2u8, 3u8];
let res = block(&buf, 8, &mut rs);
assert_eq!(
res,
vec![vec![0u8, 1u8, 2u8, 3u8, SPACE, SPACE, SPACE, SPACE],]
);
}
// Input without a newline that is one byte longer than the cbs of 4: the
// surplus byte is expected to be dropped and counted as one truncated record.
#[test]
fn block_test_no_nl_trunc() {
let mut rs = ReadStat::default();
let buf = [0u8, 1u8, 2u8, 3u8, 4u8];
let res = block(&buf, 4, &mut rs);
// Commented section(s) should be truncated and appear for reference only.
assert_eq!(res, vec![vec![0u8, 1u8, 2u8, 3u8 /*, 4u8*/],]);
assert_eq!(rs.records_truncated, 1);
}
// Two newline-terminated lines of five bytes followed by four bytes without a
// newline, cbs of 4: each five-byte line is expected to lose its last byte,
// giving three records and a truncation count of 2.
#[test]
fn block_test_nl_gt_cbs_trunc() {
let mut rs = ReadStat::default();
let buf = [
0u8, 1u8, 2u8, 3u8, 4u8, NEWLINE, 0u8, 1u8, 2u8, 3u8, 4u8, NEWLINE, 5u8, 6u8, 7u8, 8u8,
];
let res = block(&buf, 4, &mut rs);
assert_eq!(
res,
vec![
// Commented section(s) should be truncated and appear for reference only.
vec![0u8, 1u8, 2u8, 3u8],
// vec![4u8, SPACE, SPACE, SPACE],
vec![0u8, 1u8, 2u8, 3u8],
// vec![4u8, SPACE, SPACE, SPACE],
vec![5u8, 6u8, 7u8, 8u8],
]
);
assert_eq!(rs.records_truncated, 2);
}
// A single newline with data on both sides, cbs of 8: two records are
// expected, each padded with SPACE to 8 bytes, without the newline itself.
#[test]
fn block_test_surrounded_nl() {
let mut rs = ReadStat::default();
let buf = [0u8, 1u8, 2u8, 3u8, NEWLINE, 4u8, 5u8, 6u8, 7u8, 8u8];
let res = block(&buf, 8, &mut rs);
assert_eq!(
res,
vec![
vec![0u8, 1u8, 2u8, 3u8, SPACE, SPACE, SPACE, SPACE],
vec![4u8, 5u8, 6u8, 7u8, 8u8, SPACE, SPACE, SPACE],
]
);
}
// Two newlines within the first eight input bytes, cbs of 8: every line is
// still expected to become its own padded record, three in total.
#[test]
fn block_test_multiple_nl_same_cbs_block() {
let mut rs = ReadStat::default();
let buf = [
0u8, 1u8, 2u8, 3u8, NEWLINE, 4u8, NEWLINE, 5u8, 6u8, 7u8, 8u8, 9u8,
];
let res = block(&buf, 8, &mut rs);
assert_eq!(
res,
vec![
vec![0u8, 1u8, 2u8, 3u8, SPACE, SPACE, SPACE, SPACE],
vec![4u8, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE],
vec![5u8, 6u8, 7u8, 8u8, 9u8, SPACE, SPACE, SPACE],
]
);
}
/// Newlines that are more than a record's worth of bytes apart in the input
/// each terminate their own padded record.
#[test]
fn block_test_multiple_nl_diff_cbs_block() {
    let input = [0u8, 1, 2, 3, NEWLINE, 4, 5, 6, 7, NEWLINE, 8, 9];
    let mut stats = ReadStat::default();
    let expected: Vec<Vec<u8>> = vec![
        vec![0, 1, 2, 3, SPACE, SPACE, SPACE, SPACE],
        vec![4, 5, 6, 7, SPACE, SPACE, SPACE, SPACE],
        vec![8, 9, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE],
    ];
    assert_eq!(block(&input, 8, &mut stats), expected);
}
/// A trailing newline that follows a completely filled record does not
/// create an extra, empty record.
#[test]
fn block_test_end_nl_diff_cbs_block() {
    let input = [0u8, 1, 2, 3, NEWLINE];
    let mut stats = ReadStat::default();
    let expected: Vec<Vec<u8>> = vec![vec![0, 1, 2, 3]];
    assert_eq!(block(&input, 4, &mut stats), expected);
}
/// A trailing newline inside the record is replaced by `SPACE` padding.
#[test]
fn block_test_end_nl_same_cbs_block() {
    let input = [0u8, 1, 2, NEWLINE];
    let mut stats = ReadStat::default();
    let expected: Vec<Vec<u8>> = vec![vec![0, 1, 2, SPACE]];
    assert_eq!(block(&input, 4, &mut stats), expected);
}
/// Two consecutive trailing newlines yield the data record followed by a
/// record made only of padding.
#[test]
fn block_test_double_end_nl() {
    let input = [0u8, 1, 2, NEWLINE, NEWLINE];
    let mut stats = ReadStat::default();
    let expected: Vec<Vec<u8>> = vec![
        vec![0, 1, 2, SPACE],
        vec![SPACE, SPACE, SPACE, SPACE],
    ];
    assert_eq!(block(&input, 4, &mut stats), expected);
}
/// A leading newline yields an all-padding record before the data record.
#[test]
fn block_test_start_nl() {
    let input = [NEWLINE, 0, 1, 2, 3];
    let mut stats = ReadStat::default();
    let expected: Vec<Vec<u8>> = vec![
        vec![SPACE, SPACE, SPACE, SPACE],
        vec![0, 1, 2, 3],
    ];
    assert_eq!(block(&input, 4, &mut stats), expected);
}
/// Two adjacent newlines between data produce an all-padding record between
/// the two padded data records; nothing is truncated at record size 8.
#[test]
fn block_test_double_surrounded_nl_no_trunc() {
    let input = [0u8, 1, 2, 3, NEWLINE, NEWLINE, 4, 5, 6, 7];
    let mut stats = ReadStat::default();
    let expected: Vec<Vec<u8>> = vec![
        vec![0, 1, 2, 3, SPACE, SPACE, SPACE, SPACE],
        vec![SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE],
        vec![4, 5, 6, 7, SPACE, SPACE, SPACE, SPACE],
    ];
    assert_eq!(block(&input, 8, &mut stats), expected);
}
/// Two adjacent newlines between data, with a final record one byte too long.
///
/// Despite the test's name, only that final record is truncated, so the
/// expected truncation count is 1.
#[test]
fn block_test_double_surrounded_nl_double_trunc() {
    let input = [0u8, 1, 2, 3, NEWLINE, NEWLINE, 4, 5, 6, 7, 8];
    let mut stats = ReadStat::default();
    let records = block(&input, 4, &mut stats);
    // The trailing byte (8) of the last record is cut off and must not
    // appear in the output.
    let expected: Vec<Vec<u8>> = vec![
        vec![0, 1, 2, 3],
        vec![SPACE, SPACE, SPACE, SPACE],
        vec![4, 5, 6, 7],
    ];
    assert_eq!(records, expected);
    assert_eq!(stats.records_truncated, 1);
}
// File-based block tests (unix only). Each one feeds a `.test` resource
// through `make_block_test!` and compares the result with a `.spec` resource.
// NOTE(review): the fourth argument is presumably the conversion block size
// (cbs), as the resource names suggest; the macro definition is not visible here.

// cbs16 input, Some(16), compared against the cbs16 spec.
#[cfg(unix)]
make_block_test!(
block_cbs16,
"block-cbs-16",
File::open("./test-resources/dd-block-cbs16.test").unwrap(),
Some(16),
File::open("./test-resources/dd-block-cbs16.spec").unwrap()
);
// Same cbs16 input, but Some(8), compared against the cbs8 spec.
#[cfg(unix)]
make_block_test!(
block_cbs16_as_cbs8,
"block-cbs-16-as-cbs8",
File::open("./test-resources/dd-block-cbs16.test").unwrap(),
Some(8),
File::open("./test-resources/dd-block-cbs8.spec").unwrap()
);
// Consecutive-newline input, Some(16), compared against its cbs16 spec.
#[cfg(unix)]
make_block_test!(
block_consecutive_nl,
"block-consecutive-nl",
File::open("./test-resources/dd-block-consecutive-nl.test").unwrap(),
Some(16),
File::open("./test-resources/dd-block-consecutive-nl-cbs16.spec").unwrap()
);
/// A record with no trailing spaces keeps every byte and gains a newline.
#[test]
fn unblock_test_full_cbs() {
    let record = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let expected: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, NEWLINE];
    assert_eq!(unblock(&record, 8), expected);
}
/// A record consisting only of spaces collapses to a lone newline.
#[test]
fn unblock_test_all_space() {
    let record = [SPACE; 8];
    let expected: Vec<u8> = vec![NEWLINE];
    assert_eq!(unblock(&record, 8), expected);
}
/// Spaces in the interior of a record are data, not padding: they are kept
/// because the record ends in a non-space byte.
#[test]
fn unblock_test_decoy_spaces() {
    let record = [0u8, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, 7];
    let expected: Vec<u8> = vec![0, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, 7, NEWLINE];
    assert_eq!(unblock(&record, 8), expected);
}
/// Trailing spaces of a single record are stripped and replaced by a newline.
#[test]
fn unblock_test_strip_single_cbs() {
    let record = [0u8, 1, 2, 3, SPACE, SPACE, SPACE, SPACE];
    let expected: Vec<u8> = vec![0, 1, 2, 3, NEWLINE];
    assert_eq!(unblock(&record, 8), expected);
}
/// Four back-to-back 8-byte records each have their trailing spaces stripped
/// and a newline appended.
#[test]
fn unblock_test_strip_multi_cbs() {
    // Records are concatenated into one flat buffer, as `unblock` takes bytes.
    let records: Vec<u8> = vec![
        vec![0u8, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE],
        vec![0u8, 1, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE],
        vec![0u8, 1, 2, SPACE, SPACE, SPACE, SPACE, SPACE],
        vec![0u8, 1, 2, 3, SPACE, SPACE, SPACE, SPACE],
    ]
    .concat();
    let expected: Vec<u8> = vec![
        vec![0u8, NEWLINE],
        vec![0u8, 1, NEWLINE],
        vec![0u8, 1, 2, NEWLINE],
        vec![0u8, 1, 2, 3, NEWLINE],
    ]
    .concat();
    let lines = unblock(&records, 8);
    assert_eq!(lines, expected);
}
// File-based unblock tests (unix only): a `.test` resource is run through
// `make_unblock_test!` and compared with a `.spec` resource.

// cbs16 input, Some(16), compared against the cbs16 spec.
#[cfg(unix)]
make_unblock_test!(
unblock_multi_16,
"unblock-multi-16",
File::open("./test-resources/dd-unblock-cbs16.test").unwrap(),
Some(16),
File::open("./test-resources/dd-unblock-cbs16.spec").unwrap()
);
// Same cbs16 input, but Some(8), compared against the cbs8 spec.
#[cfg(unix)]
make_unblock_test!(
unblock_multi_16_as_8,
"unblock-multi-16-as-8",
File::open("./test-resources/dd-unblock-cbs16.test").unwrap(),
Some(8),
File::open("./test-resources/dd-unblock-cbs8.spec").unwrap()
);

View file

@ -1,106 +0,0 @@
// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat
use super::*;
// Declares a spec test (through `make_spec_test!`) for `conv=sync`-style runs.
//
// Arguments: test fn identifier, test name (used in the temporary file name),
// input source, the value stored in `IConvFlags::sync`, input block size,
// output block size, and the spec to compare the output against.
macro_rules! make_sync_test (
( $test_id:ident, $test_name:expr, $src:expr, $sync:expr, $ibs:expr, $obs:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
Input {
src: $src,
non_ascii: false,
ibs: $ibs,
print_level: None,
count: None,
cflags: IConvFlags {
sync: $sync,
..IConvFlags::default()
},
iflags: IFlags::default(),
},
Output {
// Output goes to a temporary FAILED-<name>.test file.
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: $obs,
cflags: OConvFlags::default(),
},
$spec,
// Must be the same path as `dst` above: it is reopened for comparison.
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
);
// Sync tests: every case pads with Some(0u8) and uses the block-size pair
// 521/1031, once with obs > ibs and once with ibs > obs.
// Zeros input
make_sync_test!(
zeros_4k_conv_sync_obs_gt_ibs,
"zeros_4k_conv_sync_obs_gt_ibs",
File::open("./test-resources/zeros-620f0b67a91f7f74151bc5be745b7110.test").unwrap(),
Some(0u8),
521,
1031,
File::open("./test-resources/gnudd-conv-sync-ibs-521-obs-1031-zeros.spec").unwrap()
);
make_sync_test!(
zeros_4k_conv_sync_ibs_gt_obs,
"zeros_4k_conv_sync_ibs_gt_obs",
File::open("./test-resources/zeros-620f0b67a91f7f74151bc5be745b7110.test").unwrap(),
Some(0u8),
1031,
521,
File::open("./test-resources/gnudd-conv-sync-ibs-1031-obs-521-zeros.spec").unwrap()
);
// Deadbeef input
make_sync_test!(
deadbeef_32k_conv_sync_obs_gt_ibs,
"deadbeef_32k_conv_sync_obs_gt_ibs",
File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap(),
Some(0u8),
521,
1031,
File::open("./test-resources/gnudd-conv-sync-ibs-521-obs-1031-deadbeef.spec").unwrap()
);
make_sync_test!(
deadbeef_32k_conv_sync_ibs_gt_obs,
"deadbeef_32k_conv_sync_ibs_gt_obs",
File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap(),
Some(0u8),
1031,
521,
File::open("./test-resources/gnudd-conv-sync-ibs-1031-obs-521-deadbeef.spec").unwrap()
);
// Random input
make_sync_test!(
random_73k_test_bs_prime_obs_gt_ibs_sync,
"random-73k-test-bs-prime-obs-gt-ibs-sync",
File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap(),
Some(0u8),
521,
1031,
File::open("./test-resources/gnudd-conv-sync-ibs-521-obs-1031-random.spec").unwrap()
);
make_sync_test!(
random_73k_test_bs_prime_ibs_gt_obs_sync,
"random-73k-test-bs-prime-ibs-gt-obs-sync",
File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap(),
Some(0u8),
1031,
521,
File::open("./test-resources/gnudd-conv-sync-ibs-1031-obs-521-random.spec").unwrap()
);
// Short reads: the source is wrapped in `LazyReader`, with ibs 16 and obs 32.
make_sync_test!(
deadbeef_16_delayed,
"deadbeef-16-delayed",
LazyReader {
src: File::open("./test-resources/deadbeef-16.test").unwrap()
},
Some(0u8),
16,
32,
File::open("./test-resources/deadbeef-16.spec").unwrap()
);

View file

@ -1,233 +0,0 @@
// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat
use super::*;
// Declares a spec test (through `make_spec_test!`) for a conversion table.
//
// Arguments: test fn identifier, test name (used in the temporary file name),
// input source, the conversion table passed to `icf!`, and the spec to
// compare the output against. Block sizes are fixed at 512.
macro_rules! make_conv_test (
( $test_id:ident, $test_name:expr, $src:expr, $ctable:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
Input {
src: $src,
non_ascii: false,
ibs: 512,
print_level: None,
count: None,
cflags: icf!($ctable),
iflags: IFlags::default(),
},
Output {
// Output goes to a temporary FAILED-<name>.test file.
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: 512,
cflags: OConvFlags::default(),
},
$spec,
// Must be the same path as `dst` above: it is reopened for comparison.
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
);
// Declares a spec test (through `make_spec_test!`) with caller-supplied
// input conversion flags.
//
// Arguments: test fn identifier, test name (used in the temporary file name),
// input source, the complete `IConvFlags` value to use, and the spec to
// compare the output against. Block sizes are fixed at 512.
macro_rules! make_icf_test (
( $test_id:ident, $test_name:expr, $src:expr, $icf:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
Input {
src: $src,
non_ascii: false,
ibs: 512,
print_level: None,
count: None,
cflags: $icf,
iflags: IFlags::default(),
},
Output {
// Output goes to a temporary FAILED-<name>.test file.
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: 512,
cflags: OConvFlags::default(),
},
$spec,
// Must be the same path as `dst` above: it is reopened for comparison.
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
);
// Table-driven conversion tests. Each one applies a single conversion table
// to an input resource and compares the output with an expected-result file.

// ASCII -> EBCDIC over the sequential byte-values input.
make_conv_test!(
atoe_conv_spec_test,
"atoe-conv-spec-test",
File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
Some(&ASCII_TO_EBCDIC),
File::open("./test-resources/gnudd-conv-atoe-seq-byte-values.spec").unwrap()
);
// EBCDIC -> ASCII over the sequential byte-values input.
make_conv_test!(
etoa_conv_spec_test,
"etoa-conv-spec-test",
File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
Some(&EBCDIC_TO_ASCII),
File::open("./test-resources/gnudd-conv-etoa-seq-byte-values.spec").unwrap()
);
// ASCII -> IBM over the sequential byte-values input.
make_conv_test!(
atoibm_conv_spec_test,
"atoibm-conv-spec-test",
File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
Some(&ASCII_TO_IBM),
File::open("./test-resources/gnudd-conv-atoibm-seq-byte-values.spec").unwrap()
);
// Lower-case ASCII input must come out as the upper-case ASCII resource.
make_conv_test!(
lcase_ascii_to_ucase_ascii,
"lcase_ascii_to_ucase_ascii",
File::open("./test-resources/lcase-ascii.test").unwrap(),
Some(&ASCII_LCASE_TO_UCASE),
File::open("./test-resources/ucase-ascii.test").unwrap()
);
// Upper-case ASCII input must come out as the lower-case ASCII resource.
make_conv_test!(
ucase_ascii_to_lcase_ascii,
"ucase_ascii_to_lcase_ascii",
File::open("./test-resources/ucase-ascii.test").unwrap(),
Some(&ASCII_UCASE_TO_LCASE),
File::open("./test-resources/lcase-ascii.test").unwrap()
);
// Combined table: ASCII -> EBCDIC with lower case mapped to upper case.
make_conv_test!(
// conv=ebcdic,ucase
atoe_and_ucase_conv_spec_test,
"atoe-and-ucase-conv-spec-test",
File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
Some(&ASCII_TO_EBCDIC_LCASE_TO_UCASE),
File::open("./test-resources/ucase-ebcdic.test").unwrap()
);
// Combined table: ASCII -> EBCDIC with upper case mapped to lower case.
make_conv_test!(
// conv=ebcdic,lcase
atoe_and_lcase_conv_spec_test,
"atoe-and-lcase-conv-spec-test",
File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
Some(&ASCII_TO_EBCDIC_UCASE_TO_LCASE),
File::open("./test-resources/lcase-ebcdic.test").unwrap()
);
// Combined ASCII -> IBM + case tables.
//
// The test ids, names and `conv=` comments are labelled after the conversion
// the table actually performs (`UCASE_TO_LCASE` produces lower case and is
// checked against `lcase-ibm.test`), matching the EBCDIC pair of tests.
make_conv_test!(
    // conv=ibm,lcase
    atoibm_and_lcase_conv_spec_test,
    "atoibm-and-lcase-conv-spec-test",
    File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
    Some(&ASCII_TO_IBM_UCASE_TO_LCASE),
    File::open("./test-resources/lcase-ibm.test").unwrap()
);
make_conv_test!(
    // conv=ibm,ucase
    atoibm_and_ucase_conv_spec_test,
    "atoibm-and-ucase-conv-spec-test",
    File::open("./test-resources/seq-byte-values-b632a992d3aed5d8d1a59cc5a5a455ba.test").unwrap(),
    Some(&ASCII_TO_IBM_LCASE_TO_UCASE),
    File::open("./test-resources/ucase-ibm.test").unwrap()
);
/// Converts the "all valid ASCII" resource to EBCDIC and back again, then
/// checks that the round-tripped bytes equal the original input.
///
/// Both intermediate files are written under `./test-resources/` with a
/// `FAILED-` prefix and are removed only when every assertion has passed.
#[test]
fn all_valid_ascii_ebcdic_ascii_roundtrip_conv_test() {
    // The same resource is the input of the first pass and the final spec.
    let src_fname =
        "./test-resources/all-valid-ascii-chars-37eff01866ba3f538421b30b7cbefcac.test";

    // ASCII->EBCDIC
    let test_name = "all-valid-ascii-to-ebcdic";
    let tmp_fname_ae = format!("./test-resources/FAILED-{}.test", test_name);
    let i = Input {
        src: File::open(src_fname).unwrap(),
        non_ascii: false,
        ibs: 128,
        print_level: None,
        count: None,
        cflags: icf!(Some(&ASCII_TO_EBCDIC)),
        iflags: IFlags::default(),
    };
    let o = Output {
        dst: File::create(&tmp_fname_ae).unwrap(),
        obs: 1024,
        cflags: OConvFlags::default(),
    };
    o.dd_out(i).unwrap();

    // EBCDIC->ASCII
    let test_name = "all-valid-ebcdic-to-ascii";
    let tmp_fname_ea = format!("./test-resources/FAILED-{}.test", test_name);
    let i = Input {
        src: File::open(&tmp_fname_ae).unwrap(),
        non_ascii: false,
        ibs: 256,
        print_level: None,
        count: None,
        cflags: icf!(Some(&EBCDIC_TO_ASCII)),
        iflags: IFlags::default(),
    };
    let o = Output {
        dst: File::create(&tmp_fname_ea).unwrap(),
        obs: 1024,
        cflags: OConvFlags::default(),
    };
    o.dd_out(i).unwrap();

    // Final Comparison
    let res = File::open(&tmp_fname_ea).unwrap();
    let spec = File::open(src_fname).unwrap();
    // Lengths are compared first because `zip` below stops at the shorter
    // stream and would not notice missing or extra trailing bytes.
    assert_eq!(
        res.metadata().unwrap().len(),
        spec.metadata().unwrap().len()
    );
    // Each file is buffered exactly once.
    let res = BufReader::new(res);
    let spec = BufReader::new(spec);
    // Check all bytes match
    for (b_res, b_spec) in res.bytes().zip(spec.bytes()) {
        assert_eq!(b_res.unwrap(), b_spec.unwrap());
    }

    fs::remove_file(&tmp_fname_ae).unwrap();
    fs::remove_file(&tmp_fname_ea).unwrap();
}
// Byte-swap tests: only `swab` is enabled; every other `IConvFlags` field is
// spelled out as off/None.

// swab test named "swab-256": sequential byte values vs. the swapped resource.
make_icf_test!(
swab_256_test,
"swab-256",
File::open("./test-resources/seq-byte-values.test").unwrap(),
IConvFlags {
ctable: None,
block: None,
unblock: None,
swab: true,
sync: None,
noerror: false,
},
File::open("./test-resources/seq-byte-values-swapped.test").unwrap()
);
// swab test named "swab-257": the "odd" resource vs. its spec.
make_icf_test!(
swab_257_test,
"swab-257",
File::open("./test-resources/seq-byte-values-odd.test").unwrap(),
IConvFlags {
ctable: None,
block: None,
unblock: None,
swab: true,
sync: None,
noerror: false,
},
File::open("./test-resources/seq-byte-values-odd.spec").unwrap()
);

View file

@ -1,89 +0,0 @@
// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat
use super::*;
mod block_unblock_tests;
mod conv_sync_tests;
mod conversion_tests;
mod sanity_tests;
use std::fs;
use std::io::prelude::*;
use std::io::BufReader;
/// A reader adaptor that deliberately performs short reads.
///
/// Every call to `read` hands the wrapped reader at most half of the
/// caller's buffer (but at least one byte when the buffer is non-empty), so
/// code under test cannot rely on a read filling the whole buffer.
struct LazyReader<R: Read> {
    /// The wrapped reader that actually supplies the bytes.
    src: R,
}

impl<R: Read> Read for LazyReader<R> {
    /// Reads into the first `max(buf.len() / 2, 1)` bytes of `buf`.
    ///
    /// Returns `Ok(0)` without touching the wrapped reader when `buf` is
    /// empty; any error from the wrapped reader is passed through.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // With an empty buffer the "at least one byte" rule would slice
        // `buf[..1]` and panic, so answer the zero-length read directly.
        if buf.is_empty() {
            return Ok(0);
        }
        let reduced = cmp::max(buf.len() / 2, 1);
        self.src.read(&mut buf[..reduced])
    }
}
// Builds an `IConvFlags` whose `ctable` is the given expression and whose
// remaining fields come from `IConvFlags::default()`.
#[macro_export]
macro_rules! icf (
( $ctable:expr ) =>
{
IConvFlags {
ctable: $ctable,
..IConvFlags::default()
}
};
);
// Declares a `#[test]` function that runs dd and compares its output file
// with a spec, byte for byte.
//
// Three forms are accepted:
// * `(id, name, src)`: default settings; `$src` is also used as the spec.
// * `(id, name, src, spec)`: default flags, with ibs and obs both 512.
// * `(id, name, input, output, spec, tmp_fname)`: fully explicit; the other
//   forms (and the other test macros) expand to this one.
//
// `$src` in the first form and `$spec`/`$tmp_fname` in the last form appear
// more than once in the expansion, so the expressions passed for them are
// evaluated once per appearance.
#[macro_export]
macro_rules! make_spec_test (
( $test_id:ident, $test_name:expr, $src:expr ) =>
{
// When spec not given, output should match input
make_spec_test!($test_id, $test_name, $src, $src);
};
( $test_id:ident, $test_name:expr, $src:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
Input {
src: $src,
non_ascii: false,
ibs: 512,
print_level: None,
count: None,
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: 512,
cflags: OConvFlags::default(),
},
$spec,
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
( $test_id:ident, $test_name:expr, $i:expr, $o:expr, $spec:expr, $tmp_fname:expr ) =>
{
#[test]
fn $test_id()
{
$o.dd_out($i).unwrap();
let res = File::open($tmp_fname).unwrap();
// Check the output has exactly the same length as the spec; the zip
// below stops at the shorter stream and would miss a length mismatch.
assert_eq!(res.metadata().unwrap().len(), $spec.metadata().unwrap().len());
let spec = BufReader::new($spec);
let res = BufReader::new(res);
// Check all bytes match
for (b_res, b_spec) in res.bytes().zip(spec.bytes())
{
assert_eq!(b_res.unwrap(),
b_spec.unwrap());
}
// Only reached when every assertion passed; after a failure the
// FAILED-* output file is left on disk.
fs::remove_file($tmp_fname).unwrap();
}
};
);

View file

@ -1,316 +0,0 @@
// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat
use super::*;
// Stand-in `dst` for the `Output` literals passed to `make_io_test!`: that
// macro reads only `obs` and `cflags` from them and creates its own file.
const DST_PLACEHOLDER: Vec<u8> = Vec::new();
// Declares a spec test (through `make_spec_test!`) from a complete `Input`
// and an `Output` template.
//
// Only `obs` and `cflags` are taken from `$o`; its `dst` is ignored and
// replaced by a freshly created FAILED-<name>.test file. `$o` appears twice
// in the expansion, so the expression passed for it is evaluated twice.
macro_rules! make_io_test (
( $test_id:ident, $test_name:expr, $i:expr, $o:expr, $spec:expr ) =>
{
make_spec_test!($test_id,
$test_name,
$i,
Output {
dst: File::create(format!("./test-resources/FAILED-{}.test", $test_name)).unwrap(),
obs: $o.obs,
cflags: $o.cflags,
},
$spec,
// Must be the same path as `dst` above: it is reopened for comparison.
format!("./test-resources/FAILED-{}.test", $test_name)
);
};
);
// Pass-through tests: no spec is given, so with default settings the output
// must be identical to the input resource.
make_spec_test!(
zeros_4k_test,
"zeros-4k",
File::open("./test-resources/zeros-620f0b67a91f7f74151bc5be745b7110.test").unwrap()
);
make_spec_test!(
ones_4k_test,
"ones-4k",
File::open("./test-resources/ones-6ae59e64850377ee5470c854761551ea.test").unwrap()
);
make_spec_test!(
deadbeef_32k_test,
"deadbeef-32k",
File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap()
);
make_spec_test!(
random_73k_test,
"random-73k",
File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap()
);
// Mismatched block sizes (ibs 521, obs 1031); output must equal the input.
make_io_test!(
random_73k_test_not_a_multiple_obs_gt_ibs,
"random-73k-not-a-multiple-obs-gt-ibs",
Input {
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap(),
non_ascii: false,
ibs: 521,
print_level: None,
count: None,
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 1031,
cflags: OConvFlags::default(),
},
File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap()
);
// Same as above with the block sizes swapped (ibs 1031, obs 521).
make_io_test!(
random_73k_test_obs_lt_not_a_multiple_ibs,
"random-73k-obs-lt-not-a-multiple-ibs",
Input {
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap(),
non_ascii: false,
ibs: 1031,
print_level: None,
count: None,
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 521,
cflags: OConvFlags::default(),
},
File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap()
);
// Count of 32 reads at ibs 1024; the expected output is the whole input file.
make_io_test!(
deadbeef_all_32k_test_count_reads,
"deadbeef_all_32k_test_count_reads",
Input {
src: File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap(),
non_ascii: false,
ibs: 1024,
print_level: None,
count: Some(CountType::Reads(32)),
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 1024,
cflags: OConvFlags::default(),
},
File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap()
);
// Count of 32 * 1024 bytes; the expected output is the whole input file.
make_io_test!(
deadbeef_all_32k_test_count_bytes,
"deadbeef_all_32k_test_count_bytes",
Input {
src: File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap(),
non_ascii: false,
ibs: 531,
print_level: None,
count: Some(CountType::Bytes(32 * 1024)),
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 1031,
cflags: OConvFlags::default(),
},
File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap()
);
// Count of 16 reads at ibs 1024; expected output is the first-16k spec.
// The test name matches the test id so that a leftover FAILED-* file can be
// traced back to this test, as with the sibling tests.
make_io_test!(
    deadbeef_32k_to_16k_test_count_reads,
    "deadbeef_32k_to_16k_test_count_reads",
    Input {
        src: File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap(),
        non_ascii: false,
        ibs: 1024,
        print_level: None,
        count: Some(CountType::Reads(16)),
        cflags: IConvFlags::default(),
        iflags: IFlags::default(),
    },
    Output {
        dst: DST_PLACEHOLDER,
        obs: 1031,
        cflags: OConvFlags::default(),
    },
    File::open("./test-resources/gnudd-deadbeef-first-16k.spec").unwrap()
);
// Count of 12345 bytes; expected output is the first-12345 spec.
make_io_test!(
deadbeef_32k_to_12345_test_count_bytes,
"deadbeef_32k_to_12345_test_count_bytes",
Input {
src: File::open("./test-resources/deadbeef-18d99661a1de1fc9af21b0ec2cd67ba3.test").unwrap(),
non_ascii: false,
ibs: 531,
print_level: None,
count: Some(CountType::Bytes(12345)),
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 1031,
cflags: OConvFlags::default(),
},
File::open("./test-resources/gnudd-deadbeef-first-12345.spec").unwrap()
);
// Count of 32 reads at ibs 1024; expected output is the first-32k spec.
make_io_test!(
random_73k_test_count_reads,
"random-73k-test-count-reads",
Input {
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap(),
non_ascii: false,
ibs: 1024,
print_level: None,
count: Some(CountType::Reads(32)),
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 1024,
cflags: OConvFlags::default(),
},
File::open("./test-resources/gnudd-random-first-32k.spec").unwrap()
);
// Count of 32 * 1024 bytes; expected output is the same first-32k spec.
make_io_test!(
random_73k_test_count_bytes,
"random-73k-test-count-bytes",
Input {
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap(),
non_ascii: false,
ibs: 521,
print_level: None,
count: Some(CountType::Bytes(32 * 1024)),
cflags: IConvFlags::default(),
iflags: IFlags::default(),
},
Output {
dst: DST_PLACEHOLDER,
obs: 1031,
cflags: OConvFlags::default(),
},
File::open("./test-resources/gnudd-random-first-32k.spec").unwrap()
);
// Short reads via `LazyReader` with `fullblock` set; output must equal input.
make_io_test!(
random_73k_test_lazy_fullblock,
"random-73k-test-lazy-fullblock",
Input {
src: LazyReader {
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test")
.unwrap()
},
non_ascii: false,
ibs: 521,
print_level: None,
count: None,
cflags: IConvFlags::default(),
iflags: IFlags {
fullblock: true,
..IFlags::default()
},
},
Output {
dst: DST_PLACEHOLDER,
obs: 1031,
cflags: OConvFlags::default(),
},
File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test").unwrap()
);
// Test internal buffer size fn
/// For two distinct primes the buffer size is their product.
#[test]
fn bsize_test_primes() {
    let ibs = 7901;
    let obs = 7919;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, ibs * obs);
}
/// Sizes sharing the factor 5119, second argument larger: the result is a
/// common multiple that contains the shared factor only once.
#[test]
fn bsize_test_rel_prime_obs_greater() {
    let ibs = 7 * 5119;
    let obs = 13 * 5119;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, 7 * 13 * 5119);
}
/// Sizes sharing the factor 5119, first argument larger: the result is a
/// common multiple that contains the shared factor only once.
#[test]
fn bsize_test_rel_prime_ibs_greater() {
    let ibs = 13 * 5119;
    let obs = 7 * 5119;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, 7 * 13 * 5119);
}
/// Sizes sharing two factors (11 and 5119): each shared factor is counted
/// once in the result.
#[test]
fn bsize_test_3fac_rel_prime() {
    let ibs = 11 * 13 * 5119;
    let obs = 7 * 11 * 5119;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, 7 * 11 * 13 * 5119);
}
/// When the first size is a multiple of the second, the result is the first.
#[test]
fn bsize_test_ibs_greater() {
    let ibs = 512 * 1024;
    let obs = 256 * 1024;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, ibs);
}
/// When the second size is a multiple of the first, the result is the second.
#[test]
fn bsize_test_obs_greater() {
    let ibs = 256 * 1024;
    let obs = 512 * 1024;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, obs);
}
/// Equal sizes give that same size back.
#[test]
fn bsize_test_bs_eq() {
    let ibs = 1024;
    let obs = 1024;
    let bsize = calc_bsize(ibs, obs);
    assert!(bsize % ibs == 0);
    assert!(bsize % obs == 0);
    assert_eq!(bsize, obs);
}
/// Building an `Output` with `--conv=nocreat` for an output path named
/// `not-a-real.file` is expected to fail, which makes the final `unwrap` panic.
///
/// Note that `#[should_panic]` is also satisfied by a panic from the
/// argument-parsing `unwrap`.
#[test]
#[should_panic]
fn test_nocreat_causes_failure_when_ofile_doesnt_exist() {
    let args: Vec<String> = ["dd", "--conv=nocreat", "--of=not-a-real.file"]
        .iter()
        .map(|arg| String::from(*arg))
        .collect();

    let matches = uu_app().try_get_matches_from(args).unwrap();
    let _ = Output::<File>::new(&matches).unwrap();
}

View file

@ -4,7 +4,7 @@
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore ctty, ctable, iconvflags, oconvflags // spell-checker:ignore ctty, ctable, iconvflags, oconvflags parseargs
#[cfg(test)] #[cfg(test)]
mod unit_tests; mod unit_tests;
@ -12,6 +12,8 @@ mod unit_tests;
use super::*; use super::*;
use std::error::Error; use std::error::Error;
use uucore::error::UError; use uucore::error::UError;
use uucore::parse_size::ParseSizeError;
use uucore::show_warning;
pub type Matches = ArgMatches; pub type Matches = ArgMatches;
@ -31,6 +33,25 @@ pub enum ParseError {
Unimplemented(String), Unimplemented(String),
} }
impl ParseError {
    /// Returns the same kind of error carrying `s` as its argument.
    ///
    /// Variants that hold a string get it replaced by `s`; variants without
    /// an argument are returned unchanged. Consumes `self`.
    fn with_arg(self, s: String) -> Self {
        match self {
            // Nothing to replace on the argument-less variants.
            Self::MultipleFmtTable
            | Self::MultipleUCaseLCase
            | Self::MultipleBlockUnblock
            | Self::MultipleExclNoCreate
            | Self::BlockUnblockWithoutCBS => self,
            Self::FlagNoMatch(_) => Self::FlagNoMatch(s),
            Self::ConvFlagNoMatch(_) => Self::ConvFlagNoMatch(s),
            Self::MultiplierStringParseFailure(_) => Self::MultiplierStringParseFailure(s),
            Self::MultiplierStringOverflow(_) => Self::MultiplierStringOverflow(s),
            Self::StatusLevelNotRecognized(_) => Self::StatusLevelNotRecognized(s),
            Self::Unimplemented(_) => Self::Unimplemented(s),
        }
    }
}
impl std::fmt::Display for ParseError { impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
@ -304,33 +325,96 @@ impl std::str::FromStr for StatusLevel {
} }
} }
fn show_zero_multiplier_warning() {
show_warning!(
"{} is a zero multiplier; use {} if that is intended",
"0x".quote(),
"00x".quote()
);
}
/// Parse bytes using str::parse, then map error if needed. /// Parse bytes using str::parse, then map error if needed.
fn parse_bytes_only(s: &str) -> Result<usize, ParseError> { fn parse_bytes_only(s: &str) -> Result<usize, ParseError> {
s.parse() s.parse()
.map_err(|_| ParseError::MultiplierStringParseFailure(s.to_string())) .map_err(|_| ParseError::MultiplierStringParseFailure(s.to_string()))
} }
/// Parse a number of bytes from the given string, assuming no `'x'` characters.
///
/// The `'x'` character means "multiply the number before the `'x'` by
/// the number after the `'x'`". In order to compute the numbers
/// before and after the `'x'`, use this function, which assumes there
/// are no `'x'` characters in the string.
///
/// A suffix `'c'` means multiply by 1, `'w'` by 2, and `'b'` by
/// 512. You can also use standard block size suffixes like `'k'` for
/// 1024. The `'c'`, `'w'` and `'b'` suffixes are only accepted as the
/// final character of the string.
///
/// # Errors
///
/// If a number cannot be parsed, if a `'c'`, `'w'` or `'b'` suffix is
/// followed by further characters, or if the multiplication would
/// cause an overflow.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(parse_bytes_no_x("123").unwrap(), 123);
/// assert_eq!(parse_bytes_no_x("2c").unwrap(), 2 * 1);
/// assert_eq!(parse_bytes_no_x("3w").unwrap(), 3 * 2);
/// assert_eq!(parse_bytes_no_x("2b").unwrap(), 2 * 512);
/// assert_eq!(parse_bytes_no_x("2k").unwrap(), 2 * 1024);
/// assert!(parse_bytes_no_x("2c3").is_err());
/// ```
fn parse_bytes_no_x(s: &str) -> Result<usize, ParseError> {
    let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) {
        (None, None, None) => match uucore::parse_size::parse_size(s) {
            Ok(n) => (n, 1),
            Err(ParseSizeError::ParseFailure(s)) => {
                return Err(ParseError::MultiplierStringParseFailure(s))
            }
            Err(ParseSizeError::SizeTooBig(s)) => {
                return Err(ParseError::MultiplierStringOverflow(s))
            }
        },
        // The suffix must be the last character; otherwise everything after
        // it would be silently ignored (e.g. "2c3" being read as 2).
        (Some(i), None, None) if i + 1 == s.len() => (parse_bytes_only(&s[..i])?, 1),
        (None, Some(i), None) if i + 1 == s.len() => (parse_bytes_only(&s[..i])?, 2),
        (None, None, Some(i)) if i + 1 == s.len() => (parse_bytes_only(&s[..i])?, 512),
        // More than one kind of suffix, or a suffix that is not at the end.
        _ => return Err(ParseError::MultiplierStringParseFailure(s.to_string())),
    };
    num.checked_mul(multiplier)
        .ok_or_else(|| ParseError::MultiplierStringOverflow(s.to_string()))
}
/// Parse byte and multiplier like 512, 5KiB, or 1G. /// Parse byte and multiplier like 512, 5KiB, or 1G.
/// Uses uucore::parse_size, and adds the 'w' and 'c' suffixes which are mentioned /// Uses uucore::parse_size, and adds the 'w' and 'c' suffixes which are mentioned
/// in dd's info page. /// in dd's info page.
fn parse_bytes_with_opt_multiplier(s: &str) -> Result<usize, ParseError> { fn parse_bytes_with_opt_multiplier(s: &str) -> Result<usize, ParseError> {
if let Some(idx) = s.rfind('c') { // TODO On my Linux system, there seems to be a maximum block size of 4096 bytes:
parse_bytes_only(&s[..idx]) //
} else if let Some(idx) = s.rfind('w') { // $ printf "%0.sa" {1..10000} | dd bs=4095 count=1 status=none | wc -c
let partial = parse_bytes_only(&s[..idx])?; // 4095
// $ printf "%0.sa" {1..10000} | dd bs=4k count=1 status=none | wc -c
// 4096
// $ printf "%0.sa" {1..10000} | dd bs=4097 count=1 status=none | wc -c
// 4096
// $ printf "%0.sa" {1..10000} | dd bs=5k count=1 status=none | wc -c
// 4096
//
partial // Split on the 'x' characters. Each component will be parsed
.checked_mul(2) // individually, then multiplied together.
.ok_or_else(|| ParseError::MultiplierStringOverflow(s.to_string())) let parts: Vec<&str> = s.split('x').collect();
if parts.len() == 1 {
parse_bytes_no_x(parts[0]).map_err(|e| e.with_arg(s.to_string()))
} else { } else {
uucore::parse_size::parse_size(s).map_err(|e| match e { let mut total = 1;
uucore::parse_size::ParseSizeError::ParseFailure(s) => { for part in parts {
ParseError::MultiplierStringParseFailure(s) if part == "0" {
show_zero_multiplier_warning();
} }
uucore::parse_size::ParseSizeError::SizeTooBig(s) => { let num = parse_bytes_no_x(part).map_err(|e| e.with_arg(s.to_string()))?;
ParseError::MultiplierStringOverflow(s) total *= num;
} }
}) Ok(total)
} }
} }
@ -353,7 +437,7 @@ fn parse_cbs(matches: &Matches) -> Result<Option<usize>, ParseError> {
} }
} }
pub fn parse_status_level(matches: &Matches) -> Result<Option<StatusLevel>, ParseError> { pub(crate) fn parse_status_level(matches: &Matches) -> Result<Option<StatusLevel>, ParseError> {
match matches.value_of(options::STATUS) { match matches.value_of(options::STATUS) {
Some(s) => { Some(s) => {
let st = s.parse()?; let st = s.parse()?;
@ -414,16 +498,11 @@ fn parse_flag_list<T: std::str::FromStr<Err = ParseError>>(
tag: &str, tag: &str,
matches: &Matches, matches: &Matches,
) -> Result<Vec<T>, ParseError> { ) -> Result<Vec<T>, ParseError> {
let mut flags = Vec::new(); matches
.values_of(tag)
if let Some(comma_str) = matches.value_of(tag) { .unwrap_or_default()
for s in comma_str.split(',') { .map(|f| f.parse())
let flag = s.parse()?; .collect()
flags.push(flag);
}
}
Ok(flags)
} }
/// Parse Conversion Options (Input Variety) /// Parse Conversion Options (Input Variety)
@ -689,3 +768,25 @@ pub fn parse_input_non_ascii(matches: &Matches) -> Result<bool, ParseError> {
Ok(false) Ok(false)
} }
} }
#[cfg(test)]
mod tests {

    use crate::parseargs::parse_bytes_with_opt_multiplier;

    /// Plain numbers, the `c`/`w`/`b`/`k` suffixes and `x`-separated products
    /// parse to the expected byte counts; trailing garbage is rejected.
    #[test]
    fn test_parse_bytes_with_opt_multiplier() {
        let accepted = [
            ("123", 123),
            ("123c", 123 * 1),
            ("123w", 123 * 2),
            ("123b", 123 * 512),
            ("123x3", 123 * 3),
            ("123k", 123 * 1024),
            ("1x2x3", 1 * 2 * 3),
            ("1wx2cx3w", (1 * 2) * (2 * 1) * (3 * 2)),
        ];
        for &(input, expected) in accepted.iter() {
            assert_eq!(parse_bytes_with_opt_multiplier(input).unwrap(), expected);
        }

        assert!(parse_bytes_with_opt_multiplier("123asdf").is_err());
    }
}

View file

@ -299,6 +299,116 @@ fn test_status_level_noxfer() {
assert_eq!(st, StatusLevel::Noxfer); assert_eq!(st, StatusLevel::Noxfer);
} }
/// Giving `--iflag`, `--oflag` or `--conv` more than once accumulates the
/// flags of every occurrence, in command-line order.
#[test]
fn test_multiple_flags_options() {
    let args: Vec<String> = [
        "dd",
        "--iflag=fullblock,directory",
        "--iflag=skip_bytes",
        "--oflag=direct",
        "--oflag=dsync",
        "--conv=ascii,ucase",
        "--conv=unblock",
    ]
    .iter()
    .map(|arg| String::from(*arg))
    .collect();
    let matches = uu_app().try_get_matches_from(args).unwrap();

    // iflag
    let expected_iflags = vec![Flag::FullBlock, Flag::Directory, Flag::SkipBytes];
    let iflags = parse_flag_list::<Flag>(options::IFLAG, &matches).unwrap();
    assert_eq!(expected_iflags, iflags);

    // oflag
    let expected_oflags = vec![Flag::Direct, Flag::Dsync];
    let oflags = parse_flag_list::<Flag>(options::OFLAG, &matches).unwrap();
    assert_eq!(expected_oflags, oflags);

    // conv
    let expected_conv = vec![ConvFlag::FmtEtoA, ConvFlag::UCase, ConvFlag::Unblock];
    let conv = parse_flag_list::<ConvFlag>(options::CONV, &matches).unwrap();
    assert_eq!(expected_conv, conv);
}
// Single-valued options may be repeated on the command line; this test
// expects the last occurrence of each option to win.
#[test]
fn test_override_multiple_options() {
// Every option appears twice; the second value is the one asserted below.
let args = vec![
String::from("dd"),
String::from("--if=foo.file"),
String::from("--if=correct.file"),
String::from("--of=bar.file"),
String::from("--of=correct.file"),
String::from("--ibs=256"),
String::from("--ibs=1024"),
String::from("--obs=256"),
String::from("--obs=1024"),
String::from("--cbs=1"),
String::from("--cbs=2"),
String::from("--skip=0"),
String::from("--skip=2"),
String::from("--seek=0"),
String::from("--seek=2"),
String::from("--status=none"),
String::from("--status=noxfer"),
String::from("--count=512"),
String::from("--count=1024"),
];
let matches = uu_app().try_get_matches_from(args).unwrap();
// if
assert_eq!("correct.file", matches.value_of(options::INFILE).unwrap());
// of
assert_eq!("correct.file", matches.value_of(options::OUTFILE).unwrap());
// ibs
assert_eq!(1024, parse_ibs(&matches).unwrap());
// obs
assert_eq!(1024, parse_obs(&matches).unwrap());
// cbs
assert_eq!(2, parse_cbs(&matches).unwrap().unwrap());
// status
assert_eq!(
StatusLevel::Noxfer,
parse_status_level(&matches).unwrap().unwrap()
);
// skip
// NOTE(review): 200 is consistent with the final `--skip=2` scaled by the
// `100` passed as the first argument (presumably the input block size) --
// confirm against `parse_skip_amt`.
assert_eq!(
200,
parse_skip_amt(&100, &IFlags::default(), &matches)
.unwrap()
.unwrap()
);
// seek
// NOTE(review): same scaling as for `skip`, presumably by the output block
// size -- confirm against `parse_seek_amt`.
assert_eq!(
200,
parse_seek_amt(&100, &OFlags::default(), &matches)
.unwrap()
.unwrap()
);
// count
// With `count_bytes` set, the count is expected to come back as a byte count.
assert_eq!(
CountType::Bytes(1024),
parse_count(
&IFlags {
count_bytes: true,
..IFlags::default()
},
&matches
)
.unwrap()
.unwrap()
);
}
// ----- IConvFlags/Output ----- // ----- IConvFlags/Output -----
#[test] #[test]

517
src/uu/dd/src/progress.rs Normal file
View file

@ -0,0 +1,517 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore btotal sigval
//! Read and write progress tracking for dd.
//!
//! The [`ProgUpdate`] struct represents summary statistics for the
//! read and write progress of a running `dd` process. The
//! [`gen_prog_updater`] function can be used to implement a progress
//! updater that runs in its own thread.
use std::io::Write;
use std::sync::mpsc;
use std::time::Duration;
use byte_unit::Byte;
// On Linux, we register a signal handler that prints progress updates.
#[cfg(target_os = "linux")]
use signal_hook::consts::signal;
#[cfg(target_os = "linux")]
use std::{
env,
error::Error,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
};
/// Summary statistics for read and write progress of dd for a given duration.
///
/// Values of this type are what [`gen_prog_updater`] receives over its
/// `mpsc` channel and what the `print_*`/`write_*` methods format.
pub(crate) struct ProgUpdate {
/// Read statistics.
///
/// This contains information about the number of blocks read from
/// the data source.
pub(crate) read_stat: ReadStat,
/// Write statistics.
///
/// This contains information about the number of blocks and
/// number of bytes written to the data sink.
pub(crate) write_stat: WriteStat,
/// The time period over which the reads and writes were measured.
///
/// Used as the denominator when computing the transfer rate.
pub(crate) duration: Duration,
}
impl ProgUpdate {
/// Instantiate this struct.
///
/// The three arguments are stored unchanged in the fields of the same name.
pub(crate) fn new(read_stat: ReadStat, write_stat: WriteStat, duration: Duration) -> Self {
Self {
read_stat,
write_stat,
duration,
}
}
/// Write the number of complete and partial records both read and written.
///
/// The information is written to `w` in this order: the "records in"
/// line, the "records out" line, and then a "truncated record(s)"
/// line. The last line is omitted when no records were truncated.
///
/// # Errors
///
/// If there is a problem writing to `w`.
///
/// # Examples
///
/// ```rust,ignore
/// use std::io::Cursor;
/// use std::time::Duration;
/// use crate::progress::{ProgUpdate, ReadStat, WriteStat};
///
/// let read_stat = ReadStat::new(1, 2, 3);
/// let write_stat = WriteStat::new(4, 5, 6);
/// let duration = Duration::new(789, 0);
/// let prog_update = ProgUpdate {
///     read_stat,
///     write_stat,
///     duration,
/// };
///
/// let mut cursor = Cursor::new(vec![]);
/// prog_update.write_io_lines(&mut cursor).unwrap();
/// assert_eq!(
///     cursor.get_ref(),
///     b"1+2 records in\n4+5 records out\n3 truncated records\n"
/// );
/// ```
fn write_io_lines(&self, w: &mut impl Write) -> std::io::Result<()> {
self.read_stat.report(w)?;
self.write_stat.report(w)?;
// Singular/plural wording; nothing at all is printed for zero.
match self.read_stat.records_truncated {
0 => {}
1 => writeln!(w, "1 truncated record")?,
n => writeln!(w, "{} truncated records", n)?,
}
Ok(())
}
/// Write the number of bytes written, duration, and throughput.
///
/// The information is written to `w`. If `rewrite` is `true`,
/// then a `\r` character is written first and no newline is
/// written at the end. When writing to `stderr`, this has the
/// visual effect of overwriting the previous characters on the
/// line.
///
/// The throughput is the total number of bytes written divided by
/// the elapsed time. Durations shorter than one millisecond are
/// treated as one millisecond so that the division is always
/// defined.
///
/// # Errors
///
/// If there is a problem writing to `w`.
///
/// # Examples
///
/// ```rust,ignore
/// use std::io::Cursor;
/// use std::time::Duration;
/// use crate::progress::{ProgUpdate, ReadStat, WriteStat};
///
/// let prog_update = ProgUpdate {
///     read_stat: Default::default(),
///     write_stat: Default::default(),
///     duration: Duration::new(1, 0), // one second
/// };
///
/// let mut cursor = Cursor::new(vec![]);
/// let rewrite = false;
/// prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
/// assert_eq!(
///     cursor.get_ref(),
///     b"0 bytes (0 B, 0 B) copied, 1.0 s, 0 B/s\n"
/// );
/// ```
fn write_prog_line(&self, w: &mut impl Write, rewrite: bool) -> std::io::Result<()> {
    let btotal = self.write_stat.bytes_total;
    let btotal_metric = Byte::from_bytes(btotal)
        .get_appropriate_unit(false)
        .format(0);
    let btotal_bin = Byte::from_bytes(btotal)
        .get_appropriate_unit(true)
        .format(0);

    // Never divide by zero, even for transfers that finish "instantly".
    let safe_millis = std::cmp::max(1, self.duration.as_millis());
    // Scale to bytes-per-second *before* dividing. Dividing first
    // truncates the quotient, so e.g. 500 bytes in 1000 ms would be
    // reported as 0 B/s instead of 500 B/s.
    let bytes_per_second = btotal.saturating_mul(1000) / safe_millis;
    let transfer_rate = Byte::from_bytes(bytes_per_second)
        .get_appropriate_unit(false)
        .format(1);

    let duration = self.duration.as_secs_f64();
    if rewrite {
        write!(
            w,
            "\r{} bytes ({}, {}) copied, {:.1} s, {}/s",
            btotal, btotal_metric, btotal_bin, duration, transfer_rate
        )
    } else {
        writeln!(
            w,
            "{} bytes ({}, {}) copied, {:.1} s, {}/s",
            btotal, btotal_metric, btotal_bin, duration, transfer_rate
        )
    }
}
/// Write the complete transfer summary to `w`.
///
/// This emits the record-count lines via
/// [`ProgUpdate::write_io_lines`] followed by the byte-count and
/// throughput line via [`ProgUpdate::write_prog_line`], the latter
/// terminated by a newline.
///
/// # Errors
///
/// If there is a problem writing to `w`.
///
/// # Examples
///
/// ```rust,ignore
/// use std::io::Cursor;
/// use std::time::Duration;
/// use crate::progress::{ProgUpdate, ReadStat, WriteStat};
///
/// let prog_update = ProgUpdate {
///     read_stat: Default::default(),
///     write_stat: Default::default(),
///     duration: Duration::new(1, 0), // one second
/// };
/// let mut cursor = Cursor::new(vec![]);
/// prog_update.write_transfer_stats(&mut cursor).unwrap();
/// let mut iter = cursor.get_ref().split(|v| *v == b'\n');
/// assert_eq!(iter.next().unwrap(), b"0+0 records in");
/// assert_eq!(iter.next().unwrap(), b"0+0 records out");
/// assert_eq!(
///     iter.next().unwrap(),
///     b"0 bytes (0 B, 0 B) copied, 1.0 s, 0 B/s"
/// );
/// assert_eq!(iter.next().unwrap(), b"");
/// assert!(iter.next().is_none());
/// ```
fn write_transfer_stats(&self, w: &mut impl Write) -> std::io::Result<()> {
    self.write_io_lines(w)?;
    // `false`: finish the line with a newline instead of rewriting in place.
    self.write_prog_line(w, false)
}
/// Print the complete and partial record counts to stderr.
///
/// See [`ProgUpdate::write_io_lines`] for the output format.
///
/// # Panics
///
/// If writing to stderr fails.
pub(crate) fn print_io_lines(&self) {
    self.write_io_lines(&mut std::io::stderr()).unwrap();
}
/// Overwrite the current stderr line with bytes written, duration, and throughput.
///
/// See [`ProgUpdate::write_prog_line`] for the output format; it is
/// called here in "rewrite" mode (leading `\r`, no trailing newline).
///
/// # Panics
///
/// If writing to stderr fails.
pub(crate) fn reprint_prog_line(&self) {
    self.write_prog_line(&mut std::io::stderr(), true).unwrap();
}
/// Print all summary statistics to stderr.
///
/// See [`ProgUpdate::write_transfer_stats`] for the output format.
///
/// # Panics
///
/// If writing to stderr fails.
pub(crate) fn print_transfer_stats(&self) {
    self.write_transfer_stats(&mut std::io::stderr()).unwrap();
}
}
/// Counters describing what has been read from the input so far.
///
/// Reads are counted in blocks; a block is sometimes referred to as
/// a "record".
#[derive(Clone, Copy, Default)]
pub(crate) struct ReadStat {
    /// How many full blocks have been read.
    pub(crate) reads_complete: u64,

    /// How many partial blocks have been read.
    ///
    /// A partial read happens, for example, when the input holds
    /// fewer bytes than the requested input block size.
    pub(crate) reads_partial: u64,

    /// How many records were truncated.
    ///
    /// Truncation can only occur in `conv=block` mode.
    pub(crate) records_truncated: u32,
}

impl ReadStat {
    /// Build a counter from explicit values.
    #[allow(dead_code)]
    fn new(complete: u64, partial: u64, truncated: u32) -> Self {
        Self {
            reads_complete: complete,
            reads_partial: partial,
            records_truncated: truncated,
        }
    }

    /// Whether no block, complete or partial, has been read.
    ///
    /// The truncated-record count is not taken into account.
    pub(crate) fn is_empty(&self) -> bool {
        (self.reads_complete, self.reads_partial) == (0, 0)
    }

    /// Write the "records in" line in the format required by `dd`.
    ///
    /// # Errors
    ///
    /// If there is a problem writing to `w`.
    fn report(&self, w: &mut impl Write) -> std::io::Result<()> {
        let (complete, partial) = (self.reads_complete, self.reads_partial);
        writeln!(w, "{}+{} records in", complete, partial)
    }
}

impl std::ops::AddAssign for ReadStat {
    /// Add each counter of `other` to the matching counter of `self`.
    fn add_assign(&mut self, other: Self) {
        self.reads_complete += other.reads_complete;
        self.reads_partial += other.reads_partial;
        self.records_truncated += other.records_truncated;
    }
}
/// Counters describing what has been written to the output so far.
///
/// Tracks the number of blocks written as well as the total number
/// of bytes written.
#[derive(Clone, Copy, Default)]
pub(crate) struct WriteStat {
    /// How many full blocks have been written.
    pub(crate) writes_complete: u64,

    /// How many partial blocks have been written.
    ///
    /// A partial write happens, for example, when the input holds
    /// fewer bytes than the requested output block size.
    pub(crate) writes_partial: u64,

    /// Total number of bytes written.
    pub(crate) bytes_total: u128,
}

impl WriteStat {
    /// Build a counter from explicit values.
    #[allow(dead_code)]
    fn new(complete: u64, partial: u64, bytes_total: u128) -> Self {
        Self {
            writes_complete: complete,
            writes_partial: partial,
            bytes_total,
        }
    }

    /// Write the "records out" line in the format required by `dd`.
    ///
    /// # Errors
    ///
    /// If there is a problem writing to `w`.
    fn report(&self, w: &mut impl Write) -> std::io::Result<()> {
        let (complete, partial) = (self.writes_complete, self.writes_partial);
        writeln!(w, "{}+{} records out", complete, partial)
    }
}

impl std::ops::AddAssign for WriteStat {
    /// Add each counter of `other` to the matching counter of `self`.
    fn add_assign(&mut self, other: Self) {
        self.writes_complete += other.writes_complete;
        self.writes_partial += other.writes_partial;
        self.bytes_total += other.bytes_total;
    }
}
/// How much detail to report when printing transfer statistics.
///
/// This corresponds to the available settings of the `status`
/// command-line argument.
///
/// `PartialEq` is derived because [`gen_prog_updater`] compares an
/// `Option<StatusLevel>` against specific variants with `==`/`!=`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum StatusLevel {
/// Report number of blocks read and written, throughput, and volume.
///
/// This corresponds to `status=progress`.
Progress,
/// Report number of blocks read and written, but no throughput and volume.
///
/// This corresponds to `status=noxfer`.
Noxfer,
/// Print no status information.
///
/// Not to be confused with `Option::None`.
None,
}
/// Build the body of the progress-reporting thread.
///
/// The returned closure blocks on `rx` and handles every
/// [`ProgUpdate`] it receives until the sending side of the channel
/// is closed. When `print_level` is `Some(StatusLevel::Progress)`,
/// each update causes the progress line to be re-printed to stderr;
/// for any other level the updates are drained without output.
#[cfg(not(target_os = "linux"))]
pub(crate) fn gen_prog_updater(
    rx: mpsc::Receiver<ProgUpdate>,
    print_level: Option<StatusLevel>,
) -> impl Fn() {
    // The level never changes, so decide once up front.
    let show_progress = print_level == Some(StatusLevel::Progress);
    move || {
        // `iter()` yields until every sender has been dropped.
        for update in rx.iter() {
            if show_progress {
                update.reprint_prog_line();
            }
        }
    }
}
/// Return a closure that can be used in its own thread to print progress info.
///
/// This function returns a closure that receives [`ProgUpdate`]
/// instances sent through `rx` until the sending side of the channel
/// is closed. When `print_level` is `Some(StatusLevel::Progress)`,
/// the progress line is re-printed to stderr, at most once per
/// second of elapsed transfer time.
///
/// The closure also registers a signal handler for `SIGUSR1`, unless
/// the `POSIXLY_CORRECT` environment variable is set. Each `SIGUSR1`
/// sent to this process causes the full transfer statistics to be
/// printed to stderr once, when the next update arrives. If the
/// handler cannot be registered, a warning is printed (unless
/// `print_level` is `Some(StatusLevel::None)`) and the closure
/// carries on without signal support.
#[cfg(target_os = "linux")]
pub(crate) fn gen_prog_updater(
    rx: mpsc::Receiver<ProgUpdate>,
    print_level: Option<StatusLevel>,
) -> impl Fn() {
    // TODO: SIGINFO: Trigger progress line reprint. BSD-style Linux only.
    const SIGUSR1_USIZE: usize = signal::SIGUSR1 as usize;

    /// Whether the `POSIXLY_CORRECT` environment variable is set.
    fn posixly_correct() -> bool {
        env::var("POSIXLY_CORRECT").is_ok()
    }

    /// Arrange for `SIGUSR1` to store `SIGUSR1_USIZE` into `sigval`.
    fn register_linux_signal_handler(sigval: Arc<AtomicUsize>) -> Result<(), Box<dyn Error>> {
        if !posixly_correct() {
            signal_hook::flag::register_usize(signal::SIGUSR1, sigval, SIGUSR1_USIZE)?;
        }
        Ok(())
    }
    // --------------------------------------------------------------
    move || {
        // 0 means "no signal pending"; the handler overwrites it.
        let sigval = Arc::new(AtomicUsize::new(0));

        register_linux_signal_handler(sigval.clone()).unwrap_or_else(|e| {
            if Some(StatusLevel::None) != print_level {
                eprintln!(
                    "Internal dd Warning: Unable to register signal handler \n\t{}",
                    e
                );
            }
        });

        // Earliest elapsed time (in whole seconds) at which the next
        // progress line may be printed.
        let mut progress_as_secs = 0;
        while let Ok(update) = rx.recv() {
            // (Re)print status line if progress is requested.
            if Some(StatusLevel::Progress) == print_level
                && update.duration.as_secs() >= progress_as_secs
            {
                update.reprint_prog_line();
                progress_as_secs = update.duration.as_secs() + 1;
            }
            // Handle signals. `swap` clears the flag while reading it, so
            // one SIGUSR1 yields exactly one report. A plain `load` would
            // leave the flag set and print the statistics again for every
            // subsequent update.
            if sigval.swap(0, Ordering::Relaxed) == SIGUSR1_USIZE {
                update.print_transfer_stats();
            }
        }
    }
}
#[cfg(test)]
mod tests {
// Unit tests for the formatting of progress and summary lines. All output
// is captured in an in-memory `Cursor` rather than written to stderr.
use std::io::Cursor;
use std::time::Duration;
use super::{ProgUpdate, ReadStat, WriteStat};
// The "records in" line shows complete+partial reads; the truncated count
// (third constructor argument) is not part of this line.
#[test]
fn test_read_stat_report() {
let read_stat = ReadStat::new(1, 2, 3);
let mut cursor = Cursor::new(vec![]);
read_stat.report(&mut cursor).unwrap();
assert_eq!(cursor.get_ref(), b"1+2 records in\n");
}
// The "records out" line shows complete+partial writes; the byte total
// (third constructor argument) is not part of this line.
#[test]
fn test_write_stat_report() {
let write_stat = WriteStat::new(1, 2, 3);
let mut cursor = Cursor::new(vec![]);
write_stat.report(&mut cursor).unwrap();
assert_eq!(cursor.get_ref(), b"1+2 records out\n");
}
// Line order: records in, records out, then truncated records.
#[test]
fn test_prog_update_write_io_lines() {
let read_stat = ReadStat::new(1, 2, 3);
let write_stat = WriteStat::new(4, 5, 6);
let duration = Duration::new(789, 0);
let prog_update = ProgUpdate {
read_stat,
write_stat,
duration,
};
let mut cursor = Cursor::new(vec![]);
prog_update.write_io_lines(&mut cursor).unwrap();
assert_eq!(
cursor.get_ref(),
b"1+2 records in\n4+5 records out\n3 truncated records\n"
);
}
// Non-rewrite mode: no leading `\r`, and the line ends with a newline.
#[test]
fn test_prog_update_write_prog_line() {
let prog_update = ProgUpdate {
read_stat: Default::default(),
write_stat: Default::default(),
duration: Duration::new(1, 0), // one second
};
let mut cursor = Cursor::new(vec![]);
let rewrite = false;
prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
// TODO The expected output string below is what our code
// produces today, but it does not match GNU dd:
//
//     $ : | dd
//     0 bytes copied, 7.9151e-05 s, 0.0 kB/s
//
assert_eq!(
cursor.get_ref(),
b"0 bytes (0 B, 0 B) copied, 1.0 s, 0 B/s\n"
);
}
// The full summary is the I/O lines followed by the progress line.
#[test]
fn write_transfer_stats() {
let prog_update = ProgUpdate {
read_stat: Default::default(),
write_stat: Default::default(),
duration: Duration::new(1, 0), // one second
};
let mut cursor = Cursor::new(vec![]);
prog_update.write_transfer_stats(&mut cursor).unwrap();
let mut iter = cursor.get_ref().split(|v| *v == b'\n');
assert_eq!(iter.next().unwrap(), b"0+0 records in");
assert_eq!(iter.next().unwrap(), b"0+0 records out");
assert_eq!(
iter.next().unwrap(),
b"0 bytes (0 B, 0 B) copied, 1.0 s, 0 B/s"
);
// Splitting on the final newline leaves one empty trailing chunk.
assert_eq!(iter.next().unwrap(), b"");
assert!(iter.next().is_none());
}
}

View file

@ -5,30 +5,27 @@
// //
// For the full copyright and license information, please view the LICENSE file // For the full copyright and license information, please view the LICENSE file
// that was distributed with this source code. // that was distributed with this source code.
mod table;
use uucore::error::UError;
use uucore::error::UResult; use uucore::error::UResult;
#[cfg(unix)] #[cfg(unix)]
use uucore::fsext::statfs_fn; use uucore::fsext::statfs_fn;
use uucore::fsext::{read_fs_list, FsUsage, MountInfo}; use uucore::fsext::{read_fs_list, FsUsage, MountInfo};
use clap::{crate_version, App, AppSettings, Arg}; use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use number_prefix::NumberPrefix;
use std::cell::Cell;
use std::collections::HashMap;
use std::collections::HashSet; use std::collections::HashSet;
use std::error::Error;
#[cfg(unix)] #[cfg(unix)]
use std::ffi::CString; use std::ffi::CString;
use std::fmt::Display; use std::iter::FromIterator;
#[cfg(unix)] #[cfg(unix)]
use std::mem; use std::mem;
#[cfg(windows)] #[cfg(windows)]
use std::path::Path; use std::path::Path;
use crate::table::{DisplayRow, Header, Row};
static ABOUT: &str = "Show information about the file system on which each FILE resides,\n\ static ABOUT: &str = "Show information about the file system on which each FILE resides,\n\
or all file systems by default."; or all file systems by default.";
@ -58,6 +55,7 @@ struct FsSelector {
exclude: HashSet<String>, exclude: HashSet<String>,
} }
#[derive(Default)]
struct Options { struct Options {
show_local_fs: bool, show_local_fs: bool,
show_all_fs: bool, show_all_fs: bool,
@ -69,6 +67,27 @@ struct Options {
fs_selector: FsSelector, fs_selector: FsSelector,
} }
impl Options {
    /// Build the [`Options`] that correspond to the parsed command line.
    ///
    /// `human_readable_base` is 1024 when `OPT_HUMAN_READABLE` is
    /// present, otherwise 1000 when `OPT_HUMAN_READABLE_2` is present,
    /// otherwise -1. `show_listed_fs` is always `false`.
    fn from(matches: &ArgMatches) -> Self {
        // The binary-prefix flag takes precedence when both are given.
        let human_readable_base = if matches.is_present(OPT_HUMAN_READABLE) {
            1024
        } else if matches.is_present(OPT_HUMAN_READABLE_2) {
            1000
        } else {
            -1
        };
        let fs_selector = FsSelector::from(matches);

        Self {
            show_local_fs: matches.is_present(OPT_LOCAL),
            show_all_fs: matches.is_present(OPT_ALL),
            show_listed_fs: false,
            show_fs_type: matches.is_present(OPT_PRINT_TYPE),
            show_inode_instead: matches.is_present(OPT_INODES),
            human_readable_base,
            fs_selector,
        }
    }
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct Filesystem { struct Filesystem {
mount_info: MountInfo, mount_info: MountInfo,
@ -80,18 +99,19 @@ fn usage() -> String {
} }
impl FsSelector { impl FsSelector {
fn new() -> Self { /// Convert command-line arguments into a [`FsSelector`].
Self::default() ///
} /// This function reads the include and exclude sets from
/// [`ArgMatches`] and returns the corresponding [`FsSelector`]
#[inline(always)] /// instance.
fn include(&mut self, fs_type: String) { fn from(matches: &ArgMatches) -> Self {
self.include.insert(fs_type); let include = HashSet::from_iter(matches.values_of_lossy(OPT_TYPE).unwrap_or_default());
} let exclude = HashSet::from_iter(
matches
#[inline(always)] .values_of_lossy(OPT_EXCLUDE_TYPE)
fn exclude(&mut self, fs_type: String) { .unwrap_or_default(),
self.exclude.insert(fs_type); );
Self { include, exclude }
} }
fn should_select(&self, fs_type: &str) -> bool { fn should_select(&self, fs_type: &str) -> bool {
@ -102,24 +122,6 @@ impl FsSelector {
} }
} }
impl Options {
fn new() -> Self {
Self {
show_local_fs: false,
show_all_fs: false,
show_listed_fs: false,
show_fs_type: false,
show_inode_instead: false,
// block_size: match env::var("BLOCKSIZE") {
// Ok(size) => size.parse().unwrap(),
// Err(_) => 512,
// },
human_readable_base: -1,
fs_selector: FsSelector::new(),
}
}
}
impl Filesystem { impl Filesystem {
// TODO: resolve uuid in `mount_info.dev_name` if exists // TODO: resolve uuid in `mount_info.dev_name` if exists
fn new(mount_info: MountInfo) -> Option<Self> { fn new(mount_info: MountInfo) -> Option<Self> {
@ -157,122 +159,99 @@ impl Filesystem {
} }
} }
fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> { /// Whether to display the mount info given the inclusion settings.
vmi.into_iter() fn is_included(mi: &MountInfo, paths: &[String], opt: &Options) -> bool {
.filter_map(|mi| { // Don't show remote filesystems if `--local` has been given.
if (mi.remote && opt.show_local_fs) if mi.remote && opt.show_local_fs {
|| (mi.dummy && !opt.show_all_fs && !opt.show_listed_fs) return false;
|| !opt.fs_selector.should_select(&mi.fs_type)
{
None
} else {
if paths.is_empty() {
// No path specified
return Some((mi.dev_id.clone(), mi));
} }
if paths.contains(&mi.mount_dir) {
// One or more paths have been provided // Don't show pseudo filesystems unless `--all` has been given.
Some((mi.dev_id.clone(), mi)) if mi.dummy && !opt.show_all_fs && !opt.show_listed_fs {
} else { return false;
// Not a path we want to see
None
} }
// Don't show filesystems if they have been explicitly excluded.
if !opt.fs_selector.should_select(&mi.fs_type) {
return false;
} }
})
.fold( // Don't show filesystems other than the ones specified on the
HashMap::<String, Cell<MountInfo>>::new(), // command line, if any.
|mut acc, (id, mi)| { if !paths.is_empty() && !paths.contains(&mi.mount_dir) {
#[allow(clippy::map_entry)] return false;
{ }
if acc.contains_key(&id) {
let seen = acc[&id].replace(mi.clone()); true
let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len(); }
// With bind mounts, prefer items nearer the root of the source
let source_below_root = !seen.mount_root.is_empty() /// Whether the mount info in `m2` should be prioritized over `m1`.
&& !mi.mount_root.is_empty() ///
&& seen.mount_root.len() < mi.mount_root.len(); /// The "lt" in the function name is in analogy to the
/// [`std::cmp::PartialOrd::lt`].
fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
// let "real" devices with '/' in the name win. // let "real" devices with '/' in the name win.
if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/')) if m1.dev_name.starts_with('/') && !m2.dev_name.starts_with('/') {
return false;
}
let m1_nearer_root = m1.mount_dir.len() < m2.mount_dir.len();
// With bind mounts, prefer items nearer the root of the source
let m2_below_root = !m1.mount_root.is_empty()
&& !m2.mount_root.is_empty()
&& m1.mount_root.len() > m2.mount_root.len();
// let points towards the root of the device win. // let points towards the root of the device win.
&& (!target_nearer_root || source_below_root) if m1_nearer_root && !m2_below_root {
// let an entry over-mounted on a new device win... return false;
&& (seen.dev_name == mi.dev_name
/* ... but only when matching an existing mnt point,
to avoid problematic replacement when given
inaccurate mount lists, seen with some chroot
environments for example. */
|| seen.mount_dir != mi.mount_dir)
{
acc[&id].replace(seen);
} }
} else {
acc.insert(id, Cell::new(mi)); // let an entry over-mounted on a new device win, but only when
// matching an existing mnt point, to avoid problematic
// replacement when given inaccurate mount lists, seen with some
// chroot environments for example.
if m1.dev_name != m2.dev_name && m1.mount_dir == m2.mount_dir {
return false;
} }
acc
} true
},
)
.into_iter()
.map(|ent| ent.1.into_inner())
.collect::<Vec<_>>()
} }
/// Convert `value` to a human readable string based on `base`. /// Whether to prioritize given mount info over all others on the same device.
/// e.g. It returns 1G when value is 1 * 1024 * 1024 * 1024 and base is 1024. ///
/// Note: It returns `value` if `base` isn't positive. /// This function decides whether the mount info `mi` is better than
fn human_readable(value: u64, base: i64) -> UResult<String> { /// all others in `previous` that mount the same device as `mi`.
let base_str = match base { fn is_best(previous: &[MountInfo], mi: &MountInfo) -> bool {
d if d < 0 => value.to_string(), for seen in previous {
if seen.dev_id == mi.dev_id && mount_info_lt(mi, seen) {
// ref: [Binary prefix](https://en.wikipedia.org/wiki/Binary_prefix) @@ <https://archive.is/cnwmF> return false;
// ref: [SI/metric prefix](https://en.wikipedia.org/wiki/Metric_prefix) @@ <https://archive.is/QIuLj> }
1000 => match NumberPrefix::decimal(value as f64) { }
NumberPrefix::Standalone(bytes) => bytes.to_string(), true
NumberPrefix::Prefixed(prefix, bytes) => format!("{:.1}{}", bytes, prefix.symbol()),
},
1024 => match NumberPrefix::binary(value as f64) {
NumberPrefix::Standalone(bytes) => bytes.to_string(),
NumberPrefix::Prefixed(prefix, bytes) => format!("{:.1}{}", bytes, prefix.symbol()),
},
_ => return Err(DfError::InvalidBaseValue(base.to_string()).into()),
};
Ok(base_str)
} }
fn use_size(free_size: u64, total_size: u64) -> String { /// Keep only the specified subset of [`MountInfo`] instances.
if total_size == 0 { ///
return String::from("-"); /// If `paths` is non-empty, this function excludes any [`MountInfo`]
} /// that is not mounted at the specified path.
return format!( ///
"{:.0}%", /// The `opt` argument specifies a variety of ways of excluding
100f64 - 100f64 * (free_size as f64 / total_size as f64) /// [`MountInfo`] instances; see [`Options`] for more information.
); ///
} /// Finally, if there are duplicate entries, the one with the shorter
/// path is kept.
#[derive(Debug)] fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> {
enum DfError { let mut result = vec![];
InvalidBaseValue(String), for mi in vmi {
} // TODO The running time of the `is_best()` function is linear
// in the length of `result`. That makes the running time of
impl Display for DfError { // this loop quadratic in the length of `vmi`. This could be
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // improved by a more efficient implementation of `is_best()`,
match self { // but `vmi` is probably not very long in practice.
DfError::InvalidBaseValue(s) => write!(f, "Internal error: Unknown base value {}", s), if is_included(&mi, paths, opt) && is_best(&result, &mi) {
} result.push(mi);
}
}
impl Error for DfError {}
impl UError for DfError {
fn code(&self) -> i32 {
match self {
DfError::InvalidBaseValue(_) => 1,
} }
} }
result
} }
#[uucore::main] #[uucore::main]
@ -293,127 +272,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
} }
} }
let mut opt = Options::new(); let opt = Options::from(&matches);
if matches.is_present(OPT_LOCAL) {
opt.show_local_fs = true;
}
if matches.is_present(OPT_ALL) {
opt.show_all_fs = true;
}
if matches.is_present(OPT_INODES) {
opt.show_inode_instead = true;
}
if matches.is_present(OPT_PRINT_TYPE) {
opt.show_fs_type = true;
}
if matches.is_present(OPT_HUMAN_READABLE) {
opt.human_readable_base = 1024;
}
if matches.is_present(OPT_HUMAN_READABLE_2) {
opt.human_readable_base = 1000;
}
for fs_type in matches.values_of_lossy(OPT_TYPE).unwrap_or_default() {
opt.fs_selector.include(fs_type.to_owned());
}
for fs_type in matches
.values_of_lossy(OPT_EXCLUDE_TYPE)
.unwrap_or_default()
{
opt.fs_selector.exclude(fs_type.to_owned());
}
let fs_list = filter_mount_list(read_fs_list(), &paths, &opt) let mounts = read_fs_list();
let data: Vec<Row> = filter_mount_list(mounts, &paths, &opt)
.into_iter() .into_iter()
.filter_map(Filesystem::new) .filter_map(Filesystem::new)
.filter(|fs| fs.usage.blocks != 0 || opt.show_all_fs || opt.show_listed_fs) .filter(|fs| fs.usage.blocks != 0 || opt.show_all_fs || opt.show_listed_fs)
.collect::<Vec<_>>(); .map(Into::into)
.collect();
// set headers println!("{}", Header::new(&opt));
let mut header = vec!["Filesystem"]; for row in data {
if opt.show_fs_type { println!("{}", DisplayRow::new(row, &opt));
header.push("Type");
}
header.extend_from_slice(&if opt.show_inode_instead {
// spell-checker:disable-next-line
["Inodes", "Iused", "IFree", "IUses%"]
} else {
[
if opt.human_readable_base == -1 {
"1k-blocks"
} else {
"Size"
},
"Used",
"Available",
"Use%",
]
});
if cfg!(target_os = "macos") && !opt.show_inode_instead {
header.insert(header.len() - 1, "Capacity");
}
header.push("Mounted on");
for (idx, title) in header.iter().enumerate() {
if idx == 0 || idx == header.len() - 1 {
print!("{0: <16} ", title);
} else if opt.show_fs_type && idx == 1 {
print!("{0: <5} ", title);
} else if idx == header.len() - 2 {
print!("{0: >5} ", title);
} else {
print!("{0: >12} ", title);
}
}
println!();
for fs in &fs_list {
print!("{0: <16} ", fs.mount_info.dev_name);
if opt.show_fs_type {
print!("{0: <5} ", fs.mount_info.fs_type);
}
if opt.show_inode_instead {
print!(
"{0: >12} ",
human_readable(fs.usage.files, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(fs.usage.files - fs.usage.ffree, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(fs.usage.ffree, opt.human_readable_base)?
);
print!(
"{0: >5} ",
format!(
"{0:.1}%",
100f64 - 100f64 * (fs.usage.ffree as f64 / fs.usage.files as f64)
)
);
} else {
let total_size = fs.usage.blocksize * fs.usage.blocks;
let free_size = fs.usage.blocksize * fs.usage.bfree;
print!(
"{0: >12} ",
human_readable(total_size, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(total_size - free_size, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(free_size, opt.human_readable_base)?
);
if cfg!(target_os = "macos") {
let used = fs.usage.blocks - fs.usage.bfree;
let blocks = used + fs.usage.bavail;
print!("{0: >12} ", use_size(used, blocks));
}
print!("{0: >5} ", use_size(free_size, total_size));
}
print!("{0: <16}", fs.mount_info.mount_dir);
println!();
} }
Ok(()) Ok(())

501
src/uu/df/src/table.rs Normal file
View file

@ -0,0 +1,501 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore tmpfs
//! The filesystem usage data table.
//!
//! A table comprises a header row ([`Header`]) and a collection of
//! data rows ([`Row`]), one per filesystem. To display a [`Row`],
//! combine it with [`Options`] in the [`DisplayRow`] struct; the
//! [`DisplayRow`] implements [`std::fmt::Display`].
use number_prefix::NumberPrefix;
use crate::{Filesystem, Options};
use uucore::fsext::{FsUsage, MountInfo};
use std::fmt;
/// A row in the filesystem usage data table.
///
/// A row comprises several pieces of information, including the
/// filesystem device, the mountpoint, the number of bytes used, etc.
pub(crate) struct Row {
/// Name of the device on which the filesystem lives.
fs_device: String,
/// Type of filesystem (for example, `"ext4"`, `"tmpfs"`, etc.).
fs_type: String,
/// Path at which the filesystem is mounted.
fs_mount: String,
/// Total number of bytes in the filesystem regardless of whether they are used.
bytes: u64,
/// Number of used bytes.
bytes_used: u64,
/// Number of free bytes.
bytes_free: u64,
/// Fraction of bytes that are used, given as a float between 0 and 1.
///
/// If the filesystem has zero blocks, then this is `None`.
bytes_usage: Option<f64>,
/// Fraction of bytes that are available, given as a float between 0 and 1.
///
/// These are the bytes that are available to non-privileged processes.
///
/// If the filesystem reports zero available blocks, then this is `None`.
#[cfg(target_os = "macos")]
bytes_capacity: Option<f64>,
/// Total number of inodes in the filesystem.
inodes: u64,
/// Number of used inodes.
inodes_used: u64,
/// Number of free inodes.
inodes_free: u64,
/// Fraction of inodes that are used, given as a float between 0 and 1.
///
/// If the filesystem has zero inodes, then this is `None`.
inodes_usage: Option<f64>,
}
impl From<Filesystem> for Row {
    /// Derive a table row from a filesystem's mount info and usage counters.
    ///
    /// Block counts are converted to bytes by multiplying with the
    /// block size. The `*_usage` fields are fractions between 0 and 1
    /// and are `None` when the corresponding total is zero.
    fn from(fs: Filesystem) -> Self {
        let MountInfo {
            dev_name,
            fs_type,
            mount_dir,
            ..
        } = fs.mount_info;
        let FsUsage {
            blocksize,
            blocks,
            bfree,
            #[cfg(target_os = "macos")]
            bavail,
            files,
            ffree,
            ..
        } = fs.usage;
        Self {
            fs_device: dev_name,
            fs_type,
            fs_mount: mount_dir,
            bytes: blocksize * blocks,
            bytes_used: blocksize * (blocks - bfree),
            bytes_free: blocksize * bfree,
            bytes_usage: if blocks == 0 {
                None
            } else {
                Some(((blocks - bfree) as f64) / blocks as f64)
            },
            #[cfg(target_os = "macos")]
            bytes_capacity: if bavail == 0 {
                None
            } else {
                Some(bavail as f64 / ((blocks - bfree + bavail) as f64))
            },
            inodes: files,
            inodes_used: files - ffree,
            inodes_free: ffree,
            // Used inodes over total inodes, mirroring `bytes_usage`.
            // Dividing `ffree` by `files` would give the *free* fraction.
            inodes_usage: if files == 0 {
                None
            } else {
                Some(((files - ffree) as f64) / files as f64)
            },
        }
    }
}
/// A displayable wrapper around a [`Row`].
///
/// The `options` control how the information in the row gets displayed:
/// whether the filesystem type column is written, whether inode or byte
/// columns are shown, and how numbers are scaled.
pub(crate) struct DisplayRow<'a> {
/// The data in this row.
row: Row,
/// Options that control how to display the data.
///
/// Borrowed for the lifetime `'a` rather than copied into each row.
options: &'a Options,
// TODO We don't need all of the command-line options here. Some
// of the command-line options indicate which rows to include or
// exclude. Other command-line options indicate which columns to
// include or exclude. Still other options indicate how to format
// numbers. We could split the options up into those groups to
// reduce the coupling between this `table.rs` module and the main
// `df.rs` module.
}
impl<'a> DisplayRow<'a> {
    /// Create a displayable row from `row`, rendered according to `options`.
    pub(crate) fn new(row: Row, options: &'a Options) -> Self {
        Self { row, options }
    }

    /// Render `size` as a string, scaled as requested by the `options` field.
    ///
    /// A negative `human_readable_base` leaves the number unscaled. A base of
    /// 1000 or 1024 selects decimal or binary unit prefixes; prefixed values
    /// are written with one fractional digit.
    ///
    /// # Errors
    ///
    /// If the scaling factor is not 1000, 1024, or a negative number.
    fn scaled(&self, size: u64) -> Result<String, fmt::Error> {
        // TODO The argument-parsing code should be responsible for
        // ensuring that the `human_readable_base` number is
        // positive. Then we could remove the `Err` case from this
        // function.
        //
        // TODO We should not be using a negative number to indicate
        // default behavior. The default behavior for `df` is to show
        // sizes in blocks of 1K bytes each, so we should just do
        // that.
        //
        // TODO Support arbitrary positive scaling factors (from the
        // `--block-size` command-line argument).
        let base = self.options.human_readable_base;
        if base < 0 {
            return Ok(size.to_string());
        }
        let scaled_size = match base {
            1000 => NumberPrefix::decimal(size as f64),
            1024 => NumberPrefix::binary(size as f64),
            _ => return Err(fmt::Error {}),
        };
        let text = match scaled_size {
            NumberPrefix::Standalone(bytes) => bytes.to_string(),
            NumberPrefix::Prefixed(prefix, bytes) => format!("{:.1}{}", bytes, prefix.symbol()),
        };
        Ok(text)
    }

    /// Turn a fraction between 0 and 1 into a whole-number percentage string.
    ///
    /// A missing fraction (`None`) is rendered as `"-"`.
    fn percentage(fraction: Option<f64>) -> String {
        fraction.map_or_else(|| "-".to_string(), |x| format!("{:.0}%", 100.0 * x))
    }

    /// Write the byte-oriented columns of this row: total, used, free, the
    /// capacity percentage (macOS only) and the usage percentage.
    ///
    /// # Errors
    ///
    /// If there is a problem writing to `f`.
    ///
    /// If the scaling factor is not 1000, 1024, or a negative number.
    fn fmt_bytes(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let row = &self.row;
        for amount in [row.bytes, row.bytes_used, row.bytes_free] {
            write!(f, "{0: >12} ", self.scaled(amount)?)?;
        }
        #[cfg(target_os = "macos")]
        write!(f, "{0: >12} ", Self::percentage(row.bytes_capacity))?;
        write!(f, "{0: >5} ", Self::percentage(row.bytes_usage))
    }

    /// Write the inode-oriented columns of this row: total, used, free and
    /// the usage percentage.
    ///
    /// # Errors
    ///
    /// If there is a problem writing to `f`.
    ///
    /// If the scaling factor is not 1000, 1024, or a negative number.
    fn fmt_inodes(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let row = &self.row;
        for amount in [row.inodes, row.inodes_used, row.inodes_free] {
            write!(f, "{0: >12} ", self.scaled(amount)?)?;
        }
        write!(f, "{0: >5} ", Self::percentage(row.inodes_usage))
    }
}
impl fmt::Display for DisplayRow<'_> {
    /// Write the row as one line of the table: device, optional filesystem
    /// type, either the inode or the byte columns, and finally the mount
    /// point.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let row = &self.row;
        let options = self.options;

        write!(f, "{0: <16} ", row.fs_device)?;
        if options.show_fs_type {
            write!(f, "{0: <5} ", row.fs_type)?;
        }

        // Exactly one of the two column groups is written per row.
        let columns = if options.show_inode_instead {
            self.fmt_inodes(f)
        } else {
            self.fmt_bytes(f)
        };
        columns?;

        write!(f, "{0: <16}", row.fs_mount)
    }
}
/// The header row of the table.
///
/// The `options` control which columns are displayed: the optional `Type`
/// column, the inode columns versus the byte columns, and the title of the
/// size column.
pub(crate) struct Header<'a> {
/// Options that control which columns are displayed.
options: &'a Options,
}
impl<'a> Header<'a> {
/// Instantiate this struct.
///
/// The header borrows `options` for the lifetime `'a`; nothing is copied.
pub(crate) fn new(options: &'a Options) -> Self {
Self { options }
}
}
impl fmt::Display for Header<'_> {
    /// Write the column titles, using the same column widths and alignment
    /// as the data rows.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let options = self.options;

        write!(f, "{0: <16} ", "Filesystem")?;
        if options.show_fs_type {
            write!(f, "{0: <5} ", "Type")?;
        }

        if options.show_inode_instead {
            for title in ["Inodes", "IUsed", "IFree"] {
                write!(f, "{0: >12} ", title)?;
            }
            write!(f, "{0: >5} ", "IUse%")?;
        } else {
            // A base of -1 means sizes are not scaled to human-readable units.
            let size_title = if options.human_readable_base == -1 {
                "1k-blocks"
            } else {
                "Size"
            };
            for title in [size_title, "Used", "Available"] {
                write!(f, "{0: >12} ", title)?;
            }
            #[cfg(target_os = "macos")]
            write!(f, "{0: >12} ", "Capacity")?;
            write!(f, "{0: >5} ", "Use%")?;
        }

        write!(f, "{0: <16} ", "Mounted on")
    }
}
#[cfg(test)]
mod tests {
    use crate::table::{DisplayRow, Header, Row};
    use crate::Options;

    /// Build the fixture row shared by the row tests.
    ///
    /// Only the three byte counters differ between tests; every other field
    /// has the same value in each of them.
    fn sample_row(bytes: u64, bytes_used: u64, bytes_free: u64) -> Row {
        Row {
            fs_device: "my_device".to_string(),
            fs_type: "my_type".to_string(),
            fs_mount: "my_mount".to_string(),
            bytes,
            bytes_used,
            bytes_free,
            bytes_usage: Some(0.25),
            #[cfg(target_os = "macos")]
            bytes_capacity: Some(0.5),
            inodes: 10,
            inodes_used: 2,
            inodes_free: 8,
            inodes_usage: Some(0.2),
        }
    }

    /// Render the header for `options`.
    fn header_text(options: &Options) -> String {
        Header::new(options).to_string()
    }

    /// Render `row` for `options`.
    fn row_text(row: Row, options: &Options) -> String {
        DisplayRow::new(row, options).to_string()
    }

    #[test]
    fn test_header_display() {
        let options = Options {
            human_readable_base: -1,
            ..Default::default()
        };
        assert_eq!(
            header_text(&options),
            "Filesystem 1k-blocks Used Available Use% Mounted on "
        );
    }

    #[test]
    fn test_header_display_fs_type() {
        let options = Options {
            human_readable_base: -1,
            show_fs_type: true,
            ..Default::default()
        };
        assert_eq!(
            header_text(&options),
            "Filesystem Type 1k-blocks Used Available Use% Mounted on "
        );
    }

    #[test]
    fn test_header_display_inode() {
        let options = Options {
            human_readable_base: -1,
            show_inode_instead: true,
            ..Default::default()
        };
        assert_eq!(
            header_text(&options),
            "Filesystem Inodes IUsed IFree IUse% Mounted on "
        );
    }

    #[test]
    fn test_header_display_human_readable_binary() {
        let options = Options {
            human_readable_base: 1024,
            ..Default::default()
        };
        assert_eq!(
            header_text(&options),
            "Filesystem Size Used Available Use% Mounted on "
        );
    }

    #[test]
    fn test_header_display_human_readable_si() {
        let options = Options {
            human_readable_base: 1000,
            ..Default::default()
        };
        assert_eq!(
            header_text(&options),
            "Filesystem Size Used Available Use% Mounted on "
        );
    }

    #[test]
    fn test_row_display() {
        let options = Options {
            human_readable_base: -1,
            ..Default::default()
        };
        assert_eq!(
            row_text(sample_row(100, 25, 75), &options),
            "my_device 100 25 75 25% my_mount "
        );
    }

    #[test]
    fn test_row_display_fs_type() {
        let options = Options {
            human_readable_base: -1,
            show_fs_type: true,
            ..Default::default()
        };
        assert_eq!(
            row_text(sample_row(100, 25, 75), &options),
            "my_device my_type 100 25 75 25% my_mount "
        );
    }

    #[test]
    fn test_row_display_inodes() {
        let options = Options {
            human_readable_base: -1,
            show_inode_instead: true,
            ..Default::default()
        };
        assert_eq!(
            row_text(sample_row(100, 25, 75), &options),
            "my_device 10 2 8 20% my_mount "
        );
    }

    #[test]
    fn test_row_display_human_readable_si() {
        let options = Options {
            human_readable_base: 1000,
            show_fs_type: true,
            ..Default::default()
        };
        assert_eq!(
            row_text(sample_row(4000, 1000, 3000), &options),
            "my_device my_type 4.0k 1.0k 3.0k 25% my_mount "
        );
    }

    #[test]
    fn test_row_display_human_readable_binary() {
        let options = Options {
            human_readable_base: 1024,
            show_fs_type: true,
            ..Default::default()
        };
        assert_eq!(
            row_text(sample_row(4096, 1024, 3072), &options),
            "my_device my_type 4.0Ki 1.0Ki 3.0Ki 25% my_mount "
        );
    }
}

View file

@ -144,7 +144,7 @@ impl Stat {
#[cfg(windows)] #[cfg(windows)]
let file_info = get_file_info(&path); let file_info = get_file_info(&path);
#[cfg(windows)] #[cfg(windows)]
Ok(Stat { Ok(Self {
path, path,
is_dir: metadata.is_dir(), is_dir: metadata.is_dir(),
size: metadata.len(), size: metadata.len(),

View file

@ -104,6 +104,7 @@ fn load_config_file(opts: &mut Options) -> UResult<()> {
} }
#[cfg(not(windows))] #[cfg(not(windows))]
#[allow(clippy::ptr_arg)]
fn build_command<'a, 'b>(args: &'a mut Vec<&'b str>) -> (Cow<'b, str>, &'a [&'b str]) { fn build_command<'a, 'b>(args: &'a mut Vec<&'b str>) -> (Cow<'b, str>, &'a [&'b str]) {
let progname = Cow::from(args[0]); let progname = Cow::from(args[0]);
(progname, &args[1..]) (progname, &args[1..])

View file

@ -14,12 +14,23 @@ mod tokens;
const VERSION: &str = "version"; const VERSION: &str = "version";
const HELP: &str = "help"; const HELP: &str = "help";
static ABOUT: &str = "Print the value of EXPRESSION to standard output";
static USAGE: &str = r#"
expr [EXPRESSION]
expr [OPTIONS]"#;
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()) App::new(uucore::util_name())
.version(crate_version!())
.about(ABOUT)
.override_usage(USAGE)
.setting(AppSettings::InferLongArgs) .setting(AppSettings::InferLongArgs)
.arg(Arg::new(VERSION).long(VERSION)) .arg(
.arg(Arg::new(HELP).long(HELP)) Arg::new(VERSION)
.long(VERSION)
.help("output version information and exit"),
)
.arg(Arg::new(HELP).long(HELP).help("display this help and exit"))
} }
#[uucore::main] #[uucore::main]

View file

@ -10,7 +10,7 @@
//! * `<https://en.wikipedia.org/wiki/Shunting-yard_algorithm>` //! * `<https://en.wikipedia.org/wiki/Shunting-yard_algorithm>`
//! //!
// spell-checker:ignore (ToDO) binop binops ints paren prec // spell-checker:ignore (ToDO) binop binops ints paren prec multibytes
use num_bigint::BigInt; use num_bigint::BigInt;
use num_traits::{One, Zero}; use num_traits::{One, Zero};
@ -465,7 +465,9 @@ fn operator_match(values: &[String]) -> Result<String, String> {
fn prefix_operator_length(values: &[String]) -> String { fn prefix_operator_length(values: &[String]) -> String {
assert!(values.len() == 1); assert!(values.len() == 1);
values[0].len().to_string() // Use chars().count() as we can have some multibytes chars
// See https://github.com/uutils/coreutils/issues/3132
values[0].chars().count().to_string()
} }
fn prefix_operator_index(values: &[String]) -> String { fn prefix_operator_index(values: &[String]) -> String {

View file

@ -4,16 +4,62 @@
// * // *
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
use clap::{App, AppSettings, Arg};
use std::io::Write;
use uucore::error::{set_exit_code, UResult};
use clap::App; static ABOUT: &str = "\
use uucore::error::UResult; Returns false, an unsuccessful exit status.
Immediately returns with the exit status `1`. When invoked with one of the recognized options it
will try to write the help or version text. Any IO error during this operation is diagnosed, yet
the program will also return `1`.
";
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
uu_app().get_matches_from(args); let mut app = uu_app();
Err(1.into())
// Mirror GNU options, always return `1`. In particular even the 'successful' cases of no-op,
// and the interrupted display of help and version should return `1`. Also, we return Ok in all
// paths to avoid the allocation of an error object, an operation that could, in theory, fail
// and unwind through the standard library allocation handling machinery.
set_exit_code(1);
if let Ok(matches) = app.try_get_matches_from_mut(args) {
let error = if matches.index_of("help").is_some() {
app.print_long_help()
} else if matches.index_of("version").is_some() {
writeln!(std::io::stdout(), "{}", app.render_version())
} else {
Ok(())
};
// Try to display this error.
if let Err(print_fail) = error {
// Completely ignore any error here, no more failover and we will fail in any case.
let _ = writeln!(std::io::stderr(), "{}: {}", uucore::util_name(), print_fail);
}
}
Ok(())
} }
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()) App::new(uucore::util_name())
.version(clap::crate_version!())
.about(ABOUT)
// We provide our own help and version options, to ensure maximum compatibility with GNU.
.setting(AppSettings::DisableHelpFlag | AppSettings::DisableVersionFlag)
.arg(
Arg::new("help")
.long("help")
.help("Print help information")
.exclusive(true),
)
.arg(
Arg::new("version")
.long("version")
.help("Print version information"),
)
} }

View file

@ -23,10 +23,11 @@ memchr = "2"
md5 = "0.3.5" md5 = "0.3.5"
regex = "1.0.1" regex = "1.0.1"
regex-syntax = "0.6.7" regex-syntax = "0.6.7"
sha1 = "0.6.0" sha1 = "0.10.0"
sha2 = "0.10.1" sha2 = "0.10.1"
sha3 = "0.10.0" sha3 = "0.10.0"
blake2b_simd = "0.5.11" blake2b_simd = "0.5.11"
blake3 = "1.3.1"
uucore = { version=">=0.0.11", package="uucore", path="../../uucore" } uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }
[[bin]] [[bin]]

View file

@ -81,7 +81,7 @@ impl Digest for blake2b_simd::State {
} }
} }
impl Digest for sha1::Sha1 { impl Digest for blake3::Hasher {
fn new() -> Self { fn new() -> Self {
Self::new() Self::new()
} }
@ -91,11 +91,34 @@ impl Digest for sha1::Sha1 {
} }
fn result(&mut self, out: &mut [u8]) { fn result(&mut self, out: &mut [u8]) {
out.copy_from_slice(&self.digest().bytes()); let hash_result = &self.finalize();
out.copy_from_slice(hash_result.as_bytes());
} }
fn reset(&mut self) { fn reset(&mut self) {
self.reset(); *self = Self::new();
}
fn output_bits(&self) -> usize {
256
}
}
impl Digest for sha1::Sha1 {
fn new() -> Self {
Self::default()
}
fn input(&mut self, input: &[u8]) {
digest::Digest::update(self, input);
}
fn result(&mut self, out: &mut [u8]) {
digest::Digest::finalize_into_reset(self, out.into());
}
fn reset(&mut self) {
*self = Self::new();
} }
fn output_bits(&self) -> usize { fn output_bits(&self) -> usize {

View file

@ -70,6 +70,7 @@ fn is_custom_binary(program: &str) -> bool {
| "shake128sum" | "shake128sum"
| "shake256sum" | "shake256sum"
| "b2sum" | "b2sum"
| "b3sum"
) )
} }
@ -93,6 +94,11 @@ fn detect_algo(
Box::new(blake2b_simd::State::new()) as Box<dyn Digest>, Box::new(blake2b_simd::State::new()) as Box<dyn Digest>,
512, 512,
), ),
"b3sum" => (
"BLAKE3",
Box::new(blake3::Hasher::new()) as Box<dyn Digest>,
256,
),
"sha3sum" => match matches.value_of("bits") { "sha3sum" => match matches.value_of("bits") {
Some(bits_str) => match (bits_str).parse::<usize>() { Some(bits_str) => match (bits_str).parse::<usize>() {
Ok(224) => ( Ok(224) => (
@ -196,6 +202,9 @@ fn detect_algo(
if matches.is_present("b2sum") { if matches.is_present("b2sum") {
set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512); set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512);
} }
if matches.is_present("b3sum") {
set_or_crash("BLAKE3", Box::new(blake3::Hasher::new()), 256);
}
if matches.is_present("sha3") { if matches.is_present("sha3") {
match matches.value_of("bits") { match matches.value_of("bits") {
Some(bits_str) => match (bits_str).parse::<usize>() { Some(bits_str) => match (bits_str).parse::<usize>() {
@ -433,6 +442,7 @@ pub fn uu_app_custom<'a>() -> App<'a> {
"work with SHAKE256 using BITS for the output size", "work with SHAKE256 using BITS for the output size",
), ),
("b2sum", "work with BLAKE2"), ("b2sum", "work with BLAKE2"),
("b3sum", "work with BLAKE3"),
]; ];
for (name, desc) in algorithms { for (name, desc) in algorithms {

View file

@ -17,7 +17,7 @@ path = "src/head.rs"
[dependencies] [dependencies]
clap = { version = "3.0", features = ["wrap_help", "cargo"] } clap = { version = "3.0", features = ["wrap_help", "cargo"] }
memchr = "2" memchr = "2"
uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer"] } uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
[[bin]] [[bin]]
name = "head" name = "head"

View file

@ -5,12 +5,13 @@
// spell-checker:ignore (vars) zlines BUFWRITER seekable // spell-checker:ignore (vars) zlines BUFWRITER seekable
use clap::{crate_version, App, AppSettings, Arg}; use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use std::ffi::OsString; use std::ffi::OsString;
use std::io::{self, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write}; use std::io::{self, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult, USimpleError}; use uucore::error::{FromIo, UError, UResult, USimpleError};
use uucore::lines::lines;
use uucore::show; use uucore::show;
const BUF_SIZE: usize = 65536; const BUF_SIZE: usize = 65536;
@ -35,10 +36,8 @@ mod options {
pub const ZERO_NAME: &str = "ZERO"; pub const ZERO_NAME: &str = "ZERO";
pub const FILES_NAME: &str = "FILE"; pub const FILES_NAME: &str = "FILE";
} }
mod lines;
mod parse; mod parse;
mod take; mod take;
use lines::zlines;
use take::take_all_but; use take::take_all_but;
use take::take_lines; use take::take_lines;
@ -104,25 +103,42 @@ pub fn uu_app<'a>() -> App<'a> {
) )
.arg(Arg::new(options::FILES_NAME).multiple_occurrences(true)) .arg(Arg::new(options::FILES_NAME).multiple_occurrences(true))
} }
#[derive(PartialEq, Debug, Clone, Copy)]
enum Modes { #[derive(Debug, PartialEq)]
Lines(usize), enum Mode {
Bytes(usize), FirstLines(usize),
AllButLastLines(usize),
FirstBytes(usize),
AllButLastBytes(usize),
} }
impl Default for Modes { impl Default for Mode {
fn default() -> Self { fn default() -> Self {
Self::Lines(10) Self::FirstLines(10)
} }
} }
fn parse_mode<F>(src: &str, closure: F) -> Result<(Modes, bool), String> impl Mode {
where fn from(matches: &ArgMatches) -> Result<Self, String> {
F: FnOnce(usize) -> Modes, if let Some(v) = matches.value_of(options::BYTES_NAME) {
{ let (n, all_but_last) =
match parse::parse_num(src) { parse::parse_num(v).map_err(|err| format!("invalid number of bytes: {}", err))?;
Ok((n, last)) => Ok((closure(n), last)), if all_but_last {
Err(e) => Err(e.to_string()), Ok(Self::AllButLastBytes(n))
} else {
Ok(Self::FirstBytes(n))
}
} else if let Some(v) = matches.value_of(options::LINES_NAME) {
let (n, all_but_last) =
parse::parse_num(v).map_err(|err| format!("invalid number of lines: {}", err))?;
if all_but_last {
Ok(Self::AllButLastLines(n))
} else {
Ok(Self::FirstLines(n))
}
} else {
Ok(Default::default())
}
} }
} }
@ -157,8 +173,7 @@ struct HeadOptions {
pub quiet: bool, pub quiet: bool,
pub verbose: bool, pub verbose: bool,
pub zeroed: bool, pub zeroed: bool,
pub all_but_last: bool, pub mode: Mode,
pub mode: Modes,
pub files: Vec<String>, pub files: Vec<String>,
} }
@ -173,18 +188,7 @@ impl HeadOptions {
options.verbose = matches.is_present(options::VERBOSE_NAME); options.verbose = matches.is_present(options::VERBOSE_NAME);
options.zeroed = matches.is_present(options::ZERO_NAME); options.zeroed = matches.is_present(options::ZERO_NAME);
let mode_and_from_end = if let Some(v) = matches.value_of(options::BYTES_NAME) { options.mode = Mode::from(&matches)?;
parse_mode(v, Modes::Bytes)
.map_err(|err| format!("invalid number of bytes: {}", err))?
} else if let Some(v) = matches.value_of(options::LINES_NAME) {
parse_mode(v, Modes::Lines)
.map_err(|err| format!("invalid number of lines: {}", err))?
} else {
(Modes::Lines(10), false)
};
options.mode = mode_and_from_end.0;
options.all_but_last = mode_and_from_end.1;
options.files = match matches.values_of(options::FILES_NAME) { options.files = match matches.values_of(options::FILES_NAME) {
Some(v) => v.map(|s| s.to_owned()).collect(), Some(v) => v.map(|s| s.to_owned()).collect(),
@ -281,12 +285,14 @@ fn read_but_last_n_lines(
if zero { if zero {
let stdout = std::io::stdout(); let stdout = std::io::stdout();
let mut stdout = stdout.lock(); let mut stdout = stdout.lock();
for bytes in take_all_but(zlines(input), n) { for bytes in take_all_but(lines(input, b'\0'), n) {
stdout.write_all(&bytes?)?; stdout.write_all(&bytes?)?;
} }
} else { } else {
for line in take_all_but(input.lines(), n) { let stdout = std::io::stdout();
println!("{}", line?); let mut stdout = stdout.lock();
for bytes in take_all_but(lines(input, b'\n'), n) {
stdout.write_all(&bytes?)?;
} }
} }
Ok(()) Ok(())
@ -374,9 +380,8 @@ where
} }
fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
assert!(options.all_but_last);
match options.mode { match options.mode {
Modes::Bytes(n) => { Mode::AllButLastBytes(n) => {
let size = input.metadata()?.len().try_into().unwrap(); let size = input.metadata()?.len().try_into().unwrap();
if n >= size { if n >= size {
return Ok(()); return Ok(());
@ -387,31 +392,29 @@ fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std:
)?; )?;
} }
} }
Modes::Lines(n) => { Mode::AllButLastLines(n) => {
let found = find_nth_line_from_end(input, n, options.zeroed)?; let found = find_nth_line_from_end(input, n, options.zeroed)?;
read_n_bytes( read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input), &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
found, found,
)?; )?;
} }
_ => unreachable!(),
} }
Ok(()) Ok(())
} }
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
if options.all_but_last {
head_backwards_file(input, options)
} else {
match options.mode { match options.mode {
Modes::Bytes(n) => { Mode::FirstBytes(n) => {
read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n) read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n)
} }
Modes::Lines(n) => read_n_lines( Mode::FirstLines(n) => read_n_lines(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input), &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
n, n,
options.zeroed, options.zeroed,
), ),
} Mode::AllButLastBytes(_) | Mode::AllButLastLines(_) => head_backwards_file(input, options),
} }
} }
@ -429,19 +432,11 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
let stdin = std::io::stdin(); let stdin = std::io::stdin();
let mut stdin = stdin.lock(); let mut stdin = stdin.lock();
match options.mode { match options.mode {
Modes::Bytes(n) => { Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n),
if options.all_but_last { Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n),
read_but_last_n_bytes(&mut stdin, n) Mode::FirstLines(n) => read_n_lines(&mut stdin, n, options.zeroed),
} else { Mode::AllButLastLines(n) => {
read_n_bytes(&mut stdin, n)
}
}
Modes::Lines(n) => {
if options.all_but_last {
read_but_last_n_lines(&mut stdin, n, options.zeroed) read_but_last_n_lines(&mut stdin, n, options.zeroed)
} else {
read_n_lines(&mut stdin, n, options.zeroed)
}
} }
} }
} }
@ -512,17 +507,16 @@ mod tests {
let args = options("-n -10M -vz").unwrap(); let args = options("-n -10M -vz").unwrap();
assert!(args.zeroed); assert!(args.zeroed);
assert!(args.verbose); assert!(args.verbose);
assert!(args.all_but_last); assert_eq!(args.mode, Mode::AllButLastLines(10 * 1024 * 1024));
assert_eq!(args.mode, Modes::Lines(10 * 1024 * 1024));
} }
#[test] #[test]
fn test_gnu_compatibility() { fn test_gnu_compatibility() {
let args = options("-n 1 -c 1 -n 5 -c kiB -vqvqv").unwrap(); // spell-checker:disable-line let args = options("-n 1 -c 1 -n 5 -c kiB -vqvqv").unwrap(); // spell-checker:disable-line
assert!(args.mode == Modes::Bytes(1024)); assert!(args.mode == Mode::FirstBytes(1024));
assert!(args.verbose); assert!(args.verbose);
assert_eq!(options("-5").unwrap().mode, Modes::Lines(5)); assert_eq!(options("-5").unwrap().mode, Mode::FirstLines(5));
assert_eq!(options("-2b").unwrap().mode, Modes::Bytes(1024)); assert_eq!(options("-2b").unwrap().mode, Mode::FirstBytes(1024));
assert_eq!(options("-5 -c 1").unwrap().mode, Modes::Bytes(1)); assert_eq!(options("-5 -c 1").unwrap().mode, Mode::FirstBytes(1));
} }
#[test] #[test]
fn all_args_test() { fn all_args_test() {
@ -533,10 +527,10 @@ mod tests {
assert!(options("-v").unwrap().verbose); assert!(options("-v").unwrap().verbose);
assert!(options("--zero-terminated").unwrap().zeroed); assert!(options("--zero-terminated").unwrap().zeroed);
assert!(options("-z").unwrap().zeroed); assert!(options("-z").unwrap().zeroed);
assert_eq!(options("--lines 15").unwrap().mode, Modes::Lines(15)); assert_eq!(options("--lines 15").unwrap().mode, Mode::FirstLines(15));
assert_eq!(options("-n 15").unwrap().mode, Modes::Lines(15)); assert_eq!(options("-n 15").unwrap().mode, Mode::FirstLines(15));
assert_eq!(options("--bytes 15").unwrap().mode, Modes::Bytes(15)); assert_eq!(options("--bytes 15").unwrap().mode, Mode::FirstBytes(15));
assert_eq!(options("-c 15").unwrap().mode, Modes::Bytes(15)); assert_eq!(options("-c 15").unwrap().mode, Mode::FirstBytes(15));
} }
#[test] #[test]
fn test_options_errors() { fn test_options_errors() {
@ -550,26 +544,9 @@ mod tests {
assert!(!opts.verbose); assert!(!opts.verbose);
assert!(!opts.quiet); assert!(!opts.quiet);
assert!(!opts.zeroed); assert!(!opts.zeroed);
assert!(!opts.all_but_last); assert_eq!(opts.mode, Mode::FirstLines(10));
assert_eq!(opts.mode, Modes::Lines(10));
assert!(opts.files.is_empty()); assert!(opts.files.is_empty());
} }
#[test]
fn test_parse_mode() {
assert_eq!(
parse_mode("123", Modes::Lines),
Ok((Modes::Lines(123), false))
);
assert_eq!(
parse_mode("-456", Modes::Bytes),
Ok((Modes::Bytes(456), true))
);
assert!(parse_mode("Nonsensical Nonsense", Modes::Bytes).is_err());
#[cfg(target_pointer_width = "64")]
assert!(parse_mode("1Y", Modes::Lines).is_err());
#[cfg(target_pointer_width = "32")]
assert!(parse_mode("1T", Modes::Bytes).is_err());
}
fn arg_outputs(src: &str) -> Result<String, String> { fn arg_outputs(src: &str) -> Result<String, String> {
let split = src.split_whitespace().map(OsString::from); let split = src.split_whitespace().map(OsString::from);
match arg_iterate(split) { match arg_iterate(split) {

View file

@ -1,75 +0,0 @@
// spell-checker:ignore (vars) zline zlines
//! Iterate over zero-terminated lines.
use std::io::BufRead;
/// The zero byte, representing the null character.
const ZERO: u8 = 0;

/// Wrap `buf` in an iterator over its zero-terminated lines.
///
/// Each item is a [`std::io::Result`]<[`Vec`]<[`u8`]>> holding the bytes of
/// one line *including* its terminating null character. The final line is
/// yielded without a terminator if the input does not end with one.
///
/// # Examples
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let cursor = Cursor::new(b"x\0y\0z\0");
/// let mut iter = zlines(cursor).map(|l| l.unwrap());
/// assert_eq!(iter.next(), Some(b"x\0".to_vec()));
/// assert_eq!(iter.next(), Some(b"y\0".to_vec()));
/// assert_eq!(iter.next(), Some(b"z\0".to_vec()));
/// assert_eq!(iter.next(), None);
/// ```
pub fn zlines<B>(buf: B) -> ZLines<B> {
    ZLines { buf }
}

/// An iterator over the zero-terminated lines of an instance of `BufRead`.
pub struct ZLines<B> {
    /// The reader the lines are pulled from.
    buf: B,
}

impl<B: BufRead> Iterator for ZLines<B> {
    type Item = std::io::Result<Vec<u8>>;

    /// Read up to and including the next null byte.
    ///
    /// Yields `None` once a read returns zero bytes, and `Some(Err(_))` when
    /// the underlying reader fails.
    fn next(&mut self) -> Option<Self::Item> {
        let mut line = Vec::new();
        let outcome = self.buf.read_until(ZERO, &mut line);
        // `Ok(0)` means end of input; any other count means `line` holds data.
        outcome
            .map(|count| if count == 0 { None } else { Some(line) })
            .transpose()
    }
}
#[cfg(test)]
mod tests {
    use crate::lines::zlines;
    use std::io::Cursor;

    /// Collect every line produced for `input`, panicking on an I/O error.
    fn collect_lines(input: &[u8]) -> Vec<Vec<u8>> {
        zlines(Cursor::new(input)).map(|l| l.unwrap()).collect()
    }

    #[test]
    fn test_null_terminated() {
        let expected = vec![b"x\0".to_vec(), b"y\0".to_vec(), b"z\0".to_vec()];
        assert_eq!(collect_lines(b"x\0y\0z\0"), expected);
    }

    #[test]
    fn test_not_null_terminated() {
        let expected = vec![b"x\0".to_vec(), b"y\0".to_vec(), b"z".to_vec()];
        assert_eq!(collect_lines(b"x\0y\0z"), expected);
    }
}

View file

@ -20,7 +20,7 @@ pub fn parse_obsolete(src: &str) -> Option<Result<impl Iterator<Item = OsString>
let mut has_num = false; let mut has_num = false;
let mut last_char = 0 as char; let mut last_char = 0 as char;
for (n, c) in &mut chars { for (n, c) in &mut chars {
if c.is_numeric() { if c.is_digit(10) {
has_num = true; has_num = true;
num_end = n; num_end = n;
} else { } else {

View file

@ -12,6 +12,7 @@ use libc::c_long;
use uucore::error::UResult; use uucore::error::UResult;
static SYNTAX: &str = "[options]"; static SYNTAX: &str = "[options]";
const SUMMARY: &str = "Print the numeric identifier (in hexadecimal) for the current host";
// currently rust libc interface doesn't include gethostid // currently rust libc interface doesn't include gethostid
extern "C" { extern "C" {
@ -28,6 +29,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()) App::new(uucore::util_name())
.version(crate_version!()) .version(crate_version!())
.about(SUMMARY)
.override_usage(SYNTAX) .override_usage(SYNTAX)
.setting(AppSettings::InferLongArgs) .setting(AppSettings::InferLongArgs)
} }

View file

@ -0,0 +1,55 @@
# Benchmarking join
<!-- spell-checker:ignore (words) CSVs nocheck hotpaths -->
## Performance profile
The amount of time spent in which part of the code can vary depending on the files being joined and the flags used.
A benchmark with `-j` and `-i` shows the following breakdown of where time is spent:
| Function/Method | Fraction of Samples | Why? |
| ---------------- | ------------------- | ---- |
| `Line::new` | 27% | Linear search for field separators, plus some vector operations. |
| `read_until` | 22% | Mostly libc reading file contents, with a few vector operations to represent them. |
| `Input::compare` | 20% | ~2/3 making the keys lowercase, ~1/3 comparing them. |
| `print_fields` | 11% | Writing to and flushing the buffer. |
| Other | 20% | |
| libc | 25% | I/O and memory allocation. |
More detailed profiles can be obtained via [flame graphs](https://github.com/flamegraph-rs/flamegraph):
```
cargo flamegraph --bin join --package uu_join -- file1 file2 > /dev/null
```
You may need to add the following lines to the top-level `Cargo.toml` to get full stack traces:
```
[profile.release]
debug = true
```
## How to benchmark
Benchmarking typically requires files large enough to ensure that the benchmark is not overwhelmed by background system noise; say, on the order of tens of MB.
While `join` operates on line-oriented data, and not properly formatted CSVs (e.g., `join` is not designed to accommodate escaped or quoted delimiters),
in practice many CSV datasets will function well after being sorted.
As with most of the utils, the recommended tool for benchmarking is [hyperfine](https://github.com/sharkdp/hyperfine).
To benchmark your changes:
- check out the main branch (without your changes), do a `--release` build, and back up the executable produced at `target/release/join`
- check out your working branch (with your changes), do a `--release` build
- run
```
hyperfine -w 5 "/path/to/main/branch/build/join file1 file2" "/path/to/working/branch/build/join file1 file2"
```
- you'll likely need to add additional options to both commands, such as a field separator, or if you're benchmarking some particular behavior
- you can also optionally benchmark against GNU's join
## What to benchmark
The following options can have a non-trivial impact on performance:
- `-a`/`-v` if one of the two files has significantly more lines than the other
- `-j`/`-1`/`-2` cause work to be done to grab the appropriate field
- `-i` adds a call to `to_ascii_lowercase()` that adds some time for allocating and dropping memory for the lowercase key
- `--nocheck-order` causes some calls of `Input::compare` to be skipped
The content of the files being joined has a very significant impact on the performance.
Things like how long each line is, how many fields there are, how long the key fields are, how many lines there are, how many lines can be joined, and how many lines each line can be joined with all change the behavior of the hotpaths.

View file

@ -17,6 +17,7 @@ path = "src/join.rs"
[dependencies] [dependencies]
clap = { version = "3.0", features = ["wrap_help", "cargo"] } clap = { version = "3.0", features = ["wrap_help", "cargo"] }
uucore = { version=">=0.0.11", package="uucore", path="../../uucore" } uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }
memchr = "2"
[[bin]] [[bin]]
name = "join" name = "join"

View file

@ -11,16 +11,49 @@
extern crate uucore; extern crate uucore;
use clap::{crate_version, App, AppSettings, Arg}; use clap::{crate_version, App, AppSettings, Arg};
use memchr::{memchr3_iter, memchr_iter};
use std::cmp::Ordering; use std::cmp::Ordering;
use std::convert::From;
use std::error::Error;
use std::fmt::Display;
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, Split, Stdin, Write}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Split, Stdin, Write};
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{set_exit_code, UResult, USimpleError}; use uucore::error::{set_exit_code, UError, UResult, USimpleError};
static NAME: &str = "join"; static NAME: &str = "join";
/// Error type used by `join`.
#[derive(Debug)]
enum JoinError {
/// An underlying `std::io::Error`; displayed with an `io error: ` prefix.
IOError(std::io::Error),
/// A ready-made message about out-of-order input; displayed verbatim.
UnorderedInput(String),
}
impl UError for JoinError {
/// Exit code for this error: always 1, regardless of the variant.
fn code(&self) -> i32 {
1
}
}
// Empty impl: only the default `Error` trait methods are used.
impl Error for JoinError {}
impl Display for JoinError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
JoinError::IOError(e) => write!(f, "io error: {}", e),
JoinError::UnorderedInput(e) => f.write_str(e),
}
}
}
impl From<std::io::Error> for JoinError {
fn from(error: std::io::Error) -> Self {
Self::IOError(error)
}
}
#[derive(Copy, Clone, PartialEq)] #[derive(Copy, Clone, PartialEq)]
enum FileNum { enum FileNum {
File1, File1,
@ -34,7 +67,7 @@ enum LineEnding {
Newline = b'\n', Newline = b'\n',
} }
#[derive(Copy, Clone)] #[derive(Copy, Clone, PartialEq)]
enum Sep { enum Sep {
Char(u8), Char(u8),
Line, Line,
@ -112,34 +145,43 @@ impl<'a> Repr<'a> {
} }
/// Print the field or empty filler if the field is not set. /// Print the field or empty filler if the field is not set.
fn print_field(&self, field: Option<&Vec<u8>>) -> Result<(), std::io::Error> { fn print_field(
&self,
writer: &mut impl Write,
field: Option<&[u8]>,
) -> Result<(), std::io::Error> {
let value = match field { let value = match field {
Some(field) => field, Some(field) => field,
None => self.empty, None => self.empty,
}; };
stdout().write_all(value) writer.write_all(value)
} }
/// Print each field except the one at the index. /// Print each field except the one at the index.
fn print_fields(&self, line: &Line, index: usize) -> Result<(), std::io::Error> { fn print_fields(
for i in 0..line.fields.len() { &self,
writer: &mut impl Write,
line: &Line,
index: usize,
) -> Result<(), std::io::Error> {
for i in 0..line.field_ranges.len() {
if i != index { if i != index {
stdout().write_all(&[self.separator])?; writer.write_all(&[self.separator])?;
stdout().write_all(&line.fields[i])?; writer.write_all(line.get_field(i).unwrap())?;
} }
} }
Ok(()) Ok(())
} }
/// Print each field or the empty filler if the field is not set. /// Print each field or the empty filler if the field is not set.
fn print_format<F>(&self, f: F) -> Result<(), std::io::Error> fn print_format<F>(&self, writer: &mut impl Write, f: F) -> Result<(), std::io::Error>
where where
F: Fn(&Spec) -> Option<&'a Vec<u8>>, F: Fn(&Spec) -> Option<&'a [u8]>,
{ {
for i in 0..self.format.len() { for i in 0..self.format.len() {
if i > 0 { if i > 0 {
stdout().write_all(&[self.separator])?; writer.write_all(&[self.separator])?;
} }
let field = match f(&self.format[i]) { let field = match f(&self.format[i]) {
@ -147,13 +189,13 @@ impl<'a> Repr<'a> {
None => self.empty, None => self.empty,
}; };
stdout().write_all(field)?; writer.write_all(field)?;
} }
Ok(()) Ok(())
} }
fn print_line_ending(&self) -> Result<(), std::io::Error> { fn print_line_ending(&self, writer: &mut impl Write) -> Result<(), std::io::Error> {
stdout().write_all(&[self.line_ending as u8]) writer.write_all(&[self.line_ending as u8])
} }
} }
@ -173,7 +215,7 @@ impl Input {
} }
} }
fn compare(&self, field1: Option<&Vec<u8>>, field2: Option<&Vec<u8>>) -> Ordering { fn compare(&self, field1: Option<&[u8]>, field2: Option<&[u8]>) -> Ordering {
if let (Some(field1), Some(field2)) = (field1, field2) { if let (Some(field1), Some(field2)) = (field1, field2) {
if self.ignore_case { if self.ignore_case {
field1 field1
@ -236,30 +278,41 @@ impl Spec {
} }
struct Line { struct Line {
fields: Vec<Vec<u8>>, field_ranges: Vec<(usize, usize)>,
string: Vec<u8>, string: Vec<u8>,
} }
impl Line { impl Line {
fn new(string: Vec<u8>, separator: Sep) -> Self { fn new(string: Vec<u8>, separator: Sep, len_guess: usize) -> Self {
let fields = match separator { let mut field_ranges = Vec::with_capacity(len_guess);
Sep::Whitespaces => string let mut last_end = 0;
if separator == Sep::Whitespaces {
// GNU join uses Bourne shell field splitters by default // GNU join uses Bourne shell field splitters by default
.split(|c| matches!(*c, b' ' | b'\t' | b'\n')) for i in memchr3_iter(b' ', b'\t', b'\n', &string) {
.filter(|f| !f.is_empty()) if i > last_end {
.map(Vec::from) field_ranges.push((last_end, i));
.collect(), }
Sep::Char(sep) => string.split(|c| *c == sep).map(Vec::from).collect(), last_end = i + 1;
Sep::Line => vec![string.clone()], }
}; } else if let Sep::Char(sep) = separator {
for i in memchr_iter(sep, &string) {
field_ranges.push((last_end, i));
last_end = i + 1;
}
}
field_ranges.push((last_end, string.len()));
Self { fields, string } Self {
field_ranges,
string,
}
} }
/// Get field at index. /// Get field at index.
fn get_field(&self, index: usize) -> Option<&Vec<u8>> { fn get_field(&self, index: usize) -> Option<&[u8]> {
if index < self.fields.len() { if index < self.field_ranges.len() {
Some(&self.fields[index]) let (low, high) = self.field_ranges[index];
Some(&self.string[low..high])
} else { } else {
None None
} }
@ -272,6 +325,7 @@ struct State<'a> {
file_num: FileNum, file_num: FileNum,
print_unpaired: bool, print_unpaired: bool,
lines: Split<Box<dyn BufRead + 'a>>, lines: Split<Box<dyn BufRead + 'a>>,
max_len: usize,
seq: Vec<Line>, seq: Vec<Line>,
line_num: usize, line_num: usize,
has_failed: bool, has_failed: bool,
@ -302,6 +356,7 @@ impl<'a> State<'a> {
file_num, file_num,
print_unpaired, print_unpaired,
lines: f.split(line_ending as u8), lines: f.split(line_ending as u8),
max_len: 1,
seq: Vec::new(), seq: Vec::new(),
line_num: 0, line_num: 0,
has_failed: false, has_failed: false,
@ -310,54 +365,69 @@ impl<'a> State<'a> {
} }
/// Skip the current unpaired line. /// Skip the current unpaired line.
fn skip_line(&mut self, input: &Input, repr: &Repr) -> Result<(), std::io::Error> { fn skip_line(
&mut self,
writer: &mut impl Write,
input: &Input,
repr: &Repr,
) -> Result<(), JoinError> {
if self.print_unpaired { if self.print_unpaired {
self.print_first_line(repr)?; self.print_first_line(writer, repr)?;
} }
self.reset_next_line(input); self.reset_next_line(input)?;
Ok(()) Ok(())
} }
/// Keep reading line sequence until the key does not change, return /// Keep reading line sequence until the key does not change, return
/// the first line whose key differs. /// the first line whose key differs.
fn extend(&mut self, input: &Input) -> Option<Line> { fn extend(&mut self, input: &Input) -> Result<Option<Line>, JoinError> {
while let Some(line) = self.next_line(input) { while let Some(line) = self.next_line(input)? {
let diff = input.compare(self.get_current_key(), line.get_field(self.key)); let diff = input.compare(self.get_current_key(), line.get_field(self.key));
if diff == Ordering::Equal { if diff == Ordering::Equal {
self.seq.push(line); self.seq.push(line);
} else { } else {
return Some(line); return Ok(Some(line));
} }
} }
None Ok(None)
} }
/// Print lines in the buffers as headers. /// Print lines in the buffers as headers.
fn print_headers(&self, other: &State, repr: &Repr) -> Result<(), std::io::Error> { fn print_headers(
&self,
writer: &mut impl Write,
other: &State,
repr: &Repr,
) -> Result<(), std::io::Error> {
if self.has_line() { if self.has_line() {
if other.has_line() { if other.has_line() {
self.combine(other, repr)?; self.combine(writer, other, repr)?;
} else { } else {
self.print_first_line(repr)?; self.print_first_line(writer, repr)?;
} }
} else if other.has_line() { } else if other.has_line() {
other.print_first_line(repr)?; other.print_first_line(writer, repr)?;
} }
Ok(()) Ok(())
} }
/// Combine two line sequences. /// Combine two line sequences.
fn combine(&self, other: &State, repr: &Repr) -> Result<(), std::io::Error> { fn combine(
&self,
writer: &mut impl Write,
other: &State,
repr: &Repr,
) -> Result<(), std::io::Error> {
let key = self.get_current_key(); let key = self.get_current_key();
for line1 in &self.seq { for line1 in &self.seq {
for line2 in &other.seq { for line2 in &other.seq {
if repr.uses_format() { if repr.uses_format() {
repr.print_format(|spec| match *spec { repr.print_format(writer, |spec| match *spec {
Spec::Key => key, Spec::Key => key,
Spec::Field(file_num, field_num) => { Spec::Field(file_num, field_num) => {
if file_num == self.file_num { if file_num == self.file_num {
@ -372,12 +442,12 @@ impl<'a> State<'a> {
} }
})?; })?;
} else { } else {
repr.print_field(key)?; repr.print_field(writer, key)?;
repr.print_fields(line1, self.key)?; repr.print_fields(writer, line1, self.key)?;
repr.print_fields(line2, other.key)?; repr.print_fields(writer, line2, other.key)?;
} }
repr.print_line_ending()?; repr.print_line_ending(writer)?;
} }
} }
@ -393,14 +463,16 @@ impl<'a> State<'a> {
} }
} }
fn reset_read_line(&mut self, input: &Input) { fn reset_read_line(&mut self, input: &Input) -> Result<(), std::io::Error> {
let line = self.read_line(input.separator); let line = self.read_line(input.separator)?;
self.reset(line); self.reset(line);
Ok(())
} }
fn reset_next_line(&mut self, input: &Input) { fn reset_next_line(&mut self, input: &Input) -> Result<(), JoinError> {
let line = self.next_line(input); let line = self.next_line(input)?;
self.reset(line); self.reset(line);
Ok(())
} }
fn has_line(&self) -> bool { fn has_line(&self) -> bool {
@ -408,29 +480,34 @@ impl<'a> State<'a> {
} }
fn initialize(&mut self, read_sep: Sep, autoformat: bool) -> usize { fn initialize(&mut self, read_sep: Sep, autoformat: bool) -> usize {
if let Some(line) = self.read_line(read_sep) { if let Some(line) = crash_if_err!(1, self.read_line(read_sep)) {
self.seq.push(line); self.seq.push(line);
if autoformat { if autoformat {
return self.seq[0].fields.len(); return self.seq[0].field_ranges.len();
} }
} }
0 0
} }
fn finalize(&mut self, input: &Input, repr: &Repr) -> Result<(), std::io::Error> { fn finalize(
&mut self,
writer: &mut impl Write,
input: &Input,
repr: &Repr,
) -> Result<(), JoinError> {
if self.has_line() { if self.has_line() {
if self.print_unpaired { if self.print_unpaired {
self.print_first_line(repr)?; self.print_first_line(writer, repr)?;
} }
let mut next_line = self.next_line(input); let mut next_line = self.next_line(input)?;
while let Some(line) = &next_line { while let Some(line) = &next_line {
if self.print_unpaired { if self.print_unpaired {
self.print_line(line, repr)?; self.print_line(writer, line, repr)?;
} }
self.reset(next_line); self.reset(next_line);
next_line = self.next_line(input); next_line = self.next_line(input)?;
} }
} }
@ -438,51 +515,66 @@ impl<'a> State<'a> {
} }
/// Get the next line without the order check. /// Get the next line without the order check.
fn read_line(&mut self, sep: Sep) -> Option<Line> { fn read_line(&mut self, sep: Sep) -> Result<Option<Line>, std::io::Error> {
let value = self.lines.next()?; match self.lines.next() {
Some(value) => {
self.line_num += 1; self.line_num += 1;
Some(Line::new(crash_if_err!(1, value), sep)) let line = Line::new(value?, sep, self.max_len);
if line.field_ranges.len() > self.max_len {
self.max_len = line.field_ranges.len();
}
Ok(Some(line))
}
None => Ok(None),
}
} }
/// Get the next line with the order check. /// Get the next line with the order check.
fn next_line(&mut self, input: &Input) -> Option<Line> { fn next_line(&mut self, input: &Input) -> Result<Option<Line>, JoinError> {
let line = self.read_line(input.separator)?; if let Some(line) = self.read_line(input.separator)? {
if input.check_order == CheckOrder::Disabled { if input.check_order == CheckOrder::Disabled {
return Some(line); return Ok(Some(line));
} }
let diff = input.compare(self.get_current_key(), line.get_field(self.key)); let diff = input.compare(self.get_current_key(), line.get_field(self.key));
if diff == Ordering::Greater { if diff == Ordering::Greater
if input.check_order == CheckOrder::Enabled || (self.has_unpaired && !self.has_failed) { && (input.check_order == CheckOrder::Enabled
eprintln!( || (self.has_unpaired && !self.has_failed))
"{}: {}:{}: is not sorted: {}", {
uucore::execution_phrase(), let err_msg = format!(
"{}:{}: is not sorted: {}",
self.file_name.maybe_quote(), self.file_name.maybe_quote(),
self.line_num, self.line_num,
String::from_utf8_lossy(&line.string) String::from_utf8_lossy(&line.string)
); );
self.has_failed = true;
}
// This is fatal if the check is enabled. // This is fatal if the check is enabled.
if input.check_order == CheckOrder::Enabled { if input.check_order == CheckOrder::Enabled {
std::process::exit(1); return Err(JoinError::UnorderedInput(err_msg));
} }
eprintln!("{}: {}", uucore::execution_phrase(), err_msg);
self.has_failed = true;
} }
Some(line) Ok(Some(line))
} else {
Ok(None)
}
} }
/// Gets the key value of the lines stored in seq. /// Gets the key value of the lines stored in seq.
fn get_current_key(&self) -> Option<&Vec<u8>> { fn get_current_key(&self) -> Option<&[u8]> {
self.seq[0].get_field(self.key) self.seq[0].get_field(self.key)
} }
fn print_line(&self, line: &Line, repr: &Repr) -> Result<(), std::io::Error> { fn print_line(
&self,
writer: &mut impl Write,
line: &Line,
repr: &Repr,
) -> Result<(), std::io::Error> {
if repr.uses_format() { if repr.uses_format() {
repr.print_format(|spec| match *spec { repr.print_format(writer, |spec| match *spec {
Spec::Key => line.get_field(self.key), Spec::Key => line.get_field(self.key),
Spec::Field(file_num, field_num) => { Spec::Field(file_num, field_num) => {
if file_num == self.file_num { if file_num == self.file_num {
@ -493,15 +585,15 @@ impl<'a> State<'a> {
} }
})?; })?;
} else { } else {
repr.print_field(line.get_field(self.key))?; repr.print_field(writer, line.get_field(self.key))?;
repr.print_fields(line, self.key)?; repr.print_fields(writer, line, self.key)?;
} }
repr.print_line_ending() repr.print_line_ending(writer)
} }
fn print_first_line(&self, repr: &Repr) -> Result<(), std::io::Error> { fn print_first_line(&self, writer: &mut impl Write, repr: &Repr) -> Result<(), std::io::Error> {
self.print_line(&self.seq[0], repr) self.print_line(writer, &self.seq[0], repr)
} }
} }
@ -718,7 +810,7 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2",
) )
} }
fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), std::io::Error> { fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), JoinError> {
let stdin = stdin(); let stdin = stdin();
let mut state1 = State::new( let mut state1 = State::new(
@ -774,10 +866,13 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), std::io::Err
&settings.empty, &settings.empty,
); );
let stdout = stdout();
let mut writer = BufWriter::new(stdout.lock());
if settings.headers { if settings.headers {
state1.print_headers(&state2, &repr)?; state1.print_headers(&mut writer, &state2, &repr)?;
state1.reset_read_line(&input); state1.reset_read_line(&input)?;
state2.reset_read_line(&input); state2.reset_read_line(&input)?;
} }
while state1.has_line() && state2.has_line() { while state1.has_line() && state2.has_line() {
@ -785,21 +880,39 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), std::io::Err
match diff { match diff {
Ordering::Less => { Ordering::Less => {
state1.skip_line(&input, &repr)?; if let Err(e) = state1.skip_line(&mut writer, &input, &repr) {
writer.flush()?;
return Err(e);
}
state1.has_unpaired = true; state1.has_unpaired = true;
state2.has_unpaired = true; state2.has_unpaired = true;
} }
Ordering::Greater => { Ordering::Greater => {
state2.skip_line(&input, &repr)?; if let Err(e) = state2.skip_line(&mut writer, &input, &repr) {
writer.flush()?;
return Err(e);
}
state1.has_unpaired = true; state1.has_unpaired = true;
state2.has_unpaired = true; state2.has_unpaired = true;
} }
Ordering::Equal => { Ordering::Equal => {
let next_line1 = state1.extend(&input); let next_line1 = match state1.extend(&input) {
let next_line2 = state2.extend(&input); Ok(line) => line,
Err(e) => {
writer.flush()?;
return Err(e);
}
};
let next_line2 = match state2.extend(&input) {
Ok(line) => line,
Err(e) => {
writer.flush()?;
return Err(e);
}
};
if settings.print_joined { if settings.print_joined {
state1.combine(&state2, &repr)?; state1.combine(&mut writer, &state2, &repr)?;
} }
state1.reset(next_line1); state1.reset(next_line1);
@ -808,8 +921,16 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), std::io::Err
} }
} }
state1.finalize(&input, &repr)?; if let Err(e) = state1.finalize(&mut writer, &input, &repr) {
state2.finalize(&input, &repr)?; writer.flush()?;
return Err(e);
};
if let Err(e) = state2.finalize(&mut writer, &input, &repr) {
writer.flush()?;
return Err(e);
};
writer.flush()?;
if state1.has_failed || state2.has_failed { if state1.has_failed || state2.has_failed {
eprintln!( eprintln!(

View file

@ -74,7 +74,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
table(); table();
Ok(()) Ok(())
} }
Mode::List => list(pids_or_signals.get(0).cloned()), Mode::List => list(pids_or_signals.get(0)),
} }
} }
@ -168,9 +168,9 @@ fn print_signals() {
println!(); println!();
} }
fn list(arg: Option<String>) -> UResult<()> { fn list(arg: Option<&String>) -> UResult<()> {
match arg { match arg {
Some(ref x) => print_signal(x), Some(x) => print_signal(x),
None => { None => {
print_signals(); print_signals();
Ok(()) Ok(())

View file

@ -324,16 +324,16 @@ struct LongFormat {
struct PaddingCollection { struct PaddingCollection {
#[cfg(unix)] #[cfg(unix)]
longest_inode_len: usize, inode: usize,
longest_link_count_len: usize, link_count: usize,
longest_uname_len: usize, uname: usize,
longest_group_len: usize, group: usize,
longest_context_len: usize, context: usize,
longest_size_len: usize, size: usize,
#[cfg(unix)] #[cfg(unix)]
longest_major_len: usize, major: usize,
#[cfg(unix)] #[cfg(unix)]
longest_minor_len: usize, minor: usize,
} }
impl Config { impl Config {
@ -583,8 +583,19 @@ impl Config {
"slash" => IndicatorStyle::Slash, "slash" => IndicatorStyle::Slash,
&_ => IndicatorStyle::None, &_ => IndicatorStyle::None,
} }
} else if options.is_present(options::indicator_style::CLASSIFY) { } else if let Some(field) = options.value_of(options::indicator_style::CLASSIFY) {
match field {
"never" | "no" | "none" => IndicatorStyle::None,
"always" | "yes" | "force" => IndicatorStyle::Classify,
"auto" | "tty" | "if-tty" => {
if atty::is(atty::Stream::Stdout) {
IndicatorStyle::Classify IndicatorStyle::Classify
} else {
IndicatorStyle::None
}
}
&_ => IndicatorStyle::None,
}
} else if options.is_present(options::indicator_style::SLASH) { } else if options.is_present(options::indicator_style::SLASH) {
IndicatorStyle::Slash IndicatorStyle::Slash
} else if options.is_present(options::indicator_style::FILE_TYPE) { } else if options.is_present(options::indicator_style::FILE_TYPE) {
@ -1202,6 +1213,11 @@ only ignore '.' and '..'.",
]), ]),
) )
.arg( .arg(
// Since version 9 of GNU coreutils, the --classify flag accepts an optional
// `when` argument that controls its behavior.
// There is currently an inconsistency: GNU coreutils allows only
// the long form of the flag to take the argument, while we allow it
// for both the long and the short form of the flag.
Arg::new(options::indicator_style::CLASSIFY) Arg::new(options::indicator_style::CLASSIFY)
.short('F') .short('F')
.long(options::indicator_style::CLASSIFY) .long(options::indicator_style::CLASSIFY)
@ -1209,8 +1225,22 @@ only ignore '.' and '..'.",
"Append a character to each file name indicating the file type. Also, for \ "Append a character to each file name indicating the file type. Also, for \
regular files that are executable, append '*'. The file type indicators are \ regular files that are executable, append '*'. The file type indicators are \
'/' for directories, '@' for symbolic links, '|' for FIFOs, '=' for sockets, \ '/' for directories, '@' for symbolic links, '|' for FIFOs, '=' for sockets, \
'>' for doors, and nothing for regular files.", '>' for doors, and nothing for regular files. when may be omitted, or one of:\n\
\tnone - Do not classify. This is the default.\n\
\tauto - Only classify if standard output is a terminal.\n\
\talways - Always classify.\n\
Specifying --classify and no when is equivalent to --classify=always. This will not follow\
symbolic links listed on the command line unless the --dereference-command-line (-H),\
--dereference (-L), or --dereference-command-line-symlink-to-dir options are specified.",
) )
.takes_value(true)
.value_name("when")
.possible_values(&[
"always", "yes", "force", "auto", "tty", "if-tty", "never", "no", "none",
])
.default_missing_value("always")
.require_equals(true)
.min_values(0)
.overrides_with_all(&[ .overrides_with_all(&[
options::indicator_style::FILE_TYPE, options::indicator_style::FILE_TYPE,
options::indicator_style::SLASH, options::indicator_style::SLASH,
@ -1275,9 +1305,9 @@ only ignore '.' and '..'.",
) )
} }
/// Represents a Path along with it's associated data /// Represents a Path along with it's associated data.
/// Any data that will be reused several times makes sense to be added to this structure /// Any data that will be reused several times makes sense to be added to this structure.
/// Caching data here helps eliminate redundant syscalls to fetch same information /// Caching data here helps eliminate redundant syscalls to fetch same information.
#[derive(Debug)] #[derive(Debug)]
struct PathData { struct PathData {
// Result<MetaData> got from symlink_metadata() or metadata() based on config // Result<MetaData> got from symlink_metadata() or metadata() based on config
@ -1379,7 +1409,8 @@ impl PathData {
// if not, check if we can use Path metadata // if not, check if we can use Path metadata
match get_metadata(self.p_buf.as_path(), self.must_dereference) { match get_metadata(self.p_buf.as_path(), self.must_dereference) {
Err(err) => { Err(err) => {
let _ = out.flush(); // FIXME: A bit tricky to propagate the result here
out.flush().unwrap();
let errno = err.raw_os_error().unwrap_or(1i32); let errno = err.raw_os_error().unwrap_or(1i32);
// a bad fd will throw an error when dereferenced, // a bad fd will throw an error when dereferenced,
// but GNU will not throw an error until a bad fd "dir" // but GNU will not throw an error until a bad fd "dir"
@ -1443,7 +1474,7 @@ fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
sort_entries(&mut files, config, &mut out); sort_entries(&mut files, config, &mut out);
sort_entries(&mut dirs, config, &mut out); sort_entries(&mut dirs, config, &mut out);
display_items(&files, config, &mut out); display_items(&files, config, &mut out)?;
for (pos, path_data) in dirs.iter().enumerate() { for (pos, path_data) in dirs.iter().enumerate() {
// Do read_dir call here to match GNU semantics by printing // Do read_dir call here to match GNU semantics by printing
@ -1451,7 +1482,7 @@ fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
let read_dir = match fs::read_dir(&path_data.p_buf) { let read_dir = match fs::read_dir(&path_data.p_buf) {
Err(err) => { Err(err) => {
// flush stdout buffer before the error to preserve formatting and order // flush stdout buffer before the error to preserve formatting and order
let _ = out.flush(); out.flush()?;
show!(LsError::IOErrorContext(err, path_data.p_buf.clone())); show!(LsError::IOErrorContext(err, path_data.p_buf.clone()));
continue; continue;
} }
@ -1461,12 +1492,12 @@ fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
// Print dir heading - name... 'total' comes after error display // Print dir heading - name... 'total' comes after error display
if initial_locs_len > 1 || config.recursive { if initial_locs_len > 1 || config.recursive {
if pos.eq(&0usize) && files.is_empty() { if pos.eq(&0usize) && files.is_empty() {
let _ = writeln!(out, "{}:", path_data.p_buf.display()); writeln!(out, "{}:", path_data.p_buf.display())?;
} else { } else {
let _ = writeln!(out, "\n{}:", path_data.p_buf.display()); writeln!(out, "\n{}:", path_data.p_buf.display())?;
} }
} }
enter_directory(path_data, read_dir, config, &mut out); enter_directory(path_data, read_dir, config, &mut out)?;
} }
Ok(()) Ok(())
@ -1540,7 +1571,7 @@ fn enter_directory(
read_dir: ReadDir, read_dir: ReadDir,
config: &Config, config: &Config,
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
) { ) -> UResult<()> {
// Create vec of entries with initial dot files // Create vec of entries with initial dot files
let mut entries: Vec<PathData> = if config.files == Files::All { let mut entries: Vec<PathData> = if config.files == Files::All {
vec![ vec![
@ -1570,7 +1601,7 @@ fn enter_directory(
let dir_entry = match raw_entry { let dir_entry = match raw_entry {
Ok(path) => path, Ok(path) => path,
Err(err) => { Err(err) => {
let _ = out.flush(); out.flush()?;
show!(LsError::IOError(err)); show!(LsError::IOError(err));
continue; continue;
} }
@ -1588,10 +1619,10 @@ fn enter_directory(
// Print total after any error display // Print total after any error display
if config.format == Format::Long { if config.format == Format::Long {
display_total(&entries, config, out); display_total(&entries, config, out)?;
} }
display_items(&entries, config, out); display_items(&entries, config, out)?;
if config.recursive { if config.recursive {
for e in entries for e in entries
@ -1603,17 +1634,19 @@ fn enter_directory(
{ {
match fs::read_dir(&e.p_buf) { match fs::read_dir(&e.p_buf) {
Err(err) => { Err(err) => {
let _ = out.flush(); out.flush()?;
show!(LsError::IOErrorContext(err, e.p_buf.clone())); show!(LsError::IOErrorContext(err, e.p_buf.clone()));
continue; continue;
} }
Ok(rd) => { Ok(rd) => {
let _ = writeln!(out, "\n{}:", e.p_buf.display()); writeln!(out, "\n{}:", e.p_buf.display())?;
enter_directory(e, rd, config, out); enter_directory(e, rd, config, out)?;
} }
} }
} }
} }
Ok(())
} }
fn get_metadata(p_buf: &Path, dereference: bool) -> std::io::Result<Metadata> { fn get_metadata(p_buf: &Path, dereference: bool) -> std::io::Result<Metadata> {
@ -1661,7 +1694,7 @@ fn pad_right(string: &str, count: usize) -> String {
format!("{:<width$}", string, width = count) format!("{:<width$}", string, width = count)
} }
fn display_total(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout>) { fn display_total(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout>) -> UResult<()> {
let mut total_size = 0; let mut total_size = 0;
for item in items { for item in items {
total_size += item total_size += item
@ -1669,101 +1702,20 @@ fn display_total(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout
.as_ref() .as_ref()
.map_or(0, |md| get_block_size(md, config)); .map_or(0, |md| get_block_size(md, config));
} }
let _ = writeln!(out, "total {}", display_size(total_size, config)); writeln!(out, "total {}", display_size(total_size, config))?;
Ok(())
} }
fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout>) { fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout>) -> UResult<()> {
// `-Z`, `--context`: // `-Z`, `--context`:
// Display the SELinux security context or '?' if none is found. When used with the `-l` // Display the SELinux security context or '?' if none is found. When used with the `-l`
// option, print the security context to the left of the size column. // option, print the security context to the left of the size column.
if config.format == Format::Long { if config.format == Format::Long {
#[cfg(unix)] let padding_collection = calculate_padding_collection(items, config, out);
let (
mut longest_inode_len,
mut longest_link_count_len,
mut longest_uname_len,
mut longest_group_len,
mut longest_context_len,
mut longest_size_len,
mut longest_major_len,
mut longest_minor_len,
) = (1, 1, 1, 1, 1, 1, 1, 1);
#[cfg(not(unix))]
let (
mut longest_link_count_len,
mut longest_uname_len,
mut longest_group_len,
mut longest_context_len,
mut longest_size_len,
) = (1, 1, 1, 1, 1);
#[cfg(unix)]
for item in items {
let context_len = item.security_context.len();
let (link_count_len, uname_len, group_len, size_len, major_len, minor_len, inode_len) =
display_dir_entry_size(item, config, out);
longest_inode_len = inode_len.max(longest_inode_len);
longest_link_count_len = link_count_len.max(longest_link_count_len);
longest_uname_len = uname_len.max(longest_uname_len);
longest_group_len = group_len.max(longest_group_len);
if config.context {
longest_context_len = context_len.max(longest_context_len);
}
if items.len() == 1usize {
longest_size_len = 0usize;
longest_major_len = 0usize;
longest_minor_len = 0usize;
} else {
longest_major_len = major_len.max(longest_major_len);
longest_minor_len = minor_len.max(longest_minor_len);
longest_size_len = size_len
.max(longest_size_len)
.max(longest_major_len + longest_minor_len + 2usize);
}
}
#[cfg(not(unix))]
for item in items {
let context_len = item.security_context.len();
let (
link_count_len,
uname_len,
group_len,
size_len,
_major_len,
_minor_len,
_inode_len,
) = display_dir_entry_size(item, config, out);
longest_link_count_len = link_count_len.max(longest_link_count_len);
longest_uname_len = uname_len.max(longest_uname_len);
longest_group_len = group_len.max(longest_group_len);
if config.context {
longest_context_len = context_len.max(longest_context_len);
}
longest_size_len = size_len.max(longest_size_len);
}
for item in items { for item in items {
display_item_long( display_item_long(item, &padding_collection, config, out)?;
item,
&PaddingCollection {
#[cfg(unix)]
longest_inode_len,
longest_link_count_len,
longest_uname_len,
longest_group_len,
longest_context_len,
longest_size_len,
#[cfg(unix)]
longest_major_len,
#[cfg(unix)]
longest_minor_len,
},
config,
out,
);
} }
} else { } else {
let mut longest_context_len = 1; let mut longest_context_len = 1;
@ -1800,13 +1752,13 @@ fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout
.into_iter(); .into_iter();
match config.format { match config.format {
Format::Columns => display_grid(names, config.width, Direction::TopToBottom, out), Format::Columns => display_grid(names, config.width, Direction::TopToBottom, out)?,
Format::Across => display_grid(names, config.width, Direction::LeftToRight, out), Format::Across => display_grid(names, config.width, Direction::LeftToRight, out)?,
Format::Commas => { Format::Commas => {
let mut current_col = 0; let mut current_col = 0;
let mut names = names; let mut names = names;
if let Some(name) = names.next() { if let Some(name) = names.next() {
let _ = write!(out, "{}", name.contents); write!(out, "{}", name.contents)?;
current_col = name.width as u16 + 2; current_col = name.width as u16 + 2;
} }
for name in names { for name in names {
@ -1814,25 +1766,27 @@ fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter<Stdout
// If the width is 0 we print one single line // If the width is 0 we print one single line
if config.width != 0 && current_col + name_width + 1 > config.width { if config.width != 0 && current_col + name_width + 1 > config.width {
current_col = name_width + 2; current_col = name_width + 2;
let _ = write!(out, ",\n{}", name.contents); write!(out, ",\n{}", name.contents)?;
} else { } else {
current_col += name_width + 2; current_col += name_width + 2;
let _ = write!(out, ", {}", name.contents); write!(out, ", {}", name.contents)?;
} }
} }
// Current col is never zero again if names have been printed. // Current col is never zero again if names have been printed.
// So we print a newline. // So we print a newline.
if current_col > 0 { if current_col > 0 {
let _ = writeln!(out,); writeln!(out,)?;
} }
} }
_ => { _ => {
for name in names { for name in names {
let _ = writeln!(out, "{}", name.contents); writeln!(out, "{}", name.contents)?;
}
} }
} }
};
} }
Ok(())
} }
fn get_block_size(md: &Metadata, config: &Config) -> u64 { fn get_block_size(md: &Metadata, config: &Config) -> u64 {
@ -1851,6 +1805,7 @@ fn get_block_size(md: &Metadata, config: &Config) -> u64 {
#[cfg(not(unix))] #[cfg(not(unix))]
{ {
// Silence linter warning about `config` being unused for windows.
let _ = config; let _ = config;
// no way to get block size for windows, fall-back to file size // no way to get block size for windows, fall-back to file size
md.len() md.len()
@ -1862,19 +1817,19 @@ fn display_grid(
width: u16, width: u16,
direction: Direction, direction: Direction,
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
) { ) -> UResult<()> {
if width == 0 { if width == 0 {
// If the width is 0 we print one single line // If the width is 0 we print one single line
let mut printed_something = false; let mut printed_something = false;
for name in names { for name in names {
if printed_something { if printed_something {
let _ = write!(out, " "); write!(out, " ")?;
} }
printed_something = true; printed_something = true;
let _ = write!(out, "{}", name.contents); write!(out, "{}", name.contents)?;
} }
if printed_something { if printed_something {
let _ = writeln!(out); writeln!(out)?;
} }
} else { } else {
let mut grid = Grid::new(GridOptions { let mut grid = Grid::new(GridOptions {
@ -1888,14 +1843,15 @@ fn display_grid(
match grid.fit_into_width(width as usize) { match grid.fit_into_width(width as usize) {
Some(output) => { Some(output) => {
let _ = write!(out, "{}", output); write!(out, "{}", output)?;
} }
// Width is too small for the grid, so we fit it in one column // Width is too small for the grid, so we fit it in one column
None => { None => {
let _ = write!(out, "{}", grid.fit_into_columns(1)); write!(out, "{}", grid.fit_into_columns(1))?;
} }
} }
} }
Ok(())
} }
/// This writes to the BufWriter out a single string of the output of `ls -l`. /// This writes to the BufWriter out a single string of the output of `ls -l`.
@ -1931,20 +1887,16 @@ fn display_item_long(
padding: &PaddingCollection, padding: &PaddingCollection,
config: &Config, config: &Config,
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
) { ) -> UResult<()> {
if let Some(md) = item.md(out) { if let Some(md) = item.md(out) {
#[cfg(unix)] #[cfg(unix)]
{ {
if config.inode { if config.inode {
let _ = write!( write!(out, "{} ", pad_left(&get_inode(md), padding.inode))?;
out,
"{} ",
pad_left(&get_inode(md), padding.longest_inode_len),
);
} }
} }
let _ = write!( write!(
out, out,
"{}{} {}", "{}{} {}",
display_permissions(md, true), display_permissions(md, true),
@ -1955,49 +1907,49 @@ fn display_item_long(
} else { } else {
"" ""
}, },
pad_left(&display_symlink_count(md), padding.longest_link_count_len), pad_left(&display_symlink_count(md), padding.link_count)
); )?;
if config.long.owner { if config.long.owner {
let _ = write!( write!(
out, out,
" {}", " {}",
pad_right(&display_uname(md, config), padding.longest_uname_len), pad_right(&display_uname(md, config), padding.uname)
); )?;
} }
if config.long.group { if config.long.group {
let _ = write!( write!(
out, out,
" {}", " {}",
pad_right(&display_group(md, config), padding.longest_group_len), pad_right(&display_group(md, config), padding.group)
); )?;
} }
if config.context { if config.context {
let _ = write!( write!(
out, out,
" {}", " {}",
pad_right(&item.security_context, padding.longest_context_len), pad_right(&item.security_context, padding.context)
); )?;
} }
// Author is only different from owner on GNU/Hurd, so we reuse // Author is only different from owner on GNU/Hurd, so we reuse
// the owner, since GNU/Hurd is not currently supported by Rust. // the owner, since GNU/Hurd is not currently supported by Rust.
if config.long.author { if config.long.author {
let _ = write!( write!(
out, out,
" {}", " {}",
pad_right(&display_uname(md, config), padding.longest_uname_len), pad_right(&display_uname(md, config), padding.uname)
); )?;
} }
match display_size_or_rdev(md, config) { match display_size_or_rdev(md, config) {
SizeOrDeviceId::Size(size) => { SizeOrDeviceId::Size(size) => {
let _ = write!(out, " {}", pad_left(&size, padding.longest_size_len),); write!(out, " {}", pad_left(&size, padding.size))?;
} }
SizeOrDeviceId::Device(major, minor) => { SizeOrDeviceId::Device(major, minor) => {
let _ = write!( write!(
out, out,
" {}, {}", " {}, {}",
pad_left( pad_left(
@ -2005,10 +1957,10 @@ fn display_item_long(
#[cfg(not(unix))] #[cfg(not(unix))]
0usize, 0usize,
#[cfg(unix)] #[cfg(unix)]
padding.longest_major_len.max( padding.major.max(
padding padding
.longest_size_len .size
.saturating_sub(padding.longest_minor_len.saturating_add(2usize)) .saturating_sub(padding.minor.saturating_add(2usize))
) )
), ),
pad_left( pad_left(
@ -2016,21 +1968,21 @@ fn display_item_long(
#[cfg(not(unix))] #[cfg(not(unix))]
0usize, 0usize,
#[cfg(unix)] #[cfg(unix)]
padding.longest_minor_len, padding.minor,
), ),
); )?;
} }
}; };
let dfn = display_file_name(item, config, None, 0, out).contents; let dfn = display_file_name(item, config, None, 0, out).contents;
let _ = writeln!(out, " {} {}", display_date(md, config), dfn); writeln!(out, " {} {}", display_date(md, config), dfn)?;
} else { } else {
// this 'else' is expressly for the case of a dangling symlink/restricted file // this 'else' is expressly for the case of a dangling symlink/restricted file
#[cfg(unix)] #[cfg(unix)]
{ {
if config.inode { if config.inode {
let _ = write!(out, "{} ", pad_left("?", padding.longest_inode_len),); write!(out, "{} ", pad_left("?", padding.inode))?;
} }
} }
@ -2067,7 +2019,7 @@ fn display_item_long(
} }
}; };
let _ = write!( write!(
out, out,
"{}{} {}", "{}{} {}",
format_args!("{}?????????", leading_char), format_args!("{}?????????", leading_char),
@ -2078,42 +2030,44 @@ fn display_item_long(
} else { } else {
"" ""
}, },
pad_left("?", padding.longest_link_count_len), pad_left("?", padding.link_count)
); )?;
if config.long.owner { if config.long.owner {
let _ = write!(out, " {}", pad_right("?", padding.longest_uname_len)); write!(out, " {}", pad_right("?", padding.uname))?;
} }
if config.long.group { if config.long.group {
let _ = write!(out, " {}", pad_right("?", padding.longest_group_len)); write!(out, " {}", pad_right("?", padding.group))?;
} }
if config.context { if config.context {
let _ = write!( write!(
out, out,
" {}", " {}",
pad_right(&item.security_context, padding.longest_context_len) pad_right(&item.security_context, padding.context)
); )?;
} }
// Author is only different from owner on GNU/Hurd, so we reuse // Author is only different from owner on GNU/Hurd, so we reuse
// the owner, since GNU/Hurd is not currently supported by Rust. // the owner, since GNU/Hurd is not currently supported by Rust.
if config.long.author { if config.long.author {
let _ = write!(out, " {}", pad_right("?", padding.longest_uname_len)); write!(out, " {}", pad_right("?", padding.uname))?;
} }
let dfn = display_file_name(item, config, None, 0, out).contents; let dfn = display_file_name(item, config, None, 0, out).contents;
let date_len = 12; let date_len = 12;
let _ = writeln!( writeln!(
out, out,
" {} {} {}", " {} {} {}",
pad_left("?", padding.longest_size_len), pad_left("?", padding.size),
pad_left("?", date_len), pad_left("?", date_len),
dfn, dfn,
); )?;
} }
Ok(())
} }
#[cfg(unix)] #[cfg(unix)]
@ -2564,3 +2518,77 @@ fn get_security_context(config: &Config, p_buf: &Path, must_dereference: bool) -
substitute_string substitute_string
} }
} }
/// Computes the column widths used to align the long-format listing of `items`.
///
/// Every width starts at 1 and is widened to the largest length that
/// `display_dir_entry_size` reports for any entry. The security-context width
/// is only tracked when `config.context` is set.
///
/// When `items` holds exactly one entry, the size, major and minor widths are
/// forced to 0. Otherwise the size width is additionally kept at least as wide
/// as `major + minor + 2`.
#[cfg(unix)]
fn calculate_padding_collection(
    items: &[PathData],
    config: &Config,
    out: &mut BufWriter<Stdout>,
) -> PaddingCollection {
    // The entry count never changes while iterating, so evaluate it once.
    let single_entry = items.len() == 1usize;

    let mut widths = PaddingCollection {
        inode: 1,
        link_count: 1,
        uname: 1,
        group: 1,
        context: 1,
        size: 1,
        major: 1,
        minor: 1,
    };

    for entry in items {
        let (links, owner, group, size, major, minor, inode) =
            display_dir_entry_size(entry, config, out);

        widths.inode = widths.inode.max(inode);
        widths.link_count = widths.link_count.max(links);
        widths.uname = widths.uname.max(owner);
        widths.group = widths.group.max(group);

        if config.context {
            widths.context = widths.context.max(entry.security_context.len());
        }

        if single_entry {
            widths.size = 0usize;
            widths.major = 0usize;
            widths.minor = 0usize;
        } else {
            widths.major = widths.major.max(major);
            widths.minor = widths.minor.max(minor);
            // The size column must also fit "major" + "minor" plus two more
            // characters (presumably the ", " separator used for device ids).
            let device_width = widths.major + widths.minor + 2usize;
            widths.size = widths.size.max(size).max(device_width);
        }
    }

    widths
}
/// Computes the column widths used to align the long-format listing of `items`
/// on non-unix targets.
///
/// Every width starts at 1 and is widened to the largest length that
/// `display_dir_entry_size` reports for any entry. The major, minor and inode
/// lengths it returns are ignored here. The security-context width is only
/// tracked when `config.context` is set.
#[cfg(not(unix))]
fn calculate_padding_collection(
    items: &[PathData],
    config: &Config,
    out: &mut BufWriter<Stdout>,
) -> PaddingCollection {
    let mut widths = PaddingCollection {
        link_count: 1,
        uname: 1,
        group: 1,
        context: 1,
        size: 1,
    };

    for entry in items {
        let (links, owner, group, size, _, _, _) = display_dir_entry_size(entry, config, out);

        widths.link_count = widths.link_count.max(links);
        widths.uname = widths.uname.max(owner);
        widths.group = widths.group.max(group);

        if config.context {
            widths.context = widths.context.max(entry.security_context.len());
        }

        widths.size = widths.size.max(size);
    }

    widths
}

View file

@ -51,8 +51,7 @@ use uucore::InvalidEncodingHandling;
const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes
static ABOUT: &str = "dump files in octal and other formats"; static ABOUT: &str = "dump files in octal and other formats";
static USAGE: &str = r#" static USAGE: &str = r#"od [OPTION]... [--] [FILENAME]...
od [OPTION]... [--] [FILENAME]...
od [-abcdDefFhHiIlLoOsxX] [FILENAME] [[+][0x]OFFSET[.][b]] od [-abcdDefFhHiIlLoOsxX] [FILENAME] [[+][0x]OFFSET[.][b]]
od --traditional [OPTION]... [FILENAME] [[+][0x]OFFSET[.][b] [[+][0x]LABEL[.][b]]]"#; od --traditional [OPTION]... [FILENAME] [[+][0x]OFFSET[.][b] [[+][0x]LABEL[.][b]]]"#;

View file

@ -9,8 +9,7 @@
use clap::{crate_version, App, AppSettings, Arg}; use clap::{crate_version, App, AppSettings, Arg};
use std::fs::File; use std::fs::File;
use std::io::{stdin, BufRead, BufReader, Read}; use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
use std::iter::repeat;
use std::path::Path; use std::path::Path;
use uucore::error::{FromIo, UResult}; use uucore::error::{FromIo, UResult};
@ -77,7 +76,7 @@ pub fn uu_app<'a>() -> App<'a> {
} }
fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> { fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> {
let mut files = vec![]; let mut files = Vec::with_capacity(filenames.len());
for name in filenames { for name in filenames {
let file = if name == "-" { let file = if name == "-" {
None None
@ -89,55 +88,62 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>
files.push(file); files.push(file);
} }
let delimiters: Vec<String> = unescape(delimiters) let delimiters: Vec<char> = unescape(delimiters).chars().collect();
.chars()
.map(|x| x.to_string())
.collect();
let mut delim_count = 0; let mut delim_count = 0;
let stdout = stdout();
let mut stdout = stdout.lock();
let mut output = String::new();
if serial { if serial {
for file in &mut files { for file in &mut files {
let mut output = String::new(); output.clear();
loop { loop {
let mut line = String::new(); match read_line(file.as_mut(), &mut output) {
match read_line(file.as_mut(), &mut line) {
Ok(0) => break, Ok(0) => break,
Ok(_) => { Ok(_) => {
output.push_str(line.trim_end()); if output.ends_with('\n') {
output.push_str(&delimiters[delim_count % delimiters.len()]); output.pop();
}
output.push(delimiters[delim_count % delimiters.len()]);
} }
Err(e) => return Err(e.map_err_context(String::new)), Err(e) => return Err(e.map_err_context(String::new)),
} }
delim_count += 1; delim_count += 1;
} }
println!("{}", &output[..output.len() - 1]); output.pop();
writeln!(stdout, "{}", output)?;
} }
} else { } else {
let mut eof: Vec<bool> = repeat(false).take(files.len()).collect(); let mut eof = vec![false; files.len()];
loop { loop {
let mut output = String::new(); output.clear();
let mut eof_count = 0; let mut eof_count = 0;
for (i, file) in files.iter_mut().enumerate() { for (i, file) in files.iter_mut().enumerate() {
if eof[i] { if eof[i] {
eof_count += 1; eof_count += 1;
} else { } else {
let mut line = String::new(); match read_line(file.as_mut(), &mut output) {
match read_line(file.as_mut(), &mut line) {
Ok(0) => { Ok(0) => {
eof[i] = true; eof[i] = true;
eof_count += 1; eof_count += 1;
} }
Ok(_) => output.push_str(line.trim_end()), Ok(_) => {
if output.ends_with('\n') {
output.pop();
}
}
Err(e) => return Err(e.map_err_context(String::new)), Err(e) => return Err(e.map_err_context(String::new)),
} }
} }
output.push_str(&delimiters[delim_count % delimiters.len()]); output.push(delimiters[delim_count % delimiters.len()]);
delim_count += 1; delim_count += 1;
} }
if files.len() == eof_count { if files.len() == eof_count {
break; break;
} }
println!("{}", &output[..output.len() - 1]); // Remove final delimiter
output.pop();
writeln!(stdout, "{}", output)?;
delim_count = 0; delim_count = 0;
} }
} }

View file

@ -32,6 +32,8 @@ type IOError = std::io::Error;
const NAME: &str = "pr"; const NAME: &str = "pr";
const VERSION: &str = env!("CARGO_PKG_VERSION"); const VERSION: &str = env!("CARGO_PKG_VERSION");
const ABOUT: &str =
"Write content of given file or standard input to standard output with pagination filter";
const TAB: char = '\t'; const TAB: char = '\t';
const LINES_PER_PAGE: usize = 66; const LINES_PER_PAGE: usize = 66;
const LINES_PER_PAGE_FOR_FORM_FEED: usize = 63; const LINES_PER_PAGE_FOR_FORM_FEED: usize = 63;
@ -172,7 +174,10 @@ quick_error! {
} }
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()).setting(AppSettings::InferLongArgs) App::new(uucore::util_name())
.version(VERSION)
.about(ABOUT)
.setting(AppSettings::InferLongArgs)
} }
#[uucore::main] #[uucore::main]

View file

@ -9,160 +9,153 @@ use uucore::InvalidEncodingHandling;
const VERSION: &str = "version"; const VERSION: &str = "version";
const HELP: &str = "help"; const HELP: &str = "help";
static LONGHELP_LEAD: &str = "printf const USAGE: &str = "printf FORMATSTRING [ARGUMENT]...";
const ABOUT: &str = "Print output based off of the format string and proceeding arguments.";
const AFTER_HELP: &str = "
basic anonymous string templating:
USAGE: printf FORMATSTRING [ARGUMENT]... prints format string at least once, repeating as long as there are remaining arguments
output prints escaped literals in the format string as character literals
output replaces anonymous fields with the next unused argument, formatted according to the field.
basic anonymous string templating: Prints the , replacing escaped character sequences with character literals
prints format string at least once, repeating as long as there are remaining arguments
output prints escaped literals in the format string as character literals
output replaces anonymous fields with the next unused argument, formatted according to the field.
Options:
--help display this help and exit
--version output version information and exit
";
static LONGHELP_BODY: &str = "
Prints the , replacing escaped character sequences with character literals
and substitution field sequences with passed arguments and substitution field sequences with passed arguments
literally, with the exception of the below literally, with the exception of the below
escaped character sequences, and the substitution sequences described further down. escaped character sequences, and the substitution sequences described further down.
ESCAPE SEQUENCES ESCAPE SEQUENCES
The following escape sequences, organized here in alphabetical order, The following escape sequences, organized here in alphabetical order,
will print the corresponding character literal: will print the corresponding character literal:
\" double quote \" double quote
\\\\ backslash \\\\ backslash
\\a alert (BEL) \\a alert (BEL)
\\b backspace \\b backspace
\\c End-of-Input \\c End-of-Input
\\e escape \\e escape
\\f form feed \\f form feed
\\n new line \\n new line
\\r carriage return \\r carriage return
\\t horizontal tab \\t horizontal tab
\\v vertical tab \\v vertical tab
\\NNN byte with value expressed in octal value NNN (1 to 3 digits) \\NNN byte with value expressed in octal value NNN (1 to 3 digits)
values greater than 256 will be treated values greater than 256 will be treated
\\xHH byte with value expressed in hexadecimal value NN (1 to 2 digits) \\xHH byte with value expressed in hexadecimal value NN (1 to 2 digits)
\\uHHHH Unicode (IEC 10646) character with value expressed in hexadecimal value HHHH (4 digits) \\uHHHH Unicode (IEC 10646) character with value expressed in hexadecimal value HHHH (4 digits)
\\uHHHH Unicode character with value expressed in hexadecimal value HHHH (8 digits) \\uHHHH Unicode character with value expressed in hexadecimal value HHHH (8 digits)
%% a single % %% a single %
SUBSTITUTIONS SUBSTITUTIONS
SUBSTITUTION QUICK REFERENCE SUBSTITUTION QUICK REFERENCE
Fields Fields
%s - string %s - string
%b - string parsed for literals %b - string parsed for literals
second parameter is max length second parameter is max length
%c - char %c - char
no second parameter no second parameter
%i or %d - 64-bit integer %i or %d - 64-bit integer
%u - 64 bit unsigned integer %u - 64 bit unsigned integer
%x or %X - 64-bit unsigned integer as hex %x or %X - 64-bit unsigned integer as hex
%o - 64-bit unsigned integer as octal %o - 64-bit unsigned integer as octal
second parameter is min-width, integer second parameter is min-width, integer
output below that width is padded with leading zeroes output below that width is padded with leading zeroes
%f or %F - decimal floating point value %f or %F - decimal floating point value
%e or %E - scientific notation floating point value %e or %E - scientific notation floating point value
%g or %G - shorter of specially interpreted decimal or SciNote floating point value. %g or %G - shorter of specially interpreted decimal or SciNote floating point value.
second parameter is second parameter is
-max places after decimal point for floating point output -max places after decimal point for floating point output
-max number of significant digits for scientific notation output -max number of significant digits for scientific notation output
parameterizing fields parameterizing fields
examples: examples:
printf '%4.3i' 7 printf '%4.3i' 7
has a first parameter of 4 has a first parameter of 4
and a second parameter of 3 and a second parameter of 3
will result in ' 007' will result in ' 007'
printf '%.1s' abcde printf '%.1s' abcde
has no first parameter has no first parameter
and a second parameter of 1 and a second parameter of 1
will result in 'a' will result in 'a'
printf '%4c' q printf '%4c' q
has a first parameter of 4 has a first parameter of 4
and no second parameter and no second parameter
will result in ' q' will result in ' q'
The first parameter of a field is the minimum width to pad the output to The first parameter of a field is the minimum width to pad the output to
if the output is less than this absolute value of this width, if the output is less than this absolute value of this width,
it will be padded with leading spaces, or, if the argument is negative, it will be padded with leading spaces, or, if the argument is negative,
with trailing spaces. the default is zero. with trailing spaces. the default is zero.
The second parameter of a field is particular to the output field type. The second parameter of a field is particular to the output field type.
defaults can be found in the full substitution help below defaults can be found in the full substitution help below
special prefixes to numeric arguments special prefixes to numeric arguments
0 (e.g. 010) - interpret argument as octal (integer output fields only) 0 (e.g. 010) - interpret argument as octal (integer output fields only)
0x (e.g. 0xABC) - interpret argument as hex (numeric output fields only) 0x (e.g. 0xABC) - interpret argument as hex (numeric output fields only)
\' (e.g. \'a) - interpret argument as a character constant \' (e.g. \'a) - interpret argument as a character constant
HOW TO USE SUBSTITUTIONS HOW TO USE SUBSTITUTIONS
Substitutions are used to pass additional argument(s) into the FORMAT string, to be formatted a Substitutions are used to pass additional argument(s) into the FORMAT string, to be formatted a
particular way. E.g. particular way. E.g.
printf 'the letter %X comes before the letter %X' 10 11 printf 'the letter %X comes before the letter %X' 10 11
will print will print
'the letter A comes before the letter B' 'the letter A comes before the letter B'
because the substitution field %X means because the substitution field %X means
'take an integer argument and write it as a hexadecimal number' 'take an integer argument and write it as a hexadecimal number'
Passing more arguments than are in the format string will cause the format string to be Passing more arguments than are in the format string will cause the format string to be
repeated for the remaining substitutions repeated for the remaining substitutions
printf 'it is %i F in %s \n' 22 Portland 25 Boston 27 New York printf 'it is %i F in %s \n' 22 Portland 25 Boston 27 New York
will print will print
'it is 22 F in Portland 'it is 22 F in Portland
it is 25 F in Boston it is 25 F in Boston
it is 27 F in Boston it is 27 F in Boston
' '
If a format string is printed but there are less arguments remaining If a format string is printed but there are less arguments remaining
than there are substitution fields, substitution fields without than there are substitution fields, substitution fields without
an argument will default to empty strings, or for numeric fields an argument will default to empty strings, or for numeric fields
the value 0 the value 0
AVAILABLE SUBSTITUTIONS AVAILABLE SUBSTITUTIONS
This program, like GNU coreutils printf, This program, like GNU coreutils printf,
interprets a modified subset of the POSIX C printf spec, interprets a modified subset of the POSIX C printf spec,
a quick reference to substitutions is below. a quick reference to substitutions is below.
STRING SUBSTITUTIONS STRING SUBSTITUTIONS
All string fields have a 'max width' parameter All string fields have a 'max width' parameter
@ -233,7 +226,7 @@ static LONGHELP_BODY: &str = "
behavior in this utility is selected to reproduce in exact behavior in this utility is selected to reproduce in exact
the behavior of GNU coreutils' printf from an inputs and outputs standpoint. the behavior of GNU coreutils' printf from an inputs and outputs standpoint.
USING PARAMETERS USING PARAMETERS
Most substitution fields can be parameterized using up to 2 numbers that can Most substitution fields can be parameterized using up to 2 numbers that can
be passed to the field, between the % sign and the field letter. be passed to the field, between the % sign and the field letter.
@ -243,7 +236,7 @@ static LONGHELP_BODY: &str = "
The 2nd parameter is proceeded by a dot. The 2nd parameter is proceeded by a dot.
You do not have to use parameters You do not have to use parameters
SPECIAL FORMS OF INPUT SPECIAL FORMS OF INPUT
For numeric input, the following additional forms of input are accepted besides decimal: For numeric input, the following additional forms of input are accepted besides decimal:
Octal (only with integer): if the argument begins with a 0 the proceeding characters Octal (only with integer): if the argument begins with a 0 the proceeding characters
@ -271,31 +264,43 @@ COPYRIGHT :
"; ";
/// Identifiers of the clap arguments that `uu_app` registers and `uumain` looks up.
mod options {
/// Identifier of the argument holding the format string.
pub const FORMATSTRING: &str = "FORMATSTRING";
/// Identifier of the repeatable argument holding the values passed after the format string.
pub const ARGUMENT: &str = "ARGUMENT";
}
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let args = args let args = args
.collect_str(InvalidEncodingHandling::Ignore) .collect_str(InvalidEncodingHandling::Ignore)
.accept_any(); .accept_any();
let matches = uu_app().get_matches_from(args);
if args.len() <= 1 { let format_string = matches
return Err(UUsageError::new(1, "missing operand")); .value_of(options::FORMATSTRING)
} .ok_or_else(|| UUsageError::new(1, "missing operand"))?;
let formatstr = &args[1]; let values: Vec<String> = match matches.values_of(options::ARGUMENT) {
Some(s) => s.map(|s| s.to_string()).collect(),
None => vec![],
};
if formatstr == "--help" { memo::Memo::run_all(format_string, &values[..])?;
print!("{} {}", LONGHELP_LEAD, LONGHELP_BODY);
} else if formatstr == "--version" {
println!("{} {}", uucore::util_name(), crate_version!());
} else {
let printf_args = &args[2..];
memo::Memo::run_all(formatstr, printf_args);
}
Ok(()) Ok(())
} }
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()) App::new(uucore::util_name())
.arg(Arg::new(VERSION).long(VERSION)) .setting(AppSettings::AllowHyphenValues)
.arg(Arg::new(HELP).long(HELP)) .version(crate_version!())
.setting(AppSettings::InferLongArgs) .about(ABOUT)
.after_help(AFTER_HELP)
.override_usage(USAGE)
.arg(Arg::new(HELP).long(HELP).help("Print help information"))
.arg(
Arg::new(VERSION)
.long(VERSION)
.help("Print version information"),
)
.arg(Arg::new(options::FORMATSTRING))
.arg(Arg::new(options::ARGUMENT).multiple_occurrences(true))
} }

View file

@ -130,14 +130,7 @@ impl Display for ExtendedBigDecimal {
} }
ExtendedBigDecimal::Infinity => f32::INFINITY.fmt(f), ExtendedBigDecimal::Infinity => f32::INFINITY.fmt(f),
ExtendedBigDecimal::MinusInfinity => f32::NEG_INFINITY.fmt(f), ExtendedBigDecimal::MinusInfinity => f32::NEG_INFINITY.fmt(f),
ExtendedBigDecimal::MinusZero => { ExtendedBigDecimal::MinusZero => (-0.0f32).fmt(f),
// FIXME In Rust version 1.53.0 and later, the display
// of floats was updated to allow displaying negative
// zero. See
// https://github.com/rust-lang/rust/pull/78618. Currently,
// this just formats "0.0".
(0.0f32).fmt(f)
}
ExtendedBigDecimal::Nan => "nan".fmt(f), ExtendedBigDecimal::Nan => "nan".fmt(f),
} }
} }
@ -280,11 +273,6 @@ mod tests {
assert_eq!(format!("{}", ExtendedBigDecimal::Infinity), "inf"); assert_eq!(format!("{}", ExtendedBigDecimal::Infinity), "inf");
assert_eq!(format!("{}", ExtendedBigDecimal::MinusInfinity), "-inf"); assert_eq!(format!("{}", ExtendedBigDecimal::MinusInfinity), "-inf");
assert_eq!(format!("{}", ExtendedBigDecimal::Nan), "nan"); assert_eq!(format!("{}", ExtendedBigDecimal::Nan), "nan");
// FIXME In Rust version 1.53.0 and later, the display of floats assert_eq!(format!("{}", ExtendedBigDecimal::MinusZero), "-0");
// was updated to allow displaying negative zero. Until then, we
// just display `MinusZero` as "0.0".
//
// assert_eq!(format!("{}", ExtendedBigDecimal::MinusZero), "-0.0");
//
} }
} }

View file

@ -5,6 +5,7 @@
// TODO: Support -f flag // TODO: Support -f flag
// spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse // spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse
use std::io::{stdout, ErrorKind, Write}; use std::io::{stdout, ErrorKind, Write};
use std::process::exit;
use clap::{crate_version, App, AppSettings, Arg}; use clap::{crate_version, App, AppSettings, Arg};
use num_traits::Zero; use num_traits::Zero;
@ -12,6 +13,7 @@ use num_traits::Zero;
use uucore::error::FromIo; use uucore::error::FromIo;
use uucore::error::UResult; use uucore::error::UResult;
use uucore::memo::Memo; use uucore::memo::Memo;
use uucore::show;
mod error; mod error;
mod extendedbigdecimal; mod extendedbigdecimal;
@ -198,27 +200,15 @@ fn done_printing<T: Zero + PartialOrd>(next: &T, increment: &T, last: &T) -> boo
} }
/// Write a big decimal formatted according to the given parameters. /// Write a big decimal formatted according to the given parameters.
///
/// This method is an adapter to support displaying negative zero on
/// Rust versions earlier than 1.53.0. After that version, we should be
/// able to display negative zero using the default formatting provided
/// by `-0.0f32`, for example.
fn write_value_float( fn write_value_float(
writer: &mut impl Write, writer: &mut impl Write,
value: &ExtendedBigDecimal, value: &ExtendedBigDecimal,
width: usize, width: usize,
precision: usize, precision: usize,
is_first_iteration: bool, _is_first_iteration: bool,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let value_as_str = if *value == ExtendedBigDecimal::MinusZero && is_first_iteration { let value_as_str =
format!( if *value == ExtendedBigDecimal::Infinity || *value == ExtendedBigDecimal::MinusInfinity {
"-{value:>0width$.precision$}",
value = value,
width = if width > 0 { width - 1 } else { width },
precision = precision,
)
} else if *value == ExtendedBigDecimal::Infinity || *value == ExtendedBigDecimal::MinusInfinity
{
format!( format!(
"{value:>width$.precision$}", "{value:>width$.precision$}",
value = value, value = value,
@ -299,7 +289,10 @@ fn print_seq(
match format { match format {
Some(f) => { Some(f) => {
let s = format!("{}", value); let s = format!("{}", value);
Memo::run_all(f, &[s]); if let Err(x) = Memo::run_all(f, &[s]) {
show!(x);
exit(1);
}
} }
None => write_value_float( None => write_value_float(
&mut stdout, &mut stdout,
@ -361,7 +354,10 @@ fn print_seq_integers(
match format { match format {
Some(f) => { Some(f) => {
let s = format!("{}", value); let s = format!("{}", value);
Memo::run_all(f, &[s]); if let Err(x) = Memo::run_all(f, &[s]) {
show!(x);
exit(1);
}
} }
None => write_value_int(&mut stdout, &value, padding, pad, is_first_iteration)?, None => write_value_int(&mut stdout, &value, padding, pad, is_first_iteration)?,
} }

View file

@ -0,0 +1,28 @@
# Benchmarking shuf
`shuf` is a simple utility, but there are at least two important cases to
benchmark: with and without repetition.
When benchmarking changes, make sure to always build with the `--release` flag.
You can compare with another branch by compiling on that branch and then
renaming the executable from `shuf` to `shuf.old`.
## Without repetition
By default, `shuf` samples without repetition. To benchmark only the
randomization and not IO, we can pass the `-i` flag with a range of numbers to
randomly sample from. An example of a command that works well for testing:
```shell
hyperfine --warmup 10 "target/release/shuf -i 0-10000000"
```
## With repetition
When repetition is allowed, `shuf` works very differently under the hood, so it
should be benchmarked separately. In this case we have to pass the `-n` flag or
the command will run forever. An example of a hyperfine command is
```shell
hyperfine --warmup 10 "target/release/shuf -r -n 10000000 -i 0-1000"
```

View file

@ -8,12 +8,13 @@
// spell-checker:ignore (ToDO) cmdline evec seps rvec fdata // spell-checker:ignore (ToDO) cmdline evec seps rvec fdata
use clap::{crate_version, App, AppSettings, Arg}; use clap::{crate_version, App, AppSettings, Arg};
use rand::Rng; use rand::prelude::SliceRandom;
use rand::RngCore;
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout, BufReader, BufWriter, Read, Write}; use std::io::{stdin, stdout, BufReader, BufWriter, Read, Write};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError}; use uucore::error::{FromIo, UResult, USimpleError};
use uucore::InvalidEncodingHandling; use uucore::{execution_phrase, InvalidEncodingHandling};
mod rand_read_adapter; mod rand_read_adapter;
@ -26,12 +27,8 @@ enum Mode {
static NAME: &str = "shuf"; static NAME: &str = "shuf";
static USAGE: &str = r#"shuf [OPTION]... [FILE] static USAGE: &str = r#"shuf [OPTION]... [FILE]
or: shuf -e [OPTION]... [ARG]... or: shuf -e [OPTION]... [ARG]...
or: shuf -i LO-HI [OPTION]... or: shuf -i LO-HI [OPTION]..."#;
Write a random permutation of the input lines to standard output. static ABOUT: &str = "Shuffle the input by outputting a random permutation of input lines. Each output permutation is equally likely.";
With no FILE, or when FILE is -, read standard input.
"#;
static TEMPLATE: &str = "Usage: {usage}\nMandatory arguments to long options are mandatory for short options too.\n{options}";
struct Options { struct Options {
head_count: usize, head_count: usize,
@ -58,7 +55,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.collect_str(InvalidEncodingHandling::ConvertLossy) .collect_str(InvalidEncodingHandling::ConvertLossy)
.accept_any(); .accept_any();
let matches = uu_app().get_matches_from(args); let matches = uu_app()
.override_usage(&USAGE.replace(NAME, execution_phrase())[..])
.get_matches_from(args);
let mode = if let Some(args) = matches.values_of(options::ECHO) { let mode = if let Some(args) = matches.values_of(options::ECHO) {
Mode::Echo(args.map(String::from).collect()) Mode::Echo(args.map(String::from).collect())
@ -121,8 +120,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()) App::new(uucore::util_name())
.name(NAME) .name(NAME)
.about(ABOUT)
.version(crate_version!()) .version(crate_version!())
.help_template(TEMPLATE)
.override_usage(USAGE) .override_usage(USAGE)
.setting(AppSettings::InferLongArgs) .setting(AppSettings::InferLongArgs)
.arg( .arg(
@ -252,55 +251,49 @@ fn shuf_bytes(input: &mut Vec<&[u8]>, opts: Options) -> UResult<()> {
None => WrappedRng::RngDefault(rand::thread_rng()), None => WrappedRng::RngDefault(rand::thread_rng()),
}; };
// we're generating a random usize. To keep things fair, we take this number mod ceil(log2(length+1)) if input.is_empty() {
let mut len_mod = 1; return Ok(());
let mut len = input.len();
while len > 0 {
len >>= 1;
len_mod <<= 1;
} }
let mut count = opts.head_count; if opts.repeat {
while count > 0 && !input.is_empty() { for _ in 0..opts.head_count {
let mut r = input.len(); // Returns None if the slice is empty. We checked this before, so
while r >= input.len() { // this is safe.
r = rng.next_usize() % len_mod; let r = input.choose(&mut rng).unwrap();
}
// write the randomly chosen value and the separator
output output
.write_all(input[r]) .write_all(r)
.map_err_context(|| "write failed".to_string())?;
output
.write_all(&[opts.sep])
.map_err_context(|| "write failed".to_string())?;
}
} else {
let (shuffled, _) = input.partial_shuffle(&mut rng, opts.head_count);
for r in shuffled {
output
.write_all(r)
.map_err_context(|| "write failed".to_string())?; .map_err_context(|| "write failed".to_string())?;
output output
.write_all(&[opts.sep]) .write_all(&[opts.sep])
.map_err_context(|| "write failed".to_string())?; .map_err_context(|| "write failed".to_string())?;
// if we do not allow repeats, remove the chosen value from the input vector
if !opts.repeat {
// shrink the mask if we will drop below a power of 2
if input.len() % 2 == 0 && len_mod > 2 {
len_mod >>= 1;
} }
input.swap_remove(r);
} }
count -= 1;
}
Ok(()) Ok(())
} }
fn parse_range(input_range: &str) -> Result<(usize, usize), String> { fn parse_range(input_range: &str) -> Result<(usize, usize), String> {
let split: Vec<&str> = input_range.split('-').collect(); if let Some((from, to)) = input_range.split_once('-') {
if split.len() != 2 { let begin = from
Err(format!("invalid input range: {}", input_range.quote()))
} else {
let begin = split[0]
.parse::<usize>() .parse::<usize>()
.map_err(|_| format!("invalid input range: {}", split[0].quote()))?; .map_err(|_| format!("invalid input range: {}", from.quote()))?;
let end = split[1] let end = to
.parse::<usize>() .parse::<usize>()
.map_err(|_| format!("invalid input range: {}", split[1].quote()))?; .map_err(|_| format!("invalid input range: {}", to.quote()))?;
Ok((begin, end + 1)) Ok((begin, end + 1))
} else {
Err(format!("invalid input range: {}", input_range.quote()))
} }
} }
@ -309,11 +302,32 @@ enum WrappedRng {
RngDefault(rand::rngs::ThreadRng), RngDefault(rand::rngs::ThreadRng),
} }
impl WrappedRng { impl RngCore for WrappedRng {
fn next_usize(&mut self) -> usize { fn next_u32(&mut self) -> u32 {
match *self { match self {
WrappedRng::RngFile(ref mut r) => r.gen(), Self::RngFile(r) => r.next_u32(),
WrappedRng::RngDefault(ref mut r) => r.gen(), Self::RngDefault(r) => r.next_u32(),
}
}
fn next_u64(&mut self) -> u64 {
match self {
Self::RngFile(r) => r.next_u64(),
Self::RngDefault(r) => r.next_u64(),
}
}
fn fill_bytes(&mut self, dest: &mut [u8]) {
match self {
Self::RngFile(r) => r.fill_bytes(dest),
Self::RngDefault(r) => r.fill_bytes(dest),
}
}
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand::Error> {
match self {
Self::RngFile(r) => r.try_fill_bytes(dest),
Self::RngDefault(r) => r.try_fill_bytes(dest),
} }
} }
} }

View file

@ -0,0 +1,47 @@
<!-- spell-checker:ignore testfile -->
# Benchmarking to measure performance
To compare the performance of the `uutils` version of `split` with the
GNU version of `split`, you can use a benchmarking tool like
[hyperfine][0]. On Ubuntu 18.04 or later, you can install `hyperfine` by
running
sudo apt-get install hyperfine
Next, build the `split` binary under the release profile:
cargo build --release -p uu_split
Now, get a text file to test `split` on. The `split` program has three
main modes of operation: chunk by lines, chunk by bytes, and chunk by
lines with a byte limit. You may want to test the performance of `split`
with various shapes and sizes of input files and under various modes of
operation. For example, to test chunking by bytes on a large input file,
you can create a file named `testfile.txt` containing one million null
bytes like this:
printf "%0.s\0" {1..1000000} > testfile.txt
For another example, to test chunking by bytes on a large real-world
input file, you could download a [database dump of Wikidata][1] or some
related files that the Wikimedia project provides. For example, [this
file][2] contains about 130 million lines.
Finally, you can compare the performance of the two versions of `split`
by running, for example,
cd /tmp && hyperfine \
--prepare 'rm x* || true' \
"split -b 1000 testfile.txt" \
"target/release/split -b 1000 testfile.txt"
Since `split` creates a lot of files on the filesystem, I recommend
changing to the `/tmp` directory before running the benchmark. The
`--prepare` argument to `hyperfine` runs a specified command before each
timing run. We specify `rm x* || true` so that the output files from the
previous run of `split` are removed before each run begins.
[0]: https://github.com/sharkdp/hyperfine
[1]: https://www.wikidata.org/wiki/Wikidata:Database_download
[2]: https://dumps.wikimedia.org/wikidatawiki/20211001/wikidatawiki-20211001-pages-logging.xml.gz

View file

@ -16,6 +16,7 @@ path = "src/split.rs"
[dependencies] [dependencies]
clap = { version = "3.0", features = ["wrap_help", "cargo"] } clap = { version = "3.0", features = ["wrap_help", "cargo"] }
memchr = "2"
uucore = { version=">=0.0.11", package="uucore", path="../../uucore" } uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }
[[bin]] [[bin]]

View file

@ -13,12 +13,13 @@
//! //!
//! ```rust,ignore //! ```rust,ignore
//! use crate::filenames::FilenameIterator; //! use crate::filenames::FilenameIterator;
//! use crate::filenames::SuffixType;
//! //!
//! let prefix = "chunk_".to_string(); //! let prefix = "chunk_".to_string();
//! let suffix = ".txt".to_string(); //! let suffix = ".txt".to_string();
//! let width = 2; //! let width = 2;
//! let use_numeric_suffix = false; //! let suffix_type = SuffixType::Alphabetic;
//! let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); //! let it = FilenameIterator::new(prefix, suffix, width, suffix_type);
//! //!
//! assert_eq!(it.next().unwrap(), "chunk_aa.txt"); //! assert_eq!(it.next().unwrap(), "chunk_aa.txt");
//! assert_eq!(it.next().unwrap(), "chunk_ab.txt"); //! assert_eq!(it.next().unwrap(), "chunk_ab.txt");
@ -28,6 +29,30 @@ use crate::number::DynamicWidthNumber;
use crate::number::FixedWidthNumber; use crate::number::FixedWidthNumber;
use crate::number::Number; use crate::number::Number;
/// The format to use for suffixes in the filename for each output chunk.
#[derive(Clone, Copy)]
pub enum SuffixType {
/// Lowercase ASCII alphabetic characters.
Alphabetic,
/// Decimal numbers.
NumericDecimal,
/// Hexadecimal numbers.
NumericHexadecimal,
}
impl SuffixType {
/// The radix to use when representing the suffix string as digits.
fn radix(&self) -> u8 {
match self {
SuffixType::Alphabetic => 26,
SuffixType::NumericDecimal => 10,
SuffixType::NumericHexadecimal => 16,
}
}
}
/// Compute filenames from a given index. /// Compute filenames from a given index.
/// ///
/// This iterator yields filenames for use with ``split``. /// This iterator yields filenames for use with ``split``.
@ -42,8 +67,8 @@ use crate::number::Number;
/// width in characters. In that case, after the iterator yields each /// width in characters. In that case, after the iterator yields each
/// string of that width, the iterator is exhausted. /// string of that width, the iterator is exhausted.
/// ///
/// Finally, if `use_numeric_suffix` is `true`, then numbers will be /// Finally, `suffix_type` controls which type of suffix to produce,
/// used instead of lowercase ASCII alphabetic characters. /// alphabetic or numeric.
/// ///
/// # Examples /// # Examples
/// ///
@ -52,28 +77,30 @@ use crate::number::Number;
/// ///
/// ```rust,ignore /// ```rust,ignore
/// use crate::filenames::FilenameIterator; /// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
/// ///
/// let prefix = "chunk_".to_string(); /// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string(); /// let suffix = ".txt".to_string();
/// let width = 2; /// let width = 2;
/// let use_numeric_suffix = false; /// let suffix_type = SuffixType::Alphabetic;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); /// let it = FilenameIterator::new(prefix, suffix, width, suffix_type);
/// ///
/// assert_eq!(it.next().unwrap(), "chunk_aa.txt"); /// assert_eq!(it.next().unwrap(), "chunk_aa.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ab.txt"); /// assert_eq!(it.next().unwrap(), "chunk_ab.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ac.txt"); /// assert_eq!(it.next().unwrap(), "chunk_ac.txt");
/// ``` /// ```
/// ///
/// For numeric filenames, set `use_numeric_suffix` to `true`: /// For decimal numeric filenames, use `SuffixType::NumericDecimal`:
/// ///
/// ```rust,ignore /// ```rust,ignore
/// use crate::filenames::FilenameIterator; /// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
/// ///
/// let prefix = "chunk_".to_string(); /// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string(); /// let suffix = ".txt".to_string();
/// let width = 2; /// let width = 2;
/// let use_numeric_suffix = true; /// let suffix_type = SuffixType::NumericDecimal;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); /// let it = FilenameIterator::new(prefix, suffix, width, suffix_type);
/// ///
/// assert_eq!(it.next().unwrap(), "chunk_00.txt"); /// assert_eq!(it.next().unwrap(), "chunk_00.txt");
/// assert_eq!(it.next().unwrap(), "chunk_01.txt"); /// assert_eq!(it.next().unwrap(), "chunk_01.txt");
@ -91,9 +118,9 @@ impl<'a> FilenameIterator<'a> {
prefix: &'a str, prefix: &'a str,
additional_suffix: &'a str, additional_suffix: &'a str,
suffix_length: usize, suffix_length: usize,
use_numeric_suffix: bool, suffix_type: SuffixType,
) -> FilenameIterator<'a> { ) -> FilenameIterator<'a> {
let radix = if use_numeric_suffix { 10 } else { 26 }; let radix = suffix_type.radix();
let number = if suffix_length == 0 { let number = if suffix_length == 0 {
Number::DynamicWidth(DynamicWidthNumber::new(radix)) Number::DynamicWidth(DynamicWidthNumber::new(radix))
} else { } else {
@ -130,39 +157,40 @@ impl<'a> Iterator for FilenameIterator<'a> {
mod tests { mod tests {
use crate::filenames::FilenameIterator; use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
#[test] #[test]
fn test_filename_iterator_alphabetic_fixed_width() { fn test_filename_iterator_alphabetic_fixed_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
assert_eq!(it.next().unwrap(), "chunk_aa.txt"); assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(it.next().unwrap(), "chunk_ab.txt"); assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(it.next().unwrap(), "chunk_ac.txt"); assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt"); assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
assert_eq!(it.next(), None); assert_eq!(it.next(), None);
} }
#[test] #[test]
fn test_filename_iterator_numeric_fixed_width() { fn test_filename_iterator_numeric_fixed_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
assert_eq!(it.next().unwrap(), "chunk_00.txt"); assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(it.next().unwrap(), "chunk_01.txt"); assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt"); assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt"); assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
assert_eq!(it.next(), None); assert_eq!(it.next(), None);
} }
#[test] #[test]
fn test_filename_iterator_alphabetic_dynamic_width() { fn test_filename_iterator_alphabetic_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.next().unwrap(), "chunk_aa.txt"); assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(it.next().unwrap(), "chunk_ab.txt"); assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(it.next().unwrap(), "chunk_ac.txt"); assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt"); assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
assert_eq!(it.next().unwrap(), "chunk_zaaa.txt"); assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
assert_eq!(it.next().unwrap(), "chunk_zaab.txt"); assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
@ -170,12 +198,12 @@ mod tests {
#[test] #[test]
fn test_filename_iterator_numeric_dynamic_width() { fn test_filename_iterator_numeric_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.next().unwrap(), "chunk_00.txt"); assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(it.next().unwrap(), "chunk_01.txt"); assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt"); assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt"); assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
assert_eq!(it.next().unwrap(), "chunk_9000.txt"); assert_eq!(it.next().unwrap(), "chunk_9000.txt");
assert_eq!(it.next().unwrap(), "chunk_9001.txt"); assert_eq!(it.next().unwrap(), "chunk_9001.txt");

View file

@ -40,13 +40,19 @@ impl Error for Overflow {}
/// specifically for the `split` program. See the /// specifically for the `split` program. See the
/// [`DynamicWidthNumber`] documentation for more information. /// [`DynamicWidthNumber`] documentation for more information.
/// ///
/// Numbers of radix 10 are displayable and rendered as decimal /// Numbers of radix
/// numbers (for example, "00" or "917"). Numbers of radix 26 are ///
/// displayable and rendered as lowercase ASCII alphabetic characters /// * 10 are displayable and rendered as decimal numbers (for example,
/// (for example, "aa" or "zax"). Numbers of other radices cannot be /// "00" or "917"),
/// displayed. The display of a [`DynamicWidthNumber`] includes a /// * 16 are displayable and rendered as hexadecimal numbers (for example,
/// prefix whose length depends on the width of the number. See the /// "00" or "e7f"),
/// [`DynamicWidthNumber`] documentation for more information. /// * 26 are displayable and rendered as lowercase ASCII alphabetic
/// characters (for example, "aa" or "zax").
///
/// Numbers of other radices cannot be displayed. The display of a
/// [`DynamicWidthNumber`] includes a prefix whose length depends on
/// the width of the number. See the [`DynamicWidthNumber`]
/// documentation for more information.
/// ///
/// The digits of a number are accessible via the [`Number::digits`] /// The digits of a number are accessible via the [`Number::digits`]
/// method. The digits are represented as a [`Vec<u8>`] with the most /// method. The digits are represented as a [`Vec<u8>`] with the most
@ -96,8 +102,8 @@ impl Number {
#[allow(dead_code)] #[allow(dead_code)]
fn digits(&self) -> &Vec<u8> { fn digits(&self) -> &Vec<u8> {
match self { match self {
Number::FixedWidth(number) => &number.digits, Self::FixedWidth(number) => &number.digits,
Number::DynamicWidth(number) => &number.digits, Self::DynamicWidth(number) => &number.digits,
} }
} }
@ -136,8 +142,8 @@ impl Number {
/// ``` /// ```
pub fn increment(&mut self) -> Result<(), Overflow> { pub fn increment(&mut self) -> Result<(), Overflow> {
match self { match self {
Number::FixedWidth(number) => number.increment(), Self::FixedWidth(number) => number.increment(),
Number::DynamicWidth(number) => number.increment(), Self::DynamicWidth(number) => number.increment(),
} }
} }
} }
@ -145,8 +151,8 @@ impl Number {
impl Display for Number { impl Display for Number {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self { match self {
Number::FixedWidth(number) => number.fmt(f), Self::FixedWidth(number) => number.fmt(f),
Number::DynamicWidth(number) => number.fmt(f), Self::DynamicWidth(number) => number.fmt(f),
} }
} }
} }
@ -169,12 +175,12 @@ impl Display for Number {
/// ///
/// # Displaying /// # Displaying
/// ///
/// This number is only displayable if `radix` is 10 or `radix` is /// This number is only displayable if `radix` is 10, 16, or 26. If
/// 26. If `radix` is 10, then the digits are concatenated and /// `radix` is 10 or 16, then the digits are concatenated and
/// displayed as a fixed-width decimal number. If `radix` is 26, then /// displayed as a fixed-width decimal or hexadecimal number,
/// each digit is translated to the corresponding lowercase ASCII /// respectively. If `radix` is 26, then each digit is translated to
/// alphabetic character (that is, 'a', 'b', 'c', etc.) and /// the corresponding lowercase ASCII alphabetic character (that is,
/// concatenated. /// 'a', 'b', 'c', etc.) and concatenated.
#[derive(Clone)] #[derive(Clone)]
pub struct FixedWidthNumber { pub struct FixedWidthNumber {
radix: u8, radix: u8,
@ -183,8 +189,8 @@ pub struct FixedWidthNumber {
impl FixedWidthNumber { impl FixedWidthNumber {
/// Instantiate a number of the given radix and width. /// Instantiate a number of the given radix and width.
pub fn new(radix: u8, width: usize) -> FixedWidthNumber { pub fn new(radix: u8, width: usize) -> Self {
FixedWidthNumber { Self {
radix, radix,
digits: vec![0; width], digits: vec![0; width],
} }
@ -228,6 +234,14 @@ impl Display for FixedWidthNumber {
let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect();
write!(f, "{}", digits) write!(f, "{}", digits)
} }
16 => {
let digits: String = self
.digits
.iter()
.map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char)
.collect();
write!(f, "{}", digits)
}
26 => { 26 => {
let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
write!(f, "{}", digits) write!(f, "{}", digits)
@ -264,14 +278,15 @@ impl Display for FixedWidthNumber {
/// ///
/// # Displaying /// # Displaying
/// ///
/// This number is only displayable if `radix` is 10 or `radix` is /// This number is only displayable if `radix` is 10, 16, or 26. If
/// 26. If `radix` is 10, then the digits are concatenated and /// `radix` is 10 or 16, then the digits are concatenated and
/// displayed as a fixed-width decimal number with a prefix of `n - 2` /// displayed as a fixed-width decimal or hexadecimal number,
/// instances of the character '9', where `n` is the number of digits. /// respectively, with a prefix of `n - 2` instances of the character
/// If `radix` is 26, then each digit is translated to the /// '9' or 'f', respectively, where `n` is the number of digits. If
/// corresponding lowercase ASCII alphabetic character (that is, 'a', /// `radix` is 26, then each digit is translated to the corresponding
/// 'b', 'c', etc.) and concatenated with a prefix of `n - 2` /// lowercase ASCII alphabetic character (that is, 'a', 'b', 'c',
/// instances of the character 'z'. /// etc.) and concatenated with a prefix of `n - 2` instances of the
/// character 'z'.
/// ///
/// This notion of displaying the number is specific to the `split` /// This notion of displaying the number is specific to the `split`
/// program. /// program.
@ -286,8 +301,8 @@ impl DynamicWidthNumber {
/// ///
/// This associated function returns a new instance of the struct /// This associated function returns a new instance of the struct
/// with the given radix and a width of two digits, both 0. /// with the given radix and a width of two digits, both 0.
pub fn new(radix: u8) -> DynamicWidthNumber { pub fn new(radix: u8) -> Self {
DynamicWidthNumber { Self {
radix, radix,
digits: vec![0, 0], digits: vec![0, 0],
} }
@ -349,6 +364,21 @@ impl Display for DynamicWidthNumber {
digits = digits, digits = digits,
) )
} }
16 => {
let num_fill_chars = self.digits.len() - 2;
let digits: String = self
.digits
.iter()
.map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char)
.collect();
write!(
f,
"{empty:f<num_fill_chars$}{digits}",
empty = "",
num_fill_chars = num_fill_chars,
digits = digits,
)
}
26 => { 26 => {
let num_fill_chars = self.digits.len() - 2; let num_fill_chars = self.digits.len() - 2;
let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
@ -404,7 +434,7 @@ mod tests {
fn num(n: usize) -> Number { fn num(n: usize) -> Number {
let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26)); let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26));
for _ in 0..n { for _ in 0..n {
number.increment().unwrap() number.increment().unwrap();
} }
number number
} }
@ -424,11 +454,11 @@ mod tests {
} }
#[test] #[test]
fn test_dynamic_width_number_display_numeric() { fn test_dynamic_width_number_display_numeric_decimal() {
fn num(n: usize) -> Number { fn num(n: usize) -> Number {
let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10)); let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10));
for _ in 0..n { for _ in 0..n {
number.increment().unwrap() number.increment().unwrap();
} }
number number
} }
@ -444,6 +474,30 @@ mod tests {
assert_eq!(format!("{}", num(10 * 99 + 1)), "990001"); assert_eq!(format!("{}", num(10 * 99 + 1)), "990001");
} }
#[test]
fn test_dynamic_width_number_display_numeric_hexadecimal() {
fn num(n: usize) -> Number {
let mut number = Number::DynamicWidth(DynamicWidthNumber::new(16));
for _ in 0..n {
number.increment().unwrap()
}
number
}
assert_eq!(format!("{}", num(0)), "00");
assert_eq!(format!("{}", num(15)), "0f");
assert_eq!(format!("{}", num(16)), "10");
assert_eq!(format!("{}", num(17)), "11");
assert_eq!(format!("{}", num(18)), "12");
assert_eq!(format!("{}", num(16 * 15 - 1)), "ef");
assert_eq!(format!("{}", num(16 * 15)), "f000");
assert_eq!(format!("{}", num(16 * 15 + 1)), "f001");
assert_eq!(format!("{}", num(16 * 255 - 1)), "feff");
assert_eq!(format!("{}", num(16 * 255)), "ff0000");
assert_eq!(format!("{}", num(16 * 255 + 1)), "ff0001");
}
#[test] #[test]
fn test_fixed_width_number_increment() { fn test_fixed_width_number_increment() {
let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2)); let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2));
@ -493,7 +547,7 @@ mod tests {
} }
#[test] #[test]
fn test_fixed_width_number_display_numeric() { fn test_fixed_width_number_display_numeric_decimal() {
fn num(n: usize) -> Result<Number, Overflow> { fn num(n: usize) -> Result<Number, Overflow> {
let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2)); let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2));
for _ in 0..n { for _ in 0..n {
@ -510,4 +564,23 @@ mod tests {
assert_eq!(format!("{}", num(10 * 10 - 1).unwrap()), "99"); assert_eq!(format!("{}", num(10 * 10 - 1).unwrap()), "99");
assert!(num(10 * 10).is_err()); assert!(num(10 * 10).is_err());
} }
#[test]
fn test_fixed_width_number_display_numeric_hexadecimal() {
fn num(n: usize) -> Result<Number, Overflow> {
let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2));
for _ in 0..n {
number.increment()?;
}
Ok(number)
}
assert_eq!(format!("{}", num(0).unwrap()), "00");
assert_eq!(format!("{}", num(15).unwrap()), "0f");
assert_eq!(format!("{}", num(17).unwrap()), "11");
assert_eq!(format!("{}", num(16 * 15 - 1).unwrap()), "ef");
assert_eq!(format!("{}", num(16 * 15).unwrap()), "f0");
assert_eq!(format!("{}", num(16 * 16 - 1).unwrap()), "ff");
assert!(num(16 * 16).is_err());
}
} }

View file

@ -12,15 +12,18 @@ mod number;
mod platform; mod platform;
use crate::filenames::FilenameIterator; use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::convert::TryFrom;
use std::env; use std::env;
use std::fs::{metadata, remove_file, File}; use std::fmt;
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write}; use std::fs::{metadata, File};
use std::io::{stdin, BufReader, BufWriter, ErrorKind, Read, Write};
use std::num::ParseIntError;
use std::path::Path; use std::path::Path;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
use uucore::parse_size::parse_size; use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::uio_error;
static OPT_BYTES: &str = "bytes"; static OPT_BYTES: &str = "bytes";
static OPT_LINE_BYTES: &str = "line-bytes"; static OPT_LINE_BYTES: &str = "line-bytes";
@ -29,9 +32,13 @@ static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix";
static OPT_FILTER: &str = "filter"; static OPT_FILTER: &str = "filter";
static OPT_NUMBER: &str = "number"; static OPT_NUMBER: &str = "number";
static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes"; static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes";
static OPT_HEX_SUFFIXES: &str = "hex-suffixes";
static OPT_SUFFIX_LENGTH: &str = "suffix-length"; static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0"; static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
static OPT_VERBOSE: &str = "verbose"; static OPT_VERBOSE: &str = "verbose";
//The ---io-blksize parameter is consumed and ignored.
//The parameter is included to make GNU coreutils tests pass.
static OPT_IO_BLKSIZE: &str = "-io-blksize";
static ARG_INPUT: &str = "input"; static ARG_INPUT: &str = "input";
static ARG_PREFIX: &str = "prefix"; static ARG_PREFIX: &str = "prefix";
@ -62,8 +69,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.override_usage(&usage[..]) .override_usage(&usage[..])
.after_help(&long_usage[..]) .after_help(&long_usage[..])
.get_matches_from(args); .get_matches_from(args);
let settings = Settings::from(matches)?; match Settings::from(&matches) {
split(&settings) Ok(settings) => split(&settings),
Err(e) if e.requires_usage() => Err(UUsageError::new(1, format!("{}", e))),
Err(e) => Err(USimpleError::new(1, format!("{}", e))),
}
} }
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
@ -134,11 +144,26 @@ pub fn uu_app<'a>() -> App<'a> {
.default_value(OPT_DEFAULT_SUFFIX_LENGTH) .default_value(OPT_DEFAULT_SUFFIX_LENGTH)
.help("use suffixes of length N (default 2)"), .help("use suffixes of length N (default 2)"),
) )
.arg(
Arg::new(OPT_HEX_SUFFIXES)
.short('x')
.long(OPT_HEX_SUFFIXES)
.takes_value(true)
.default_missing_value("0")
.help("use hex suffixes starting at 0, not alphabetic"),
)
.arg( .arg(
Arg::new(OPT_VERBOSE) Arg::new(OPT_VERBOSE)
.long(OPT_VERBOSE) .long(OPT_VERBOSE)
.help("print a diagnostic just before each output file is opened"), .help("print a diagnostic just before each output file is opened"),
) )
.arg(
Arg::new(OPT_IO_BLKSIZE)
.long(OPT_IO_BLKSIZE)
.alias(OPT_IO_BLKSIZE)
.takes_value(true)
.hide(true),
)
.arg( .arg(
Arg::new(ARG_INPUT) Arg::new(ARG_INPUT)
.takes_value(true) .takes_value(true)
@ -169,9 +194,35 @@ enum Strategy {
Number(usize), Number(usize),
} }
/// An error when parsing a chunking strategy from command-line arguments.
enum StrategyError {
/// Invalid number of lines.
Lines(ParseSizeError),
/// Invalid number of bytes.
Bytes(ParseSizeError),
/// Invalid number of chunks.
NumberOfChunks(ParseIntError),
/// Multiple chunking strategies were specified (but only one should be).
MultipleWays,
}
impl fmt::Display for StrategyError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Lines(e) => write!(f, "invalid number of lines: {}", e),
Self::Bytes(e) => write!(f, "invalid number of bytes: {}", e),
Self::NumberOfChunks(e) => write!(f, "invalid number of chunks: {}", e),
Self::MultipleWays => write!(f, "cannot split in more than one way"),
}
}
}
impl Strategy { impl Strategy {
/// Parse a strategy from the command-line arguments. /// Parse a strategy from the command-line arguments.
fn from(matches: &ArgMatches) -> UResult<Self> { fn from(matches: &ArgMatches) -> Result<Self, StrategyError> {
// Check that the user is not specifying more than one strategy. // Check that the user is not specifying more than one strategy.
// //
// Note: right now, this exact behavior cannot be handled by // Note: right now, this exact behavior cannot be handled by
@ -186,41 +237,47 @@ impl Strategy {
(0, 0, 0, 0) => Ok(Self::Lines(1000)), (0, 0, 0, 0) => Ok(Self::Lines(1000)),
(1, 0, 0, 0) => { (1, 0, 0, 0) => {
let s = matches.value_of(OPT_LINES).unwrap(); let s = matches.value_of(OPT_LINES).unwrap();
let n = parse_size(s) let n = parse_size(s).map_err(StrategyError::Lines)?;
.map_err(|e| USimpleError::new(1, format!("invalid number of lines: {}", e)))?;
Ok(Self::Lines(n)) Ok(Self::Lines(n))
} }
(0, 1, 0, 0) => { (0, 1, 0, 0) => {
let s = matches.value_of(OPT_BYTES).unwrap(); let s = matches.value_of(OPT_BYTES).unwrap();
let n = parse_size(s) let n = parse_size(s).map_err(StrategyError::Bytes)?;
.map_err(|e| USimpleError::new(1, format!("invalid number of bytes: {}", e)))?;
Ok(Self::Bytes(n)) Ok(Self::Bytes(n))
} }
(0, 0, 1, 0) => { (0, 0, 1, 0) => {
let s = matches.value_of(OPT_LINE_BYTES).unwrap(); let s = matches.value_of(OPT_LINE_BYTES).unwrap();
let n = parse_size(s) let n = parse_size(s).map_err(StrategyError::Bytes)?;
.map_err(|e| USimpleError::new(1, format!("invalid number of bytes: {}", e)))?;
Ok(Self::LineBytes(n)) Ok(Self::LineBytes(n))
} }
(0, 0, 0, 1) => { (0, 0, 0, 1) => {
let s = matches.value_of(OPT_NUMBER).unwrap(); let s = matches.value_of(OPT_NUMBER).unwrap();
let n = s.parse::<usize>().map_err(|e| { let n = s.parse::<usize>().map_err(StrategyError::NumberOfChunks)?;
USimpleError::new(1, format!("invalid number of chunks: {}", e))
})?;
Ok(Self::Number(n)) Ok(Self::Number(n))
} }
_ => Err(UUsageError::new(1, "cannot split in more than one way")), _ => Err(StrategyError::MultipleWays),
} }
} }
} }
/// Determine the suffix type requested on the command line.
///
/// The numeric (decimal) option is checked first, so it wins if both it
/// and the hexadecimal option were given. If neither option occurs, the
/// alphabetic suffix type is returned.
fn suffix_type_from(matches: &ArgMatches) -> SuffixType {
    // True if the named option occurred at least once on the command line.
    let was_given = |option: &str| matches.occurrences_of(option) != 0;

    if was_given(OPT_NUMERIC_SUFFIXES) {
        return SuffixType::NumericDecimal;
    }
    if was_given(OPT_HEX_SUFFIXES) {
        return SuffixType::NumericHexadecimal;
    }
    SuffixType::Alphabetic
}
/// Parameters that control how a file gets split. /// Parameters that control how a file gets split.
/// ///
/// You can convert an [`ArgMatches`] instance into a [`Settings`] /// You can convert an [`ArgMatches`] instance into a [`Settings`]
/// instance by calling [`Settings::from`]. /// instance by calling [`Settings::from`].
struct Settings { struct Settings {
prefix: String, prefix: String,
numeric_suffix: bool, suffix_type: SuffixType,
suffix_length: usize, suffix_length: usize,
additional_suffix: String, additional_suffix: String,
input: String, input: String,
@ -230,19 +287,68 @@ struct Settings {
verbose: bool, verbose: bool,
} }
/// An error when parsing settings from command-line arguments.
///
/// Each variant corresponds to one way in which building the settings
/// from the parsed arguments can fail.
enum SettingsError {
/// Invalid chunking strategy.
///
/// Wraps the underlying [`StrategyError`].
Strategy(StrategyError),
/// Invalid suffix length parameter.
///
/// Carries the offending argument text so that it can be quoted in
/// the error message.
SuffixLength(String),
/// Suffix contains a directory separator, which is not allowed.
///
/// Carries the offending suffix so that it can be quoted in the
/// error message.
SuffixContainsSeparator(String),
/// The `--filter` option is not supported on Windows.
///
/// This variant only exists when compiling for Windows.
#[cfg(windows)]
NotSupported,
}
impl SettingsError {
    /// Whether the error demands a usage message.
    ///
    /// This is the case when more than one chunking strategy was
    /// requested and when the additional suffix contains a directory
    /// separator; every other error does not require a usage message.
    fn requires_usage(&self) -> bool {
        match self {
            Self::SuffixContainsSeparator(_) => true,
            // Among strategy errors, only conflicting strategies qualify.
            Self::Strategy(cause) => matches!(cause, StrategyError::MultipleWays),
            Self::SuffixLength(_) => false,
            #[cfg(windows)]
            Self::NotSupported => false,
        }
    }
}
impl fmt::Display for SettingsError {
    /// Write the user-facing description of this error.
    ///
    /// Strategy errors are displayed exactly as the wrapped
    /// [`StrategyError`] displays itself; offending argument values are
    /// quoted in the message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // Delegate to the inner error's own `Display` implementation.
            Self::Strategy(inner) => fmt::Display::fmt(inner, f),
            Self::SuffixLength(given) => {
                let quoted = given.quote();
                write!(f, "invalid suffix length: {}", quoted)
            }
            Self::SuffixContainsSeparator(suffix) => {
                let quoted = suffix.quote();
                write!(f, "invalid suffix {}, contains directory separator", quoted)
            }
            #[cfg(windows)]
            Self::NotSupported => {
                write!(f, "{} is currently not supported in this platform", OPT_FILTER)
            }
        }
    }
}
impl Settings { impl Settings {
/// Parse a strategy from the command-line arguments. /// Parse a strategy from the command-line arguments.
fn from(matches: ArgMatches) -> UResult<Self> { fn from(matches: &ArgMatches) -> Result<Self, SettingsError> {
let additional_suffix = matches.value_of(OPT_ADDITIONAL_SUFFIX).unwrap().to_string();
if additional_suffix.contains('/') {
return Err(SettingsError::SuffixContainsSeparator(additional_suffix));
}
let suffix_length_str = matches.value_of(OPT_SUFFIX_LENGTH).unwrap();
let result = Self { let result = Self {
suffix_length: matches suffix_length: suffix_length_str
.value_of(OPT_SUFFIX_LENGTH)
.unwrap()
.parse() .parse()
.unwrap_or_else(|_| panic!("Invalid number for {}", OPT_SUFFIX_LENGTH)), .map_err(|_| SettingsError::SuffixLength(suffix_length_str.to_string()))?,
numeric_suffix: matches.occurrences_of(OPT_NUMERIC_SUFFIXES) > 0, suffix_type: suffix_type_from(matches),
additional_suffix: matches.value_of(OPT_ADDITIONAL_SUFFIX).unwrap().to_owned(), additional_suffix,
verbose: matches.occurrences_of("verbose") > 0, verbose: matches.occurrences_of("verbose") > 0,
strategy: Strategy::from(&matches)?, strategy: Strategy::from(matches).map_err(SettingsError::Strategy)?,
input: matches.value_of(ARG_INPUT).unwrap().to_owned(), input: matches.value_of(ARG_INPUT).unwrap().to_owned(),
prefix: matches.value_of(ARG_PREFIX).unwrap().to_owned(), prefix: matches.value_of(ARG_PREFIX).unwrap().to_owned(),
filter: matches.value_of(OPT_FILTER).map(|s| s.to_owned()), filter: matches.value_of(OPT_FILTER).map(|s| s.to_owned()),
@ -250,112 +356,242 @@ impl Settings {
#[cfg(windows)] #[cfg(windows)]
if result.filter.is_some() { if result.filter.is_some() {
// see https://github.com/rust-lang/rust/issues/29494 // see https://github.com/rust-lang/rust/issues/29494
return Err(USimpleError::new( return Err(SettingsError::NotSupported);
-1,
format!("{} is currently not supported in this platform", OPT_FILTER),
));
} }
Ok(result) Ok(result)
} }
} }
trait Splitter { /// Write a certain number of bytes to one file, then move on to another one.
// Consume as much as possible from `reader` so as to saturate `writer`. ///
// Equivalent to finishing one of the part files. Returns the number of /// This struct maintains an underlying writer representing the
// bytes that have been moved. /// current chunk of the output. If a call to [`write`] would cause
fn consume( /// the underlying writer to write more than the allowed number of
&mut self, /// bytes, a new writer is created and the excess bytes are written to
reader: &mut BufReader<Box<dyn Read>>, /// that one instead. As many new underlying writers are created as
writer: &mut BufWriter<Box<dyn Write>>, /// needed to write all the bytes in the input buffer.
) -> std::io::Result<u128>; struct ByteChunkWriter<'a> {
/// Parameters for creating the underlying writer for each new chunk.
settings: &'a Settings,
/// The maximum number of bytes allowed for a single chunk of output.
chunk_size: usize,
/// Running total of number of chunks that have been completed.
num_chunks_written: usize,
/// Remaining capacity in number of bytes in the current chunk.
///
/// This number starts at `chunk_size` and decreases as bytes are
/// written. Once it reaches zero, a writer for a new chunk is
/// initialized and this number gets reset to `chunk_size`.
num_bytes_remaining_in_current_chunk: usize,
/// The underlying writer for the current chunk.
///
/// Once the number of bytes written to this writer exceeds
/// `chunk_size`, a new writer is initialized and assigned to this
/// field.
inner: BufWriter<Box<dyn Write>>,
/// Iterator that yields filenames for each chunk.
filename_iterator: FilenameIterator<'a>,
} }
struct LineSplitter { impl<'a> ByteChunkWriter<'a> {
lines_per_split: usize, fn new(chunk_size: usize, settings: &'a Settings) -> Option<ByteChunkWriter<'a>> {
} let mut filename_iterator = FilenameIterator::new(
&settings.prefix,
impl LineSplitter { &settings.additional_suffix,
fn new(chunk_size: usize) -> Self { settings.suffix_length,
Self { settings.suffix_type,
lines_per_split: chunk_size, );
let filename = filename_iterator.next()?;
if settings.verbose {
println!("creating file {}", filename.quote());
} }
let inner = platform::instantiate_current_writer(&settings.filter, &filename);
Some(ByteChunkWriter {
settings,
chunk_size,
num_bytes_remaining_in_current_chunk: chunk_size,
num_chunks_written: 0,
inner,
filename_iterator,
})
} }
} }
impl Splitter for LineSplitter { impl<'a> Write for ByteChunkWriter<'a> {
fn consume( fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
&mut self, // If the length of `buf` exceeds the number of bytes remaining
reader: &mut BufReader<Box<dyn Read>>, // in the current chunk, we will need to write to multiple
writer: &mut BufWriter<Box<dyn Write>>, // different underlying writers. In that case, each iteration of
) -> std::io::Result<u128> { // this loop writes to the underlying writer that corresponds to
let mut bytes_consumed = 0u128; // the current chunk number.
let mut buffer = String::with_capacity(1024); let mut carryover_bytes_written = 0;
for _ in 0..self.lines_per_split { loop {
let bytes_read = reader.read_line(&mut buffer)?; if buf.is_empty() {
// If we ever read 0 bytes then we know we've hit EOF. return Ok(carryover_bytes_written);
if bytes_read == 0 {
return Ok(bytes_consumed);
} }
writer.write_all(buffer.as_bytes())?; // If the capacity of this chunk is greater than the number of
// Empty out the String buffer since `read_line` appends instead of // bytes in `buf`, then write all the bytes in `buf`. Otherwise,
// replaces. // write enough bytes to fill the current chunk, then increment
buffer.clear(); // the chunk number and repeat.
let n = buf.len();
bytes_consumed += bytes_read as u128; if n < self.num_bytes_remaining_in_current_chunk {
} let num_bytes_written = self.inner.write(buf)?;
self.num_bytes_remaining_in_current_chunk -= num_bytes_written;
Ok(bytes_consumed) return Ok(carryover_bytes_written + num_bytes_written);
}
}
struct ByteSplitter {
bytes_per_split: u128,
}
impl ByteSplitter {
fn new(chunk_size: usize) -> Self {
Self {
bytes_per_split: u128::try_from(chunk_size).unwrap(),
}
}
}
impl Splitter for ByteSplitter {
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> std::io::Result<u128> {
// We buffer reads and writes. We proceed until `bytes_consumed` is
// equal to `self.bytes_per_split` or we reach EOF.
let mut bytes_consumed = 0u128;
const BUFFER_SIZE: usize = 1024;
let mut buffer = [0u8; BUFFER_SIZE];
while bytes_consumed < self.bytes_per_split {
// Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min
// doesn't really work since we have to get types to match which
// can't be done in a way that keeps all conversions safe.
let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed {
BUFFER_SIZE
} else { } else {
// This is a safe conversion since the difference must be less // Write enough bytes to fill the current chunk.
// than BUFFER_SIZE in this branch. let i = self.num_bytes_remaining_in_current_chunk;
(self.bytes_per_split - bytes_consumed) as usize let num_bytes_written = self.inner.write(&buf[..i])?;
};
let bytes_read = reader.read(&mut buffer[0..bytes_desired])?; // It's possible that the underlying writer did not
// If we ever read 0 bytes then we know we've hit EOF. // write all the bytes.
if bytes_read == 0 { if num_bytes_written < i {
return Ok(bytes_consumed); self.num_bytes_remaining_in_current_chunk -= num_bytes_written;
return Ok(carryover_bytes_written + num_bytes_written);
} else {
// Move the window to look at only the remaining bytes.
buf = &buf[i..];
// Increment the chunk number, reset the number of
// bytes remaining, and instantiate the new
// underlying writer.
self.num_chunks_written += 1;
self.num_bytes_remaining_in_current_chunk = self.chunk_size;
// Remember for the next iteration that we wrote these bytes.
carryover_bytes_written += num_bytes_written;
// Only create the writer for the next chunk if
// there are any remaining bytes to write. This
// check prevents us from creating a new empty
// file.
if !buf.is_empty() {
let filename = self.filename_iterator.next().ok_or_else(|| {
std::io::Error::new(ErrorKind::Other, "output file suffixes exhausted")
})?;
if self.settings.verbose {
println!("creating file {}", filename.quote());
}
self.inner =
platform::instantiate_current_writer(&self.settings.filter, &filename);
}
}
}
}
}
fn flush(&mut self) -> std::io::Result<()> {
self.inner.flush()
}
}
/// Write a certain number of lines to one file, then move on to another one.
///
/// This struct maintains an underlying writer representing the
/// current chunk of the output. If a call to `write` would cause
/// the underlying writer to write more than the allowed number of
/// lines, a new writer is created and the excess lines are written to
/// that one instead. As many new underlying writers are created as
/// needed to write all the lines in the input buffer.
///
/// The lifetime `'a` ties the writer to the [`Settings`] it borrows.
struct LineChunkWriter<'a> {
/// Parameters for creating the underlying writer for each new chunk.
settings: &'a Settings,
/// The maximum number of lines allowed for a single chunk of output.
chunk_size: usize,
/// Running total of number of chunks that have been completed.
///
/// This counter starts at zero.
num_chunks_written: usize,
/// Remaining capacity in number of lines in the current chunk.
///
/// This number starts at `chunk_size` and decreases as lines are
/// written. Once it reaches zero, a writer for a new chunk is
/// initialized and this number gets reset to `chunk_size`.
num_lines_remaining_in_current_chunk: usize,
/// The underlying writer for the current chunk.
///
/// Once the number of lines written to this writer reaches
/// `chunk_size`, a new writer is initialized and assigned to this
/// field before any further line is written.
inner: BufWriter<Box<dyn Write>>,
/// Iterator that yields filenames for each chunk.
filename_iterator: FilenameIterator<'a>,
}
impl<'a> LineChunkWriter<'a> {
    /// Create a writer that splits its output into chunks of `chunk_size` lines.
    ///
    /// The filename for the first chunk is taken from a fresh
    /// [`FilenameIterator`] built from the prefix, additional suffix,
    /// suffix length and suffix type in `settings`, and the writer for
    /// that chunk is obtained immediately. If `settings.verbose` is
    /// set, a "creating file" message naming the first chunk is printed
    /// to standard output.
    ///
    /// Returns `None` if the filename iterator cannot produce even the
    /// first filename.
    fn new(chunk_size: usize, settings: &'a Settings) -> Option<Self> {
        let mut filename_iterator = FilenameIterator::new(
            &settings.prefix,
            &settings.additional_suffix,
            settings.suffix_length,
            settings.suffix_type,
        );
        let filename = filename_iterator.next()?;
        if settings.verbose {
            println!("creating file {}", filename.quote());
        }
        let inner = platform::instantiate_current_writer(&settings.filter, &filename);
        Some(Self {
            settings,
            chunk_size,
            // The first chunk starts out with its full capacity available.
            num_lines_remaining_in_current_chunk: chunk_size,
            num_chunks_written: 0,
            inner,
            filename_iterator,
        })
    }
}
impl<'a> Write for LineChunkWriter<'a> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
// If the number of lines in `buf` exceeds the number of lines
// remaining in the current chunk, we will need to write to
// multiple different underlying writers. In that case, each
// iteration of this loop writes to the underlying writer that
// corresponds to the current chunk number.
let mut prev = 0;
let mut total_bytes_written = 0;
for i in memchr::memchr_iter(b'\n', buf) {
// If we have exceeded the number of lines to write in the
// current chunk, then start a new chunk and its
// corresponding writer.
if self.num_lines_remaining_in_current_chunk == 0 {
self.num_chunks_written += 1;
let filename = self.filename_iterator.next().ok_or_else(|| {
std::io::Error::new(ErrorKind::Other, "output file suffixes exhausted")
})?;
if self.settings.verbose {
println!("creating file {}", filename.quote());
}
self.inner = platform::instantiate_current_writer(&self.settings.filter, &filename);
self.num_lines_remaining_in_current_chunk = self.chunk_size;
} }
writer.write_all(&buffer[0..bytes_read])?; // Write the line, starting from *after* the previous
// newline character and ending *after* the current
bytes_consumed += bytes_read as u128; // newline character.
let n = self.inner.write(&buf[prev..i + 1])?;
total_bytes_written += n;
prev = i + 1;
self.num_lines_remaining_in_current_chunk -= 1;
} }
Ok(bytes_consumed) let n = self.inner.write(&buf[prev..buf.len()])?;
total_bytes_written += n;
Ok(total_bytes_written)
}
fn flush(&mut self) -> std::io::Result<()> {
self.inner.flush()
} }
} }
@ -389,7 +625,7 @@ where
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.numeric_suffix, settings.suffix_type,
); );
// Create one writer for each chunk. This will create each // Create one writer for each chunk. This will create each
@ -450,65 +686,47 @@ fn split(settings: &Settings) -> UResult<()> {
Box::new(r) as Box<dyn Read> Box::new(r) as Box<dyn Read>
}); });
if let Strategy::Number(num_chunks) = settings.strategy { match settings.strategy {
return split_into_n_chunks_by_byte(settings, &mut reader, num_chunks); Strategy::Number(num_chunks) => {
split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
} }
Strategy::Lines(chunk_size) => {
let mut splitter: Box<dyn Splitter> = match settings.strategy { let mut writer = LineChunkWriter::new(chunk_size, settings)
Strategy::Lines(chunk_size) => Box::new(LineSplitter::new(chunk_size)),
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
Box::new(ByteSplitter::new(chunk_size))
}
_ => unreachable!(),
};
// This object is responsible for creating the filename for each chunk.
let mut filename_iterator = FilenameIterator::new(
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
);
loop {
// Get a new part file set up, and construct `writer` for it.
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); match std::io::copy(&mut reader, &mut writer) {
Ok(_) => Ok(()),
let bytes_consumed = splitter Err(e) => match e.kind() {
.consume(&mut reader, &mut writer) // TODO Since the writer object controls the creation of
.map_err_context(|| "input/output error".to_string())?; // new files, we need to rely on the `std::io::Result`
writer // returned by its `write()` method to communicate any
.flush() // errors to this calling scope. If a new file cannot be
.map_err_context(|| "error flushing to output file".to_string())?; // created because we have exceeded the number of
// allowable filenames, we use `ErrorKind::Other` to
// If we didn't write anything we should clean up the empty file, and // indicate that. A special error message needs to be
// break from the loop. // printed in that case.
if bytes_consumed == 0 { ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")),
// The output file is only ever created if --filter isn't used. _ => Err(uio_error!(e, "input/output error")),
// Complicated, I know... },
if settings.filter.is_none() { }
remove_file(filename) }
.map_err_context(|| "error removing empty file".to_string())?; Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
} let mut writer = ByteChunkWriter::new(chunk_size, settings)
break; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
} match std::io::copy(&mut reader, &mut writer) {
Ok(_) => Ok(()),
// TODO It is silly to have the "creating file" message here Err(e) => match e.kind() {
// after the file has been already created. However, because // TODO Since the writer object controls the creation of
// of the way the main loop has been written, an extra file // new files, we need to rely on the `std::io::Result`
// gets created and then deleted in the last iteration of the // returned by its `write()` method to communicate any
// loop. So we need to make sure we are not in that case when // errors to this calling scope. If a new file cannot be
// printing this message. // created because we have exceeded the number of
// // allowable filenames, we use `ErrorKind::Other` to
// This is only here temporarily while we make some // indicate that. A special error message needs to be
// improvements to the architecture of the main loop in this // printed in that case.
// function. In the future, it will move to a more appropriate ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")),
// place---at the point where the file is actually created. _ => Err(uio_error!(e, "input/output error")),
if settings.verbose { },
println!("creating file {}", filename.quote()); }
} }
} }
Ok(())
} }

View file

@ -193,11 +193,19 @@ impl ScanUtil for str {
} }
pub fn group_num(s: &str) -> Cow<str> { pub fn group_num(s: &str) -> Cow<str> {
assert!(s.chars().all(char::is_numeric)); let is_negative = s.starts_with('-');
assert!(is_negative || s.chars().take(1).all(|c| c.is_digit(10)));
assert!(s.chars().skip(1).all(|c| c.is_digit(10)));
if s.len() < 4 { if s.len() < 4 {
return s.into(); return s.into();
} }
let mut res = String::with_capacity((s.len() - 1) / 3); let mut res = String::with_capacity((s.len() - 1) / 3);
let s = if is_negative {
res.push('-');
&s[1..]
} else {
s
};
let mut alone = (s.len() - 1) % 3 + 1; let mut alone = (s.len() - 1) % 3 + 1;
res.push_str(&s[..alone]); res.push_str(&s[..alone]);
while alone != s.len() { while alone != s.len() {

View file

@ -17,7 +17,7 @@ path = "src/tail.rs"
[dependencies] [dependencies]
clap = { version = "3.0", features = ["wrap_help", "cargo"] } clap = { version = "3.0", features = ["wrap_help", "cargo"] }
libc = "0.2.42" libc = "0.2.42"
uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer"] } uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi", "synchapi", "winbase"] } winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi", "synchapi", "winbase"] }

View file

@ -19,7 +19,7 @@ pub fn parse_obsolete(src: &str) -> Option<Result<impl Iterator<Item = OsString>
let mut has_num = false; let mut has_num = false;
let mut last_char = 0 as char; let mut last_char = 0 as char;
for (n, c) in &mut chars { for (n, c) in &mut chars {
if c.is_numeric() { if c.is_digit(10) {
has_num = true; has_num = true;
num_end = n; num_end = n;
} else { } else {

View file

@ -30,6 +30,7 @@ impl ProcessChecker {
} }
// Borrowing mutably to be aligned with Windows implementation // Borrowing mutably to be aligned with Windows implementation
#[allow(clippy::wrong_self_convention)]
pub fn is_dead(&mut self) -> bool { pub fn is_dead(&mut self) -> bool {
unsafe { libc::kill(self.pid, 0) != 0 && get_errno() != libc::EPERM } unsafe { libc::kill(self.pid, 0) != 0 && get_errno() != libc::EPERM }
} }

View file

@ -24,11 +24,11 @@ pub struct ProcessChecker {
} }
impl ProcessChecker { impl ProcessChecker {
pub fn new(process_id: self::Pid) -> ProcessChecker { pub fn new(process_id: self::Pid) -> Self {
#[allow(non_snake_case)] #[allow(non_snake_case)]
let FALSE = 0i32; let FALSE = 0i32;
let h = unsafe { OpenProcess(SYNCHRONIZE, FALSE, process_id as DWORD) }; let h = unsafe { OpenProcess(SYNCHRONIZE, FALSE, process_id as DWORD) };
ProcessChecker { Self {
dead: h.is_null(), dead: h.is_null(),
handle: h, handle: h,
} }

View file

@ -16,11 +16,9 @@ extern crate clap;
extern crate uucore; extern crate uucore;
mod chunks; mod chunks;
mod lines;
mod parse; mod parse;
mod platform; mod platform;
use chunks::ReverseChunks; use chunks::ReverseChunks;
use lines::lines;
use clap::{App, AppSettings, Arg}; use clap::{App, AppSettings, Arg};
use std::collections::VecDeque; use std::collections::VecDeque;
@ -33,6 +31,7 @@ use std::thread::sleep;
use std::time::Duration; use std::time::Duration;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError}; use uucore::error::{FromIo, UResult, USimpleError};
use uucore::lines::lines;
use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::ringbuffer::RingBuffer; use uucore::ringbuffer::RingBuffer;

View file

@ -10,7 +10,7 @@
mod parser; mod parser;
use clap::{crate_version, App, AppSettings}; use clap::{crate_version, App};
use parser::{parse, Operator, Symbol, UnaryOperator}; use parser::{parse, Operator, Symbol, UnaryOperator};
use std::ffi::{OsStr, OsString}; use std::ffi::{OsStr, OsString};
use uucore::display::Quotable; use uucore::display::Quotable;
@ -86,10 +86,14 @@ NOTE: your shell may have its own version of test and/or [, which usually supers
the version described here. Please refer to your shell's documentation the version described here. Please refer to your shell's documentation
for details about the options it supports."; for details about the options it supports.";
const ABOUT: &str = "Check file types and compare values.";
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()) App::new(uucore::util_name())
.setting(AppSettings::DisableHelpFlag) .version(crate_version!())
.setting(AppSettings::DisableVersionFlag) .about(ABOUT)
.override_usage(USAGE)
.after_help(AFTER_HELP)
} }
#[uucore::main] #[uucore::main]
@ -104,6 +108,7 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> {
// Let clap pretty-print help and version // Let clap pretty-print help and version
App::new(binary_name) App::new(binary_name)
.version(crate_version!()) .version(crate_version!())
.about(ABOUT)
.override_usage(USAGE) .override_usage(USAGE)
.after_help(AFTER_HELP) .after_help(AFTER_HELP)
// Disable printing of -h and -v as valid alternatives for --help and --version, // Disable printing of -h and -v as valid alternatives for --help and --version,

View file

@ -58,7 +58,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().override_usage(&usage[..]).get_matches_from(args); let matches = uu_app().override_usage(&usage[..]).get_matches_from(args);
let files = matches.values_of_os(ARG_FILES).unwrap(); let files = matches.values_of_os(ARG_FILES).ok_or_else(|| {
USimpleError::new(
1,
r##"missing file operand
Try 'touch --help' for more information."##,
)
})?;
let (mut atime, mut mtime) = let (mut atime, mut mtime) =
if let Some(reference) = matches.value_of_os(options::sources::REFERENCE) { if let Some(reference) = matches.value_of_os(options::sources::REFERENCE) {
@ -77,8 +83,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
for filename in files { for filename in files {
let path = Path::new(filename); let path = Path::new(filename);
if !path.exists() { if !path.exists() {
// no-dereference included here for compatibility if matches.is_present(options::NO_CREATE) {
if matches.is_present(options::NO_CREATE) || matches.is_present(options::NO_DEREF) { continue;
}
if matches.is_present(options::NO_DEREF) {
show!(USimpleError::new(
1,
format!(
"setting times of {}: No such file or directory",
filename.quote()
)
));
continue; continue;
} }

View file

@ -4,16 +4,59 @@
// * // *
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
use clap::{App, AppSettings, Arg};
use std::io::Write;
use uucore::error::{set_exit_code, UResult};
use clap::{App, AppSettings}; static ABOUT: &str = "\
use uucore::error::UResult; Returns true, a successful exit status.
Immediately returns with the exit status `0`, except when invoked with one of the recognized
options. In those cases it will try to write the help or version text. Any IO error during this
operation causes the program to return `1` instead.
";
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
uu_app().get_matches_from(args); let mut app = uu_app();
if let Ok(matches) = app.try_get_matches_from_mut(args) {
let error = if matches.index_of("help").is_some() {
app.print_long_help()
} else if matches.index_of("version").is_some() {
writeln!(std::io::stdout(), "{}", app.render_version())
} else {
Ok(())
};
if let Err(print_fail) = error {
// Try to display this error.
let _ = writeln!(std::io::stderr(), "{}: {}", uucore::util_name(), print_fail);
// Mirror GNU options. When failing to print warnings or version flags, then we exit
// with FAIL. This avoids allocating some error information which may result in yet
// other types of failure.
set_exit_code(1);
}
}
Ok(()) Ok(())
} }
pub fn uu_app<'a>() -> App<'a> { pub fn uu_app<'a>() -> App<'a> {
App::new(uucore::util_name()).setting(AppSettings::InferLongArgs) App::new(uucore::util_name())
.version(clap::crate_version!())
.about(ABOUT)
// We provide our own help and version options, to ensure maximum compatibility with GNU.
.setting(AppSettings::DisableHelpFlag | AppSettings::DisableVersionFlag)
.arg(
Arg::new("help")
.long("help")
.help("Print help information")
.exclusive(true),
)
.arg(
Arg::new("version")
.long("version")
.help("Print version information"),
)
} }

View file

@ -10,6 +10,8 @@ use clap::{crate_version, App, AppSettings, Arg};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::fs::{metadata, OpenOptions}; use std::fs::{metadata, OpenOptions};
use std::io::ErrorKind; use std::io::ErrorKind;
#[cfg(unix)]
use std::os::unix::fs::FileTypeExt;
use std::path::Path; use std::path::Path;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
@ -115,7 +117,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app() let matches = uu_app()
.override_usage(&usage[..]) .override_usage(&usage[..])
.after_help(&long_usage[..]) .after_help(&long_usage[..])
.get_matches_from(args); .try_get_matches_from(args)
.map_err(|e| {
e.print().expect("Error writing clap::Error");
match e.kind {
clap::ErrorKind::DisplayHelp | clap::ErrorKind::DisplayVersion => 0,
_ => 1,
}
})?;
let files: Vec<String> = matches let files: Vec<String> = matches
.values_of(options::ARG_FILES) .values_of(options::ARG_FILES)
@ -205,8 +214,10 @@ fn file_truncate(filename: &str, create: bool, size: usize) -> std::io::Result<(
/// ///
/// # Errors /// # Errors
/// ///
/// If the any file could not be opened, or there was a problem setting /// If any file could not be opened, or there was a problem setting
/// the size of at least one file. /// the size of at least one file.
///
/// If at least one file is a named pipe (also known as a fifo).
fn truncate_reference_and_size( fn truncate_reference_and_size(
rfilename: &str, rfilename: &str,
size_string: &str, size_string: &str,
@ -239,6 +250,17 @@ fn truncate_reference_and_size(
let fsize = metadata.len() as usize; let fsize = metadata.len() as usize;
let tsize = mode.to_size(fsize); let tsize = mode.to_size(fsize);
for filename in filenames { for filename in filenames {
#[cfg(unix)]
if std::fs::metadata(filename)?.file_type().is_fifo() {
return Err(USimpleError::new(
1,
format!(
"cannot open {} for writing: No such device or address",
filename.quote()
),
));
}
file_truncate(filename, create, tsize) file_truncate(filename, create, tsize)
.map_err_context(|| format!("cannot open {} for writing", filename.quote()))?; .map_err_context(|| format!("cannot open {} for writing", filename.quote()))?;
} }
@ -254,8 +276,10 @@ fn truncate_reference_and_size(
/// ///
/// # Errors /// # Errors
/// ///
/// If the any file could not be opened, or there was a problem setting /// If any file could not be opened, or there was a problem setting
/// the size of at least one file. /// the size of at least one file.
///
/// If at least one file is a named pipe (also known as a fifo).
fn truncate_reference_file_only( fn truncate_reference_file_only(
rfilename: &str, rfilename: &str,
filenames: &[String], filenames: &[String],
@ -273,6 +297,16 @@ fn truncate_reference_file_only(
})?; })?;
let tsize = metadata.len() as usize; let tsize = metadata.len() as usize;
for filename in filenames { for filename in filenames {
#[cfg(unix)]
if std::fs::metadata(filename)?.file_type().is_fifo() {
return Err(USimpleError::new(
1,
format!(
"cannot open {} for writing: No such device or address",
filename.quote()
),
));
}
file_truncate(filename, create, tsize) file_truncate(filename, create, tsize)
.map_err_context(|| format!("cannot open {} for writing", filename.quote()))?; .map_err_context(|| format!("cannot open {} for writing", filename.quote()))?;
} }
@ -292,8 +326,10 @@ fn truncate_reference_file_only(
/// ///
/// # Errors /// # Errors
/// ///
/// If the any file could not be opened, or there was a problem setting /// If any file could not be opened, or there was a problem setting
/// the size of at least one file. /// the size of at least one file.
///
/// If at least one file is a named pipe (also known as a fifo).
fn truncate_size_only(size_string: &str, filenames: &[String], create: bool) -> UResult<()> { fn truncate_size_only(size_string: &str, filenames: &[String], create: bool) -> UResult<()> {
let mode = parse_mode_and_size(size_string) let mode = parse_mode_and_size(size_string)
.map_err(|e| USimpleError::new(1, format!("Invalid number: {}", e)))?; .map_err(|e| USimpleError::new(1, format!("Invalid number: {}", e)))?;
@ -302,7 +338,19 @@ fn truncate_size_only(size_string: &str, filenames: &[String], create: bool) ->
} }
for filename in filenames { for filename in filenames {
let fsize = match metadata(filename) { let fsize = match metadata(filename) {
Ok(m) => m.len(), Ok(m) => {
#[cfg(unix)]
if m.file_type().is_fifo() {
return Err(USimpleError::new(
1,
format!(
"cannot open {} for writing: No such device or address",
filename.quote()
),
));
}
m.len()
}
Err(_) => 0, Err(_) => 0,
}; };
let tsize = mode.to_size(fsize as usize); let tsize = mode.to_size(fsize as usize);

View file

@ -20,12 +20,15 @@ use word_count::{TitledWordCount, WordCount};
use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::cmp::max; use std::cmp::max;
use std::error::Error;
use std::ffi::OsStr;
use std::fmt::Display;
use std::fs::{self, File}; use std::fs::{self, File};
use std::io::{self, Write}; use std::io::{self, Read, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use uucore::display::{Quotable, Quoted}; use uucore::display::{Quotable, Quoted};
use uucore::error::{UResult, USimpleError}; use uucore::error::{UError, UResult, USimpleError};
/// The minimum character width for formatting counts when reading from stdin. /// The minimum character width for formatting counts when reading from stdin.
const MINIMUM_WIDTH: usize = 7; const MINIMUM_WIDTH: usize = 7;
@ -83,12 +86,14 @@ more than one FILE is specified.";
pub mod options { pub mod options {
pub static BYTES: &str = "bytes"; pub static BYTES: &str = "bytes";
pub static CHAR: &str = "chars"; pub static CHAR: &str = "chars";
pub static FILES0_FROM: &str = "files0-from";
pub static LINES: &str = "lines"; pub static LINES: &str = "lines";
pub static MAX_LINE_LENGTH: &str = "max-line-length"; pub static MAX_LINE_LENGTH: &str = "max-line-length";
pub static WORDS: &str = "words"; pub static WORDS: &str = "words";
} }
static ARG_FILES: &str = "files"; static ARG_FILES: &str = "files";
static STDIN_REPR: &str = "-";
fn usage() -> String { fn usage() -> String {
format!( format!(
@ -115,12 +120,22 @@ enum Input {
Stdin(StdinKind), Stdin(StdinKind),
} }
/// Turns a raw command-line operand into an [`Input`].
impl From<&OsStr> for Input {
    /// Anything other than the stdin marker (`STDIN_REPR`) is treated as a
    /// path; the marker itself becomes an explicit request to read stdin.
    fn from(input: &OsStr) -> Self {
        if input != STDIN_REPR {
            return Self::Path(input.into());
        }
        Self::Stdin(StdinKind::Explicit)
    }
}
impl Input { impl Input {
/// Converts input to title that appears in stats. /// Converts input to title that appears in stats.
fn to_title(&self) -> Option<&Path> { fn to_title(&self) -> Option<&Path> {
match self { match self {
Input::Path(path) => Some(path), Input::Path(path) => Some(path),
Input::Stdin(StdinKind::Explicit) => Some("-".as_ref()), Input::Stdin(StdinKind::Explicit) => Some(STDIN_REPR.as_ref()),
Input::Stdin(StdinKind::Implicit) => None, Input::Stdin(StdinKind::Implicit) => None,
} }
} }
@ -133,29 +148,43 @@ impl Input {
} }
} }
/// Errors raised while working out which inputs to count.
///
/// Each variant carries the complete, already formatted message that is
/// shown to the user.
#[derive(Debug)]
enum WcError {
/// File operands were given together with `--files0-from`.
FilesDisabled(String),
/// The name list was read from stdin and contained the stdin marker itself.
StdinReprNotAllowed(String),
}
impl UError for WcError {
    /// Exit code reported for this error; every variant maps to `1`.
    fn code(&self) -> i32 {
        match self {
            Self::FilesDisabled(_) => 1,
            Self::StdinReprNotAllowed(_) => 1,
        }
    }

    /// Returns `true` only for [`WcError::FilesDisabled`].
    // NOTE(review): presumably this tells the caller whether to print a
    // usage hint alongside the message; confirm against the `UError` trait.
    fn usage(&self) -> bool {
        match self {
            Self::FilesDisabled(_) => true,
            Self::StdinReprNotAllowed(_) => false,
        }
    }
}
// No methods are overridden: `std::error::Error` only needs the `Debug` and
// `Display` implementations that `WcError` already has.
impl Error for WcError {}
impl Display for WcError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
WcError::FilesDisabled(message) | WcError::StdinReprNotAllowed(message) => {
write!(f, "{}", message)
}
}
}
}
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let usage = usage(); let usage = usage();
let matches = uu_app().override_usage(&usage[..]).get_matches_from(args); let matches = uu_app().override_usage(&usage[..]).get_matches_from(args);
let mut inputs: Vec<Input> = matches let inputs = inputs(&matches)?;
.values_of_os(ARG_FILES)
.map(|v| {
v.map(|i| {
if i == "-" {
Input::Stdin(StdinKind::Explicit)
} else {
Input::Path(i.into())
}
})
.collect()
})
.unwrap_or_default();
if inputs.is_empty() {
inputs.push(Input::Stdin(StdinKind::Implicit));
}
let settings = Settings::new(&matches); let settings = Settings::new(&matches);
@ -179,6 +208,17 @@ pub fn uu_app<'a>() -> App<'a> {
.long(options::CHAR) .long(options::CHAR)
.help("print the character counts"), .help("print the character counts"),
) )
.arg(
Arg::new(options::FILES0_FROM)
.long(options::FILES0_FROM)
.takes_value(true)
.value_name("F")
.help(
"read input from the files specified by
NUL-terminated names in file F;
If F is - then read names from standard input",
),
)
.arg( .arg(
Arg::new(options::LINES) Arg::new(options::LINES)
.short('l') .short('l')
@ -205,6 +245,47 @@ pub fn uu_app<'a>() -> App<'a> {
) )
} }
fn inputs(matches: &ArgMatches) -> UResult<Vec<Input>> {
match matches.values_of_os(ARG_FILES) {
Some(os_values) => {
if matches.is_present(options::FILES0_FROM) {
return Err(WcError::FilesDisabled(
"file operands cannot be combined with --files0-from".into(),
)
.into());
}
Ok(os_values.map(Input::from).collect())
}
None => match matches.value_of(options::FILES0_FROM) {
Some(files_0_from) => create_paths_from_files0(files_0_from),
None => Ok(vec![Input::Stdin(StdinKind::Implicit)]),
},
}
}
/// Loads a NUL-separated list of file names and turns each into an [`Input`].
///
/// The list is read from standard input when `files_0_from` equals
/// `STDIN_REPR`, otherwise from the file at that path. A trailing NUL does
/// not produce an extra empty name.
///
/// # Errors
///
/// * Any I/O error from opening or reading the list. The list is read with
///   `read_to_string`, so content that is not valid UTF-8 is reported as an
///   error as well.
/// * [`WcError::StdinReprNotAllowed`] when the list itself came from stdin
///   and one of the names is the stdin marker.
fn create_paths_from_files0(files_0_from: &str) -> UResult<Vec<Input>> {
    let names_from_stdin = files_0_from == STDIN_REPR;

    let mut raw_names = String::new();
    if names_from_stdin {
        io::stdin().lock().read_to_string(&mut raw_names)?;
    } else {
        File::open(files_0_from)?.read_to_string(&mut raw_names)?;
    }

    let mut collected = Vec::new();
    for name in raw_names.split_terminator('\0') {
        // Stdin is already being consumed for the name list, so it cannot
        // also be one of the inputs.
        if names_from_stdin && name == STDIN_REPR {
            return Err(WcError::StdinReprNotAllowed(
                "when reading file names from stdin, no file name of '-' allowed".into(),
            )
            .into());
        }
        collected.push(Input::from(OsStr::new(name)));
    }
    Ok(collected)
}
fn word_count_from_reader<T: WordCountable>( fn word_count_from_reader<T: WordCountable>(
mut reader: T, mut reader: T,
settings: &Settings, settings: &Settings,

View file

@ -6,7 +6,7 @@ license = "MIT"
description = "uutils ~ 'core' uutils code library (cross-platform)" description = "uutils ~ 'core' uutils code library (cross-platform)"
homepage = "https://github.com/uutils/coreutils" homepage = "https://github.com/uutils/coreutils"
repository = "https://github.com/uutils/coreutils/tree/master/src/uu/arch" repository = "https://github.com/uutils/coreutils/tree/master/src/uucore"
# readme = "README.md" # readme = "README.md"
keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
categories = ["command-line-utilities"] categories = ["command-line-utilities"]
@ -55,6 +55,7 @@ encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"]
entries = ["libc"] entries = ["libc"]
fs = ["libc", "nix", "winapi-util"] fs = ["libc", "nix", "winapi-util"]
fsext = ["libc", "time"] fsext = ["libc", "time"]
lines = []
memo = ["itertools"] memo = ["itertools"]
mode = ["libc"] mode = ["libc"]
perms = ["libc", "walkdir"] perms = ["libc", "walkdir"]

View file

@ -6,6 +6,8 @@ pub mod encoding;
pub mod fs; pub mod fs;
#[cfg(feature = "fsext")] #[cfg(feature = "fsext")]
pub mod fsext; pub mod fsext;
#[cfg(feature = "lines")]
pub mod lines;
#[cfg(feature = "memo")] #[cfg(feature = "memo")]
pub mod memo; pub mod memo;
#[cfg(feature = "ringbuffer")] #[cfg(feature = "ringbuffer")]

View file

@ -299,9 +299,8 @@ pub fn canonicalize<P: AsRef<Path>>(
let original = if original.is_absolute() { let original = if original.is_absolute() {
original.to_path_buf() original.to_path_buf()
} else { } else {
dunce::canonicalize(env::current_dir().unwrap()) let current_dir = env::current_dir()?;
.unwrap() dunce::canonicalize(current_dir)?.join(original)
.join(original)
}; };
let mut result = PathBuf::new(); let mut result = PathBuf::new();

View file

@ -238,7 +238,7 @@ impl MountInfo {
} }
} }
#[cfg(windows)] #[cfg(windows)]
fn new(mut volume_name: String) -> Option<MountInfo> { fn new(mut volume_name: String) -> Option<Self> {
let mut dev_name_buf = [0u16; MAX_PATH]; let mut dev_name_buf = [0u16; MAX_PATH];
volume_name.pop(); volume_name.pop();
unsafe { unsafe {
@ -289,7 +289,7 @@ impl MountInfo {
} else { } else {
None None
}; };
let mut mn_info = MountInfo { let mut mn_info = Self {
dev_id: volume_name, dev_id: volume_name,
dev_name, dev_name,
fs_type: fs_type.unwrap_or_else(|| "".to_string()), fs_type: fs_type.unwrap_or_else(|| "".to_string()),
@ -319,7 +319,7 @@ use std::ffi::CStr;
))] ))]
impl From<StatFs> for MountInfo { impl From<StatFs> for MountInfo {
fn from(statfs: StatFs) -> Self { fn from(statfs: StatFs) -> Self {
let mut info = MountInfo { let mut info = Self {
dev_id: "".to_string(), dev_id: "".to_string(),
dev_name: unsafe { dev_name: unsafe {
// spell-checker:disable-next-line // spell-checker:disable-next-line
@ -553,7 +553,7 @@ impl FsUsage {
} }
let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64;
FsUsage { Self {
// f_bsize File system block size. // f_bsize File system block size.
blocksize: bytes_per_cluster as u64, blocksize: bytes_per_cluster as u64,
// f_blocks - Total number of blocks on the file system, in units of f_frsize. // f_blocks - Total number of blocks on the file system, in units of f_frsize.
@ -567,7 +567,7 @@ impl FsUsage {
// Total number of file nodes (inodes) on the file system. // Total number of file nodes (inodes) on the file system.
files: 0, // Not available on windows files: 0, // Not available on windows
// Total number of free file nodes (inodes). // Total number of free file nodes (inodes).
ffree: 4096, // Meaningless on Windows ffree: 0, // Meaningless on Windows
} }
} }
} }

View file

@ -2,15 +2,17 @@
// * // *
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
// spell-checker:ignore (vars)
//! Iterate over lines, including the line ending character(s). //! Iterate over lines, including the line ending character(s).
//! //!
//! This module provides the [`lines`] function, similar to the //! This module provides the [`lines`] function, similar to the
//! [`BufRead::lines`] method. While the [`BufRead::lines`] method //! [`BufRead::lines`] method. While the [`BufRead::lines`] method
//! yields [`String`] instances that do not include the line ending //! yields [`String`] instances that do not include the line ending
//! characters (`"\n"` or `"\r\n"`), our function yields [`String`] //! characters (`"\n"` or `"\r\n"`), our functions yield
//! instances that include the line ending characters. This is useful //! [`Vec`]<[`u8`]> instances that include the line ending
//! if the input data does not end with a newline character and you //! characters. This is useful if the input data does not end with a
//! want to preserve the exact form of the input data. //! newline character and you want to preserve the exact form of the
//! input data.
use std::io::BufRead; use std::io::BufRead;
/// Returns an iterator over the lines, including line ending characters. /// Returns an iterator over the lines, including line ending characters.
@ -51,7 +53,7 @@ use std::io::BufRead;
/// assert_eq!(it.next(), Some(Vec::from("z"))); /// assert_eq!(it.next(), Some(Vec::from("z")));
/// assert_eq!(it.next(), None); /// assert_eq!(it.next(), None);
/// ``` /// ```
pub(crate) fn lines<B>(reader: B, sep: u8) -> Lines<B> pub fn lines<B>(reader: B, sep: u8) -> Lines<B>
where where
B: BufRead, B: BufRead,
{ {
@ -62,7 +64,7 @@ where
/// ///
/// This struct is generally created by calling [`lines`] on a `BufRead`. /// This struct is generally created by calling [`lines`] on a `BufRead`.
/// Please see the documentation of [`lines`] for more details. /// Please see the documentation of [`lines`] for more details.
pub(crate) struct Lines<B> { pub struct Lines<B> {
buf: B, buf: B,
sep: u8, sep: u8,
} }

View file

@ -6,6 +6,7 @@
//! that prints tokens. //! that prints tokens.
use crate::display::Quotable; use crate::display::Quotable;
use crate::error::UResult;
use crate::features::tokenize::sub::Sub; use crate::features::tokenize::sub::Sub;
use crate::features::tokenize::token::{Token, Tokenizer}; use crate::features::tokenize::token::{Token, Tokenizer};
use crate::features::tokenize::unescaped_text::UnescapedText; use crate::features::tokenize::unescaped_text::UnescapedText;
@ -26,17 +27,17 @@ fn warn_excess_args(first_arg: &str) {
} }
impl Memo { impl Memo {
pub fn new(pf_string: &str, pf_args_it: &mut Peekable<Iter<String>>) -> Self { pub fn new(pf_string: &str, pf_args_it: &mut Peekable<Iter<String>>) -> UResult<Self> {
let mut pm = Self { tokens: Vec::new() }; let mut pm = Self { tokens: Vec::new() };
let mut tmp_token: Option<Box<dyn Token>>; let mut tmp_token: Option<Box<dyn Token>>;
let mut it = put_back_n(pf_string.chars()); let mut it = put_back_n(pf_string.chars());
let mut has_sub = false; let mut has_sub = false;
loop { loop {
tmp_token = UnescapedText::from_it(&mut it, pf_args_it); tmp_token = UnescapedText::from_it(&mut it, pf_args_it)?;
if let Some(x) = tmp_token { if let Some(x) = tmp_token {
pm.tokens.push(x); pm.tokens.push(x);
} }
tmp_token = Sub::from_it(&mut it, pf_args_it); tmp_token = Sub::from_it(&mut it, pf_args_it)?;
if let Some(x) = tmp_token { if let Some(x) = tmp_token {
if !has_sub { if !has_sub {
has_sub = true; has_sub = true;
@ -64,19 +65,19 @@ impl Memo {
} }
} }
} }
pm Ok(pm)
} }
pub fn apply(&self, pf_args_it: &mut Peekable<Iter<String>>) { pub fn apply(&self, pf_args_it: &mut Peekable<Iter<String>>) {
for tkn in &self.tokens { for tkn in &self.tokens {
tkn.print(pf_args_it); tkn.print(pf_args_it);
} }
} }
pub fn run_all(pf_string: &str, pf_args: &[String]) { pub fn run_all(pf_string: &str, pf_args: &[String]) -> UResult<()> {
let mut arg_it = pf_args.iter().peekable(); let mut arg_it = pf_args.iter().peekable();
let pm = Self::new(pf_string, &mut arg_it); let pm = Self::new(pf_string, &mut arg_it)?;
loop { loop {
if arg_it.peek().is_none() { if arg_it.peek().is_none() {
break; return Ok(());
} }
pm.apply(&mut arg_it); pm.apply(&mut arg_it);
} }

View file

@ -5,8 +5,10 @@
//! it is created by Sub's implementation of the Tokenizer trait //! it is created by Sub's implementation of the Tokenizer trait
//! Subs which have numeric field chars make use of the num_format //! Subs which have numeric field chars make use of the num_format
//! submodule //! submodule
use crate::show_error; use crate::error::{UError, UResult};
use itertools::{put_back_n, PutBackN}; use itertools::{put_back_n, PutBackN};
use std::error::Error;
use std::fmt::Display;
use std::iter::Peekable; use std::iter::Peekable;
use std::process::exit; use std::process::exit;
use std::slice::Iter; use std::slice::Iter;
@ -20,11 +22,23 @@ use super::unescaped_text::UnescapedText;
const EXIT_ERR: i32 = 1; const EXIT_ERR: i32 = 1;
fn err_conv(sofar: &str) { #[derive(Debug)]
show_error!("%{}: invalid conversion specification", sofar); pub enum SubError {
exit(EXIT_ERR); InvalidSpec(String),
} }
impl Display for SubError {
    /// Formats the error as `%<spec>: invalid conversion specification`,
    /// where `<spec>` is the text stored in the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        // `InvalidSpec` is the only variant, so this pattern cannot fail.
        let Self::InvalidSpec(spec) = self;
        write!(f, "%{}: invalid conversion specification", spec)
    }
}
// Both impls are empty, so `SubError` uses whatever defaults the traits provide.
// NOTE(review): the exit code and usage behavior therefore come from
// `UError`'s default methods; confirm those defaults suit printf errors.
impl Error for SubError {}
impl UError for SubError {}
fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize { fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize {
// this is a costly way to parse the // this is a costly way to parse the
// args used for asterisk values into integers // args used for asterisk values into integers
@ -60,6 +74,7 @@ pub struct Sub {
field_char: char, field_char: char,
field_type: FieldType, field_type: FieldType,
orig: String, orig: String,
prefix_char: char,
} }
impl Sub { impl Sub {
pub fn new( pub fn new(
@ -67,6 +82,7 @@ impl Sub {
second_field: CanAsterisk<Option<u32>>, second_field: CanAsterisk<Option<u32>>,
field_char: char, field_char: char,
orig: String, orig: String,
prefix_char: char,
) -> Self { ) -> Self {
// for more dry printing, field characters are grouped // for more dry printing, field characters are grouped
// in initialization of token. // in initialization of token.
@ -90,6 +106,7 @@ impl Sub {
field_char, field_char,
field_type, field_type,
orig, orig,
prefix_char,
} }
} }
} }
@ -113,19 +130,24 @@ impl SubParser {
fn from_it( fn from_it(
it: &mut PutBackN<Chars>, it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>, args: &mut Peekable<Iter<String>>,
) -> Option<Box<dyn token::Token>> { ) -> UResult<Option<Box<dyn token::Token>>> {
let mut parser = Self::new(); let mut parser = Self::new();
if parser.sub_vals_retrieved(it) { if parser.sub_vals_retrieved(it)? {
let t: Box<dyn token::Token> = Self::build_token(parser); let t: Box<dyn token::Token> = Self::build_token(parser);
t.print(args); t.print(args);
Some(t) Ok(Some(t))
} else { } else {
None Ok(None)
} }
} }
fn build_token(parser: Self) -> Box<dyn token::Token> { fn build_token(parser: Self) -> Box<dyn token::Token> {
// not a self method so as to allow move of sub-parser vals. // not a self method so as to allow move of sub-parser vals.
// return new Sub struct as token // return new Sub struct as token
let prefix_char = match &parser.min_width_tmp {
Some(width) if width.starts_with('0') => '0',
_ => ' ',
};
let t: Box<dyn token::Token> = Box::new(Sub::new( let t: Box<dyn token::Token> = Box::new(Sub::new(
if parser.min_width_is_asterisk { if parser.min_width_is_asterisk {
CanAsterisk::Asterisk CanAsterisk::Asterisk
@ -139,12 +161,13 @@ impl SubParser {
}, },
parser.field_char.unwrap(), parser.field_char.unwrap(),
parser.text_so_far, parser.text_so_far,
prefix_char,
)); ));
t t
} }
fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> bool { fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> {
if !Self::successfully_eat_prefix(it, &mut self.text_so_far) { if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? {
return false; return Ok(false);
} }
// this fn in particular is much longer than it needs to be // this fn in particular is much longer than it needs to be
// .could get a lot // .could get a lot
@ -168,7 +191,7 @@ impl SubParser {
'-' | '*' | '0'..='9' => { '-' | '*' | '0'..='9' => {
if !self.past_decimal { if !self.past_decimal {
if self.min_width_is_asterisk || self.specifiers_found { if self.min_width_is_asterisk || self.specifiers_found {
err_conv(&self.text_so_far); return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
} }
if self.min_width_tmp.is_none() { if self.min_width_tmp.is_none() {
self.min_width_tmp = Some(String::new()); self.min_width_tmp = Some(String::new());
@ -176,7 +199,9 @@ impl SubParser {
match self.min_width_tmp.as_mut() { match self.min_width_tmp.as_mut() {
Some(x) => { Some(x) => {
if (ch == '-' || ch == '*') && !x.is_empty() { if (ch == '-' || ch == '*') && !x.is_empty() {
err_conv(&self.text_so_far); return Err(
SubError::InvalidSpec(self.text_so_far.clone()).into()
);
} }
if ch == '*' { if ch == '*' {
self.min_width_is_asterisk = true; self.min_width_is_asterisk = true;
@ -191,7 +216,7 @@ impl SubParser {
// second field should never have a // second field should never have a
// negative value // negative value
if self.second_field_is_asterisk || ch == '-' || self.specifiers_found { if self.second_field_is_asterisk || ch == '-' || self.specifiers_found {
err_conv(&self.text_so_far); return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
} }
if self.second_field_tmp.is_none() { if self.second_field_tmp.is_none() {
self.second_field_tmp = Some(String::new()); self.second_field_tmp = Some(String::new());
@ -199,7 +224,9 @@ impl SubParser {
match self.second_field_tmp.as_mut() { match self.second_field_tmp.as_mut() {
Some(x) => { Some(x) => {
if ch == '*' && !x.is_empty() { if ch == '*' && !x.is_empty() {
err_conv(&self.text_so_far); return Err(
SubError::InvalidSpec(self.text_so_far.clone()).into()
);
} }
if ch == '*' { if ch == '*' {
self.second_field_is_asterisk = true; self.second_field_is_asterisk = true;
@ -216,7 +243,7 @@ impl SubParser {
if !self.past_decimal { if !self.past_decimal {
self.past_decimal = true; self.past_decimal = true;
} else { } else {
err_conv(&self.text_so_far); return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
} }
} }
x if legal_fields.binary_search(&x).is_ok() => { x if legal_fields.binary_search(&x).is_ok() => {
@ -233,18 +260,18 @@ impl SubParser {
} }
} }
_ => { _ => {
err_conv(&self.text_so_far); return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
} }
} }
} }
if self.field_char.is_none() { if self.field_char.is_none() {
err_conv(&self.text_so_far); return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
} }
let field_char_retrieved = self.field_char.unwrap(); let field_char_retrieved = self.field_char.unwrap();
if self.past_decimal && self.second_field_tmp.is_none() { if self.past_decimal && self.second_field_tmp.is_none() {
self.second_field_tmp = Some(String::from("0")); self.second_field_tmp = Some(String::from("0"));
} }
self.validate_field_params(field_char_retrieved); self.validate_field_params(field_char_retrieved)?;
// if the dot is provided without a second field // if the dot is provided without a second field
// printf interprets it as 0. // printf interprets it as 0.
if let Some(x) = self.second_field_tmp.as_mut() { if let Some(x) = self.second_field_tmp.as_mut() {
@ -253,9 +280,12 @@ impl SubParser {
} }
} }
true Ok(true)
} }
fn successfully_eat_prefix(it: &mut PutBackN<Chars>, text_so_far: &mut String) -> bool { fn successfully_eat_prefix(
it: &mut PutBackN<Chars>,
text_so_far: &mut String,
) -> UResult<bool> {
// get next two chars, // get next two chars,
// if they're '%%' we're not tokenizing it // if they're '%%' we're not tokenizing it
// else put chars back // else put chars back
@ -265,12 +295,11 @@ impl SubParser {
match n_ch { match n_ch {
Some(x) => { Some(x) => {
it.put_back(x); it.put_back(x);
true Ok(true)
} }
None => { None => {
text_so_far.push('%'); text_so_far.push('%');
err_conv(text_so_far); Err(SubError::InvalidSpec(text_so_far.clone()).into())
false
} }
} }
} else { } else {
@ -280,10 +309,10 @@ impl SubParser {
if let Some(x) = preface { if let Some(x) = preface {
it.put_back(x); it.put_back(x);
}; };
false Ok(false)
} }
} }
fn validate_field_params(&self, field_char: char) { fn validate_field_params(&self, field_char: char) -> UResult<()> {
// check for illegal combinations here when possible vs // check for illegal combinations here when possible vs
// on each application so we check less per application // on each application so we check less per application
// to do: move these checks to Sub::new // to do: move these checks to Sub::new
@ -295,8 +324,12 @@ impl SubParser {
|| self.past_decimal || self.past_decimal
|| self.second_field_tmp.is_some())) || self.second_field_tmp.is_some()))
{ {
err_conv(&self.text_so_far); // invalid string substitution
// to do: include information about an invalid
// string substitution
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
} }
Ok(())
} }
} }
@ -304,7 +337,7 @@ impl token::Tokenizer for Sub {
fn from_it( fn from_it(
it: &mut PutBackN<Chars>, it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>, args: &mut Peekable<Iter<String>>,
) -> Option<Box<dyn token::Token>> { ) -> UResult<Option<Box<dyn token::Token>>> {
SubParser::from_it(it, args) SubParser::from_it(it, args)
} }
} }
@ -394,7 +427,7 @@ impl token::Token for Sub {
final_str.push_str(&pre_min_width); final_str.push_str(&pre_min_width);
} }
for _ in 0..diff { for _ in 0..diff {
final_str.push(' '); final_str.push(self.prefix_char);
} }
if pad_before { if pad_before {
final_str.push_str(&pre_min_width); final_str.push_str(&pre_min_width);

Some files were not shown because too many files have changed in this diff Show more