
Merge branch 'master' of github.com:uutils/coreutils into refactoring_parse_size

Jan Scheer, 2021-06-06 22:54:02 +02:00
Commit: 12de58aec0

20 changed files with 519 additions and 148 deletions

Cargo.lock (generated)

@@ -44,13 +44,16 @@ dependencies = [
 ]
 
 [[package]]
-name = "arrayvec"
-version = "0.4.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
-dependencies = [
- "nodrop",
-]
+name = "arrayref"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
+
+[[package]]
+name = "arrayvec"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
 
 [[package]]
 name = "atty"
@@ -100,11 +103,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 
 [[package]]
-name = "blake2-rfc"
-version = "0.2.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
+name = "blake2b_simd"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
 dependencies = [
+ "arrayref",
  "arrayvec",
  "constant_time_eq",
 ]
@@ -700,9 +704,9 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
 
 [[package]]
 name = "heck"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
+checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
 dependencies = [
  "unicode-segmentation",
 ]
@@ -1383,12 +1387,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.1.9"
+version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
-dependencies = [
- "byteorder",
-]
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
 
 [[package]]
 name = "regex-syntax"
@@ -1501,9 +1502,9 @@ dependencies = [
 
 [[package]]
 name = "signal-hook-registry"
-version = "1.3.0"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
+checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
 dependencies = [
  "libc",
 ]
@@ -1904,6 +1905,7 @@ dependencies = [
 name = "uu_dircolors"
 version = "0.0.6"
 dependencies = [
+ "clap",
  "glob 0.3.0",
  "uucore",
  "uucore_procs",
@@ -2028,7 +2030,7 @@ dependencies = [
 name = "uu_hashsum"
 version = "0.0.6"
 dependencies = [
- "blake2-rfc",
+ "blake2b_simd",
  "clap",
  "digest",
  "hex",
@@ -2215,6 +2217,8 @@ dependencies = [
  "nix 0.13.1",
  "redox_syscall 0.1.57",
  "redox_termios",
+ "unicode-segmentation",
+ "unicode-width",
  "uucore",
  "uucore_procs",
 ]


@@ -342,22 +342,22 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
 | Done | Semi-Done | To Do |
 |-----------|-----------|--------|
 | arch | cp | chcon |
-| base32 | expr | csplit |
-| base64 | install | dd |
-| basename | ls | df |
-| cat | more | numfmt |
-| chgrp | od (`--strings` and 128-bit data types missing) | runcon |
-| chmod | printf | stty |
-| chown | sort | |
-| chroot | split | |
-| cksum | tail | |
-| comm | test | |
-| csplit | date | |
-| cut | join | |
-| dircolors | df | |
+| base32 | date | dd |
+| base64 | df | runcon |
+| basename | expr | stty |
+| cat | install | |
+| chgrp | join | |
+| chmod | ls | |
+| chown | more | |
+| chroot | numfmt | |
+| cksum | od (`--strings` and 128-bit data types missing) | |
+| comm | pr | |
+| csplit | printf | |
+| cut | sort | |
+| dircolors | split | |
 | dirname | tac | |
-| du | pr | |
-| echo | | |
+| du | tail | |
+| echo | test | |
 | env | | |
 | expand | | |
 | factor | | |
@@ -374,12 +374,12 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
 | link | | |
 | ln | | |
 | logname | | |
 | ~~md5sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha1sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha224sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha256sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha384sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha512sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | mkdir | | |
 | mkfifo | | |
 | mknod | | |


@@ -15,6 +15,7 @@ edition = "2018"
 path = "src/dircolors.rs"
 
 [dependencies]
+clap = "2.33"
 glob = "0.3.0"
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }


@@ -1,6 +1,7 @@
 // This file is part of the uutils coreutils package.
 //
 // (c) Jian Zeng <anonymousknight96@gmail.com>
+// (c) Mitchell Mebane <mitchell.mebane@gmail.com>
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
@@ -15,6 +16,15 @@ use std::env;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
 
+use clap::{crate_version, App, Arg};
+
+mod options {
+    pub const BOURNE_SHELL: &str = "bourne-shell";
+    pub const C_SHELL: &str = "c-shell";
+    pub const PRINT_DATABASE: &str = "print-database";
+    pub const FILE: &str = "FILE";
+}
+
 static SYNTAX: &str = "[OPTION]... [FILE]";
 static SUMMARY: &str = "Output commands to set the LS_COLORS environment variable.";
 static LONG_HELP: &str = "
@@ -52,28 +62,56 @@ pub fn guess_syntax() -> OutputFmt {
     }
 }
 
+fn get_usage() -> String {
+    format!("{0} {1}", executable!(), SYNTAX)
+}
+
 pub fn uumain(args: impl uucore::Args) -> i32 {
     let args = args
         .collect_str(InvalidEncodingHandling::Ignore)
         .accept_any();
 
-    let matches = app!(SYNTAX, SUMMARY, LONG_HELP)
-        .optflag("b", "sh", "output Bourne shell code to set LS_COLORS")
-        .optflag(
-            "",
-            "bourne-shell",
-            "output Bourne shell code to set LS_COLORS",
-        )
-        .optflag("c", "csh", "output C shell code to set LS_COLORS")
-        .optflag("", "c-shell", "output C shell code to set LS_COLORS")
-        .optflag("p", "print-database", "print the byte counts")
-        .parse(args);
-
-    if (matches.opt_present("csh")
-        || matches.opt_present("c-shell")
-        || matches.opt_present("sh")
-        || matches.opt_present("bourne-shell"))
-        && matches.opt_present("print-database")
+    let usage = get_usage();
+
+    let matches = App::new(executable!())
+        .version(crate_version!())
+        .about(SUMMARY)
+        .usage(&usage[..])
+        .after_help(LONG_HELP)
+        .arg(
+            Arg::with_name(options::BOURNE_SHELL)
+                .long("sh")
+                .short("b")
+                .visible_alias("bourne-shell")
+                .help("output Bourne shell code to set LS_COLORS")
+                .display_order(1),
+        )
+        .arg(
+            Arg::with_name(options::C_SHELL)
+                .long("csh")
+                .short("c")
+                .visible_alias("c-shell")
+                .help("output C shell code to set LS_COLORS")
+                .display_order(2),
+        )
+        .arg(
+            Arg::with_name(options::PRINT_DATABASE)
+                .long("print-database")
+                .short("p")
+                .help("print the byte counts")
+                .display_order(3),
+        )
+        .arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
+        .get_matches_from(&args);
+
+    let files = matches
+        .values_of(options::FILE)
+        .map_or(vec![], |file_values| file_values.collect());
+
+    // clap provides .conflicts_with / .conflicts_with_all, but we want to
+    // manually handle conflicts so we can match the output of GNU coreutils
+    if (matches.is_present(options::C_SHELL) || matches.is_present(options::BOURNE_SHELL))
+        && matches.is_present(options::PRINT_DATABASE)
     {
         show_usage_error!(
             "the options to output dircolors' internal database and\nto select a shell \
@@ -82,12 +120,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
         return 1;
     }
 
-    if matches.opt_present("print-database") {
-        if !matches.free.is_empty() {
+    if matches.is_present(options::PRINT_DATABASE) {
+        if !files.is_empty() {
             show_usage_error!(
                 "extra operand {}\nfile operands cannot be combined with \
                  --print-database (-p)",
-                matches.free[0]
+                files[0]
             );
             return 1;
         }
@@ -96,9 +134,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
     }
 
     let mut out_format = OutputFmt::Unknown;
-    if matches.opt_present("csh") || matches.opt_present("c-shell") {
+    if matches.is_present(options::C_SHELL) {
         out_format = OutputFmt::CShell;
-    } else if matches.opt_present("sh") || matches.opt_present("bourne-shell") {
+    } else if matches.is_present(options::BOURNE_SHELL) {
         out_format = OutputFmt::Shell;
     }
 
@@ -113,24 +151,20 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
     }
 
     let result;
-    if matches.free.is_empty() {
+    if files.is_empty() {
         result = parse(INTERNAL_DB.lines(), out_format, "")
     } else {
-        if matches.free.len() > 1 {
-            show_usage_error!("extra operand {}", matches.free[1]);
+        if files.len() > 1 {
+            show_usage_error!("extra operand {}", files[1]);
             return 1;
         }
-        match File::open(matches.free[0].as_str()) {
+        match File::open(files[0]) {
             Ok(f) => {
                 let fin = BufReader::new(f);
-                result = parse(
-                    fin.lines().filter_map(Result::ok),
-                    out_format,
-                    matches.free[0].as_str(),
-                )
+                result = parse(fin.lines().filter_map(Result::ok), out_format, files[0])
             }
             Err(e) => {
-                show_error!("{}: {}", matches.free[0], e);
+                show_error!("{}: {}", files[0], e);
                 return 1;
             }
         }


@@ -393,6 +393,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
                     although the apparent size is usually smaller, it may be larger due to holes \
                     in ('sparse') files, internal fragmentation, indirect blocks, and the like"
                 )
+                .alias("app") // The GNU testsuite uses this alias
         )
         .arg(
             Arg::with_name(options::BLOCK_SIZE)


@@ -0,0 +1,9 @@
+## Benchmarking hashsum
+
+### To bench blake2
+
+Taken from: https://github.com/uutils/coreutils/pull/2296
+
+With a large file:
+
+$ hyperfine "./target/release/coreutils hashsum --b2sum large-file" "b2sum large-file"


@@ -25,7 +25,7 @@ regex-syntax = "0.6.7"
 sha1 = "0.6.0"
 sha2 = "0.6.0"
 sha3 = "0.6.0"
-blake2-rfc = "0.2.18"
+blake2b_simd = "0.5.11"
 
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }


@@ -1,4 +1,3 @@
-extern crate blake2_rfc;
 extern crate digest;
 extern crate md5;
 extern crate sha1;
@@ -49,9 +48,9 @@ impl Digest for md5::Context {
     }
 }
 
-impl Digest for blake2_rfc::blake2b::Blake2b {
+impl Digest for blake2b_simd::State {
     fn new() -> Self {
-        blake2_rfc::blake2b::Blake2b::new(64)
+        Self::new()
     }
 
     fn input(&mut self, input: &[u8]) {
@@ -59,12 +58,12 @@ impl Digest for blake2_rfc::blake2b::Blake2b {
     }
 
     fn result(&mut self, out: &mut [u8]) {
-        let hash_result = &self.clone().finalize();
+        let hash_result = &self.finalize();
         out.copy_from_slice(&hash_result.as_bytes());
     }
 
     fn reset(&mut self) {
-        *self = blake2_rfc::blake2b::Blake2b::new(64);
+        *self = Self::new();
     }
 
     fn output_bits(&self) -> usize {

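The crate swapped in above exposes a small streaming API. As a standalone illustration only (it assumes a blake2b_simd = "0.5" dependency and is not part of the commit), the calls that the new Digest impl wraps look like this:

fn main() {
    // blake2b_simd::State produces a 64-byte (512-bit) BLAKE2b digest by default.
    let mut state = blake2b_simd::State::new();
    state.update(b"hello ");
    state.update(b"world"); // input can be fed incrementally
    // finalize() only borrows the state, which is why the impl above no longer needs .clone().
    let hash = state.finalize();
    println!("{}", hash.to_hex());
}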

@@ -19,7 +19,6 @@ mod digest;
 
 use self::digest::Digest;
 
-use blake2_rfc::blake2b::Blake2b;
 use clap::{App, Arg, ArgMatches};
 use hex::ToHex;
 use md5::Context as Md5;
@@ -85,7 +84,11 @@ fn detect_algo<'a>(
         "sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box<dyn Digest>, 256),
         "sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box<dyn Digest>, 384),
         "sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box<dyn Digest>, 512),
-        "b2sum" => ("BLAKE2", Box::new(Blake2b::new(64)) as Box<dyn Digest>, 512),
+        "b2sum" => (
+            "BLAKE2",
+            Box::new(blake2b_simd::State::new()) as Box<dyn Digest>,
+            512,
+        ),
         "sha3sum" => match matches.value_of("bits") {
             Some(bits_str) => match (&bits_str).parse::<usize>() {
                 Ok(224) => (
@@ -187,7 +190,7 @@ fn detect_algo<'a>(
                     set_or_crash("SHA512", Box::new(Sha512::new()), 512)
                 }
                 if matches.is_present("b2sum") {
-                    set_or_crash("BLAKE2", Box::new(Blake2b::new(64)), 512)
+                    set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512)
                 }
                 if matches.is_present("sha3") {
                     match matches.value_of("bits") {


@@ -20,6 +20,8 @@ uucore = { version = ">=0.0.7", package = "uucore", path = "../../uucore" }
 uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
 crossterm = ">=0.19"
 atty = "0.2.14"
+unicode-width = "0.1.7"
+unicode-segmentation = "1.7.1"
 
 [target.'cfg(target_os = "redox")'.dependencies]
 redox_termios = "0.1"


@@ -29,6 +29,9 @@ use crossterm::{
     terminal,
 };
 
+use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
+
 pub mod options {
     pub const SILENT: &str = "silent";
     pub const LOGICAL: &str = "logical";
@@ -140,7 +143,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
     if let Some(files) = matches.values_of(options::FILES) {
         let mut stdout = setup_term();
         let length = files.len();
-        for (idx, file) in files.enumerate() {
+
+        let mut files_iter = files.peekable();
+        while let (Some(file), next_file) = (files_iter.next(), files_iter.peek()) {
             let file = Path::new(file);
             if file.is_dir() {
                 terminal::disable_raw_mode().unwrap();
@@ -157,15 +162,14 @@
             }
             let mut reader = BufReader::new(File::open(file).unwrap());
             reader.read_to_string(&mut buff).unwrap();
-            let is_last = idx + 1 == length;
-            more(&buff, &mut stdout, is_last);
+            more(&buff, &mut stdout, next_file.copied());
             buff.clear();
         }
         reset_term(&mut stdout);
     } else if atty::isnt(atty::Stream::Stdin) {
         stdin().read_to_string(&mut buff).unwrap();
         let mut stdout = setup_term();
-        more(&buff, &mut stdout, true);
+        more(&buff, &mut stdout, None);
         reset_term(&mut stdout);
     } else {
         show_usage_error!("bad usage");
@@ -200,7 +204,7 @@ fn reset_term(stdout: &mut std::io::Stdout) {
 #[inline(always)]
 fn reset_term(_: &mut usize) {}
 
-fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
+fn more(buff: &str, mut stdout: &mut Stdout, next_file: Option<&str>) {
     let (cols, rows) = terminal::size().unwrap();
     let lines = break_buff(buff, usize::from(cols));
     let line_count: u16 = lines.len().try_into().unwrap();
@@ -214,8 +218,11 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
         &mut stdout,
         lines.clone(),
         line_count,
+        next_file,
     );
 
+    let is_last = next_file.is_none();
+
     // Specifies whether we have reached the end of the file and should
     // return on the next key press. However, we immediately return when
     // this is the last file.
@@ -267,6 +274,7 @@
                 &mut stdout,
                 lines.clone(),
                 line_count,
+                next_file,
             );
 
             if lines_left == 0 {
@@ -285,6 +293,7 @@ fn draw(
     mut stdout: &mut std::io::Stdout,
     lines: Vec<String>,
     lc: u16,
+    next_file: Option<&str>,
 ) {
     execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine)).unwrap();
     let (up_mark, lower_mark) = calc_range(*upper_mark, rows, lc);
@@ -299,7 +308,7 @@
             .write_all(format!("\r{}\n", line).as_bytes())
             .unwrap();
     }
 
-    make_prompt_and_flush(&mut stdout, lower_mark, lc);
+    make_prompt_and_flush(&mut stdout, lower_mark, lc, next_file);
     *upper_mark = up_mark;
 }
@@ -313,23 +322,30 @@ fn break_buff(buff: &str, cols: usize) -> Vec<String> {
     lines
 }
 
-fn break_line(mut line: &str, cols: usize) -> Vec<String> {
-    let breaks = (line.len() / cols).saturating_add(1);
-    let mut lines = Vec::with_capacity(breaks);
-    // TODO: Use unicode width instead of the length in bytes.
-    if line.len() < cols {
+fn break_line(line: &str, cols: usize) -> Vec<String> {
+    let width = UnicodeWidthStr::width(line);
+    let mut lines = Vec::new();
+    if width < cols {
         lines.push(line.to_string());
         return lines;
     }
 
-    for _ in 1..=breaks {
-        let (line1, line2) = line.split_at(cols);
-        lines.push(line1.to_string());
-        if line2.len() < cols {
-            lines.push(line2.to_string());
-            break;
+    let gr_idx = UnicodeSegmentation::grapheme_indices(line, true);
+    let mut last_index = 0;
+    let mut total_width = 0;
+    for (index, grapheme) in gr_idx {
+        let width = UnicodeWidthStr::width(grapheme);
+        total_width += width;
+        if total_width > cols {
+            lines.push(line[last_index..index].to_string());
+            last_index = index;
+            total_width = width;
         }
-        line = line2;
     }
+
+    if last_index != line.len() {
+        lines.push(line[last_index..].to_string());
+    }
+
     lines
 }
@@ -339,7 +355,7 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
     let mut lower_mark = upper_mark.saturating_add(rows);
     if lower_mark >= line_count {
-        upper_mark = line_count.saturating_sub(rows);
+        upper_mark = line_count.saturating_sub(rows).saturating_add(1);
         lower_mark = line_count;
     } else {
         lower_mark = lower_mark.saturating_sub(1)
     }
@@ -348,12 +364,20 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
 }
 
 // Make a prompt similar to original more
-fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
+fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16, next_file: Option<&str>) {
+    let status = if lower_mark == lc {
+        format!("Next file: {}", next_file.unwrap_or_default())
+    } else {
+        format!(
+            "{}%",
+            (lower_mark as f64 / lc as f64 * 100.0).round() as u16
+        )
+    };
     write!(
         stdout,
-        "\r{}--More--({}%){}",
+        "\r{}--More--({}){}",
         Attribute::Reverse,
-        ((lower_mark as f64 / lc as f64) * 100.0).round() as u16,
+        status,
         Attribute::Reset
     )
     .unwrap();
@@ -363,13 +387,14 @@ fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
 #[cfg(test)]
 mod tests {
     use super::{break_line, calc_range};
+    use unicode_width::UnicodeWidthStr;
 
     // It is good to test the above functions
     #[test]
     fn test_calc_range() {
         assert_eq!((0, 24), calc_range(0, 25, 100));
         assert_eq!((50, 74), calc_range(50, 25, 100));
-        assert_eq!((75, 100), calc_range(85, 25, 100));
+        assert_eq!((76, 100), calc_range(85, 25, 100));
     }
     #[test]
     fn test_break_lines_long() {
@@ -379,11 +404,12 @@
         }
 
         let lines = break_line(&test_string, 80);
 
-        assert_eq!(
-            (80, 80, 40),
-            (lines[0].len(), lines[1].len(), lines[2].len())
-        );
+        let widths: Vec<usize> = lines
+            .iter()
+            .map(|s| UnicodeWidthStr::width(&s[..]))
+            .collect();
+        assert_eq!((80, 80, 40), (widths[0], widths[1], widths[2]));
     }
 
     #[test]
@@ -397,4 +423,22 @@
         assert_eq!(20, lines[0].len());
     }
 
+    #[test]
+    fn test_break_line_zwj() {
+        let mut test_string = String::with_capacity(1100);
+        for _ in 0..20 {
+            test_string.push_str("👩🏻‍🔬");
+        }
+
+        let lines = break_line(&test_string, 80);
+
+        let widths: Vec<usize> = lines
+            .iter()
+            .map(|s| UnicodeWidthStr::width(&s[..]))
+            .collect();
+
+        // Each 👩🏻‍🔬 is 6 character width it break line to the closest number to 80 => 6 * 13 = 78
+        assert_eq!((78, 42), (widths[0], widths[1]));
+    }
 }

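The two crates added above distinguish user-perceived characters (grapheme clusters) from display width, which is what the rewritten break_line and the "6 * 13 = 78" comment in test_break_line_zwj rely on. A standalone sketch for illustration only (it assumes unicode-width = "0.1.7" and unicode-segmentation = "1.7.1", as in the Cargo.toml change, and is not part of the commit):

use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;

fn main() {
    // Woman-scientist emoji: several code points joined by a zero-width joiner.
    let s = "👩🏻‍🔬";
    // One grapheme cluster as far as segmentation is concerned...
    assert_eq!(s.graphemes(true).count(), 1);
    // ...but six terminal columns wide, so 13 of them fit in an 80-column line (13 * 6 = 78).
    assert_eq!(UnicodeWidthStr::width(s), 6);
    println!("graphemes: 1, width: {}", UnicodeWidthStr::width(s));
}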

@@ -255,7 +255,18 @@ fn handle_dir(path: &Path, options: &Options) -> bool {
         // correctly on Windows
         if let Err(e) = remove_dir_all(path) {
             had_err = true;
-            show_error!("could not remove '{}': {}", path.display(), e);
+            if e.kind() == std::io::ErrorKind::PermissionDenied {
+                // GNU compatibility (rm/fail-eacces.sh)
+                // here, GNU doesn't use some kind of remove_dir_all
+                // It will show directory+file
+                show_error!(
+                    "cannot remove '{}': {}",
+                    path.display(),
+                    "Permission denied"
+                );
+            } else {
+                show_error!("cannot remove '{}': {}", path.display(), e);
+            }
         }
     } else {
         let mut dirs: VecDeque<DirEntry> = VecDeque::new();
@@ -314,7 +325,16 @@ fn remove_dir(path: &Path, options: &Options) -> bool {
             }
         }
         Err(e) => {
-            show_error!("cannot remove '{}': {}", path.display(), e);
+            if e.kind() == std::io::ErrorKind::PermissionDenied {
+                // GNU compatibility (rm/fail-eacces.sh)
+                show_error!(
+                    "cannot remove '{}': {}",
+                    path.display(),
+                    "Permission denied"
+                );
+            } else {
+                show_error!("cannot remove '{}': {}", path.display(), e);
+            }
             return true;
         }
     }
@@ -352,7 +372,16 @@ fn remove_file(path: &Path, options: &Options) -> bool {
             }
         }
         Err(e) => {
-            show_error!("removing '{}': {}", path.display(), e);
+            if e.kind() == std::io::ErrorKind::PermissionDenied {
+                // GNU compatibility (rm/fail-eacces.sh)
+                show_error!(
+                    "cannot remove '{}': {}",
+                    path.display(),
+                    "Permission denied"
+                );
+            } else {
+                show_error!("cannot remove '{}': {}", path.display(), e);
+            }
             return true;
         }
     }


@@ -102,17 +102,17 @@ pub fn read(
             carry_over.clear();
             carry_over.extend_from_slice(&buffer[read..]);
 
-            let payload = Chunk::new(buffer, |buf| {
-                let mut lines = unsafe {
-                    // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
-                    // because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
-                    std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
-                };
-                let read = crash_if_err!(1, std::str::from_utf8(&buf[..read]));
-                parse_lines(read, &mut lines, separator, &settings);
-                lines
-            });
-            if !payload.borrow_lines().is_empty() {
+            if read != 0 {
+                let payload = Chunk::new(buffer, |buf| {
+                    let mut lines = unsafe {
+                        // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
+                        // because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
+                        std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
+                    };
+                    let read = crash_if_err!(1, std::str::from_utf8(&buf[..read]));
+                    parse_lines(read, &mut lines, separator, &settings);
+                    lines
+                });
                 sender.send(payload).unwrap();
             }
             if !should_continue {
@@ -175,6 +175,7 @@
     separator: u8,
 ) -> (usize, bool) {
     let mut read_target = &mut buffer[start_offset..];
+    let mut last_file_target_size = read_target.len();
     loop {
         match file.read(read_target) {
             Ok(0) => {
@@ -208,14 +209,27 @@
                         read_target = &mut buffer[len..];
                     }
                 } else {
-                    // This file is empty.
+                    // This file has been fully read.
+                    let mut leftover_len = read_target.len();
+                    if last_file_target_size != leftover_len {
+                        // The file was not empty.
+                        let read_len = buffer.len() - leftover_len;
+                        if buffer[read_len - 1] != separator {
+                            // The file did not end with a separator. We have to insert one.
+                            buffer[read_len] = separator;
+                            leftover_len -= 1;
+                        }
+                        let read_len = buffer.len() - leftover_len;
+                        read_target = &mut buffer[read_len..];
+                    }
                     if let Some(next_file) = next_files.next() {
                         // There is another file.
+                        last_file_target_size = leftover_len;
                         *file = next_file;
                     } else {
                         // This was the last file.
-                        let leftover_len = read_target.len();
-                        return (buffer.len() - leftover_len, false);
+                        let read_len = buffer.len() - leftover_len;
+                        return (read_len, false);
                     }
                 }
             }


@@ -12,8 +12,12 @@
 //! The buffers for the individual chunks are recycled. There are two buffers.
 
 use std::cmp::Ordering;
+use std::fs::File;
+use std::io::BufReader;
 use std::io::{BufWriter, Write};
 use std::path::Path;
+use std::process::Child;
+use std::process::{Command, Stdio};
 use std::{
     fs::OpenOptions,
     io::Read,
@@ -25,12 +29,13 @@ use itertools::Itertools;
 
 use tempfile::TempDir;
 
+use crate::Line;
 use crate::{
     chunks::{self, Chunk},
     compare_by, merge, output_sorted_lines, sort_by, GlobalSettings,
 };
 
-const MIN_BUFFER_SIZE: usize = 8_000;
+const START_BUFFER_SIZE: usize = 8_000;
 
 /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
 pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) {
@@ -63,10 +68,31 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
     );
     match read_result {
         ReadResult::WroteChunksToFile { chunks_written } => {
-            let files = (0..chunks_written)
-                .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string()))
-                .collect::<Vec<_>>();
-            let mut merger = merge::merge(&files, settings);
+            let mut children = Vec::new();
+            let files = (0..chunks_written).map(|chunk_num| {
+                let file_path = tmp_dir.path().join(chunk_num.to_string());
+                let file = File::open(file_path).unwrap();
+                if let Some(compress_prog) = &settings.compress_prog {
+                    let mut command = Command::new(compress_prog);
+                    command.stdin(file).stdout(Stdio::piped()).arg("-d");
+                    let mut child = crash_if_err!(
+                        2,
+                        command.spawn().map_err(|err| format!(
+                            "couldn't execute compress program: errno {}",
+                            err.raw_os_error().unwrap()
+                        ))
+                    );
+                    let child_stdout = child.stdout.take().unwrap();
+                    children.push(child);
+                    Box::new(BufReader::new(child_stdout)) as Box<dyn Read + Send>
+                } else {
+                    Box::new(BufReader::new(file)) as Box<dyn Read + Send>
+                }
+            });
+            let mut merger = merge::merge_with_file_limit(files, settings);
+            for child in children {
+                assert_child_success(child, settings.compress_prog.as_ref().unwrap());
+            }
             merger.write_all(settings);
         }
         ReadResult::SortedSingleChunk(chunk) => {
@@ -132,7 +158,14 @@ fn reader_writer(
     for _ in 0..2 {
         chunks::read(
             &mut sender_option,
-            vec![0; MIN_BUFFER_SIZE],
+            vec![
+                0;
+                if START_BUFFER_SIZE < buffer_size {
+                    START_BUFFER_SIZE
+                } else {
+                    buffer_size
+                }
+            ],
             Some(buffer_size),
             &mut carry_over,
             &mut file,
@@ -171,6 +204,7 @@
             write(
                 &mut chunk,
                 &tmp_dir.path().join(file_number.to_string()),
+                settings.compress_prog.as_deref(),
                 separator,
             );
 
@@ -193,14 +227,45 @@
 }
 
 /// Write the lines in `chunk` to `file`, separated by `separator`.
-fn write(chunk: &mut Chunk, file: &Path, separator: u8) {
+/// `compress_prog` is used to optionally compress file contents.
+fn write(chunk: &mut Chunk, file: &Path, compress_prog: Option<&str>, separator: u8) {
     chunk.with_lines_mut(|lines| {
         // Write the lines to the file
         let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file));
-        let mut writer = BufWriter::new(file);
-        for s in lines.iter() {
-            crash_if_err!(1, writer.write_all(s.line.as_bytes()));
-            crash_if_err!(1, writer.write_all(&[separator]));
-        }
+        if let Some(compress_prog) = compress_prog {
+            let mut command = Command::new(compress_prog);
+            command.stdin(Stdio::piped()).stdout(file);
+            let mut child = crash_if_err!(
+                2,
+                command.spawn().map_err(|err| format!(
+                    "couldn't execute compress program: errno {}",
+                    err.raw_os_error().unwrap()
+                ))
+            );
+            let mut writer = BufWriter::new(child.stdin.take().unwrap());
+            write_lines(lines, &mut writer, separator);
+            writer.flush().unwrap();
+            drop(writer);
+            assert_child_success(child, compress_prog);
+        } else {
+            let mut writer = BufWriter::new(file);
+            write_lines(lines, &mut writer, separator);
+        };
     });
 }
+
+fn write_lines<'a, T: Write>(lines: &[Line<'a>], writer: &mut T, separator: u8) {
+    for s in lines {
+        crash_if_err!(1, writer.write_all(s.line.as_bytes()));
+        crash_if_err!(1, writer.write_all(&[separator]));
+    }
+}
+
+fn assert_child_success(mut child: Child, program: &str) {
+    if !matches!(
+        child.wait().map(|e| e.code()),
+        Ok(Some(0)) | Ok(None) | Err(_)
+    ) {
+        crash!(2, "'{}' terminated abnormally", program)
+    }
+}


@@ -9,8 +9,8 @@
 
 use std::{
     cmp::Ordering,
-    ffi::OsStr,
-    io::{Read, Write},
+    fs::File,
+    io::{BufWriter, Read, Write},
     iter,
     rc::Rc,
     sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
@@ -18,18 +18,69 @@
 };
 
 use compare::Compare;
+use itertools::Itertools;
 
 use crate::{
     chunks::{self, Chunk},
-    compare_by, open, GlobalSettings,
+    compare_by, GlobalSettings,
 };
 
 // Merge already sorted files.
-pub fn merge<'a>(files: &[impl AsRef<OsStr>], settings: &'a GlobalSettings) -> FileMerger<'a> {
+pub fn merge_with_file_limit<F: ExactSizeIterator<Item = Box<dyn Read + Send>>>(
+    files: F,
+    settings: &GlobalSettings,
+) -> FileMerger {
+    if files.len() > settings.merge_batch_size {
+        let tmp_dir = tempfile::Builder::new()
+            .prefix("uutils_sort")
+            .tempdir_in(&settings.tmp_dir)
+            .unwrap();
+        let mut batch_number = 0;
+        let mut remaining_files = files.len();
+        let batches = files.chunks(settings.merge_batch_size);
+        let mut batches = batches.into_iter();
+        while batch_number + remaining_files > settings.merge_batch_size && remaining_files != 0 {
+            remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
+            let mut merger = merge_without_limit(batches.next().unwrap(), settings);
+            let tmp_file = File::create(tmp_dir.path().join(batch_number.to_string())).unwrap();
+            merger.write_all_to(settings, &mut BufWriter::new(tmp_file));
+            batch_number += 1;
+        }
+        let batch_files = (0..batch_number).map(|n| {
+            Box::new(File::open(tmp_dir.path().join(n.to_string())).unwrap())
+                as Box<dyn Read + Send>
+        });
+        if batch_number > settings.merge_batch_size {
+            assert!(batches.next().is_none());
+            merge_with_file_limit(
+                Box::new(batch_files) as Box<dyn ExactSizeIterator<Item = Box<dyn Read + Send>>>,
+                settings,
+            )
+        } else {
+            let final_batch = batches.next();
+            assert!(batches.next().is_none());
+            merge_without_limit(
+                batch_files.chain(final_batch.into_iter().flatten()),
+                settings,
+            )
+        }
+    } else {
+        merge_without_limit(files, settings)
+    }
+}
+
+/// Merge files without limiting how many files are concurrently open
+///
+/// It is the responsibility of the caller to ensure that `files` yields only
+/// as many files as we are allowed to open concurrently.
+fn merge_without_limit<F: Iterator<Item = Box<dyn Read + Send>>>(
+    files: F,
+    settings: &GlobalSettings,
+) -> FileMerger {
     let (request_sender, request_receiver) = channel();
-    let mut reader_files = Vec::with_capacity(files.len());
-    let mut loaded_receivers = Vec::with_capacity(files.len());
-    for (file_number, file) in files.iter().map(open).enumerate() {
+    let mut reader_files = Vec::with_capacity(files.size_hint().0);
+    let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
+    for (file_number, file) in files.enumerate() {
         let (sender, receiver) = sync_channel(2);
         loaded_receivers.push(receiver);
         reader_files.push(ReaderFile {
@@ -146,7 +197,11 @@ impl<'a> FileMerger<'a> {
     /// Write the merged contents to the output file.
     pub fn write_all(&mut self, settings: &GlobalSettings) {
         let mut out = settings.out_writer();
-        while self.write_next(settings, &mut out) {}
+        self.write_all_to(settings, &mut out);
+    }
+
+    pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
+        while self.write_next(settings, out) {}
     }
 
     fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {


@@ -96,6 +96,8 @@ static OPT_PARALLEL: &str = "parallel";
 static OPT_FILES0_FROM: &str = "files0-from";
 static OPT_BUF_SIZE: &str = "buffer-size";
 static OPT_TMP_DIR: &str = "temporary-directory";
+static OPT_COMPRESS_PROG: &str = "compress-program";
+static OPT_BATCH_SIZE: &str = "batch-size";
 
 static ARG_FILES: &str = "files";
 
@@ -156,6 +158,8 @@ pub struct GlobalSettings {
     zero_terminated: bool,
     buffer_size: usize,
     tmp_dir: PathBuf,
+    compress_prog: Option<String>,
+    merge_batch_size: usize,
 }
 
 impl GlobalSettings {
@@ -223,6 +227,8 @@ impl Default for GlobalSettings {
             zero_terminated: false,
             buffer_size: DEFAULT_BUF_SIZE,
             tmp_dir: PathBuf::new(),
+            compress_prog: None,
+            merge_batch_size: 16,
         }
     }
 }
@@ -1076,6 +1082,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
                 .takes_value(true)
                 .value_name("DIR"),
         )
+        .arg(
+            Arg::with_name(OPT_COMPRESS_PROG)
+                .long(OPT_COMPRESS_PROG)
+                .help("compress temporary files with PROG, decompress with PROG -d")
+                .long_help("PROG has to take input from stdin and output to stdout")
+                .value_name("PROG")
+        )
+        .arg(
+            Arg::with_name(OPT_BATCH_SIZE)
+                .long(OPT_BATCH_SIZE)
+                .help("Merge at most N_MERGE inputs at once.")
+                .value_name("N_MERGE")
+        )
         .arg(
             Arg::with_name(OPT_FILES0_FROM)
                 .long(OPT_FILES0_FROM)
@@ -1167,6 +1186,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
         .map(PathBuf::from)
         .unwrap_or_else(env::temp_dir);
 
+    settings.compress_prog = matches.value_of(OPT_COMPRESS_PROG).map(String::from);
+
+    if let Some(n_merge) = matches.value_of(OPT_BATCH_SIZE) {
+        settings.merge_batch_size = n_merge
+            .parse()
+            .unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
+    }
+
     settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED);
     settings.merge = matches.is_present(OPT_MERGE);
 
@@ -1242,7 +1269,7 @@ fn output_sorted_lines<'a>(iter: impl Iterator<Item = &'a Line<'a>>, settings: &
 
 fn exec(files: &[String], settings: &GlobalSettings) -> i32 {
     if settings.merge {
-        let mut file_merger = merge::merge(files, settings);
+        let mut file_merger = merge::merge_with_file_limit(files.iter().map(open), settings);
         file_merger.write_all(settings);
     } else if settings.check {
         if files.len() > 1 {


@@ -837,3 +837,64 @@ fn test_nonexistent_file() {
 fn test_blanks() {
     test_helper("blanks", &["-b", "--ignore-blanks"]);
 }
+
+#[test]
+fn sort_multiple() {
+    new_ucmd!()
+        .args(&["no_trailing_newline1.txt", "no_trailing_newline2.txt"])
+        .succeeds()
+        .stdout_is("a\nb\nb\n");
+}
+
+#[test]
+fn sort_empty_chunk() {
+    new_ucmd!()
+        .args(&["-S", "40B"])
+        .pipe_in("a\na\n")
+        .succeeds()
+        .stdout_is("a\na\n");
+}
+
+#[test]
+#[cfg(target_os = "linux")]
+fn test_compress() {
+    new_ucmd!()
+        .args(&[
+            "ext_sort.txt",
+            "-n",
+            "--compress-program",
+            "gzip",
+            "-S",
+            "10",
+        ])
+        .succeeds()
+        .stdout_only_fixture("ext_sort.expected");
+}
+
+#[test]
+fn test_compress_fail() {
+    new_ucmd!()
+        .args(&[
+            "ext_sort.txt",
+            "-n",
+            "--compress-program",
+            "nonexistent-program",
+            "-S",
+            "10",
+        ])
+        .fails()
+        .stderr_only("sort: couldn't execute compress program: errno 2");
+}
+
+#[test]
+fn test_merge_batches() {
+    new_ucmd!()
+        .args(&[
+            "ext_sort.txt",
+            "-n",
+            "-S",
+            "150B",
+        ])
+        .succeeds()
+        .stdout_only_fixture("ext_sort.expected");
+}


@@ -0,0 +1,2 @@
+a
+b


@@ -0,0 +1 @@
+b


@@ -44,7 +44,7 @@ sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver
 # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils
 sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile
 sed -i 's| tr | /usr/bin/tr |' tests/init.sh
-make
+make -j "$(nproc)"
 # Generate the factor tests, so they can be fixed
 # Used to be 36. Reduced to 20 to decrease the log size
 for i in {00..20}
@@ -59,7 +59,7 @@
 done
 
-grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||'
+grep -rl 'path_prepend_' tests/* | xargs sed -i 's| path_prepend_ ./src||'
 sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh
 
 # Remove tests checking for --version & --help
@@ -94,8 +94,28 @@ sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
 sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
 sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh
 
-#Add specific timeout to tests that currently hang to limit time spent waiting
+# Add specific timeout to tests that currently hang to limit time spent waiting
 sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh
 
+# Remove dup of /usr/bin/ when executed several times
+grep -rl '/usr/bin//usr/bin/' tests/* | xargs --no-run-if-empty sed -i 's|/usr/bin//usr/bin/|/usr/bin/|g'
+
+#### Adjust tests to make them work with Rust/coreutils
+# in some cases, what we are doing in rust/coreutils is good (or better)
+# we should not regress our project just to match what GNU is going.
+# So, do some changes on the fly
+sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh
+sed -i -e "s|rm: cannot remove 'a/b/file'|rm: cannot remove 'a'|g" tests/rm/cycle.sh
+sed -i -e "s|rm: cannot remove directory 'b/a/p'|rm: cannot remove 'b'|g" tests/rm/rm1.sh
+sed -i -e "s|rm: cannot remove 'a/1'|rm: cannot remove 'a'|g" tests/rm/rm2.sh
+sed -i -e "s|removed directory 'a/'|removed directory 'a'|g" tests/rm/v-slash.sh
+
 test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}"