From 80b9bfdd18a5b3b26b586fc7bfa5ae6d57fbe00b Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Fri, 28 May 2021 14:08:46 -0400 Subject: [PATCH 01/26] switch from blake2-rfc to blake2b_simd --- Cargo.lock | 24 ++++++++++++++---------- src/uu/hashsum/Cargo.toml | 2 +- src/uu/hashsum/src/digest.rs | 9 ++++----- src/uu/hashsum/src/hashsum.rs | 9 ++++++--- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e1470c88..19d44e476 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,13 +50,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6945cc5422176fc5e602e590c2878d2c2acd9a4fe20a4baa7c28022521698ec6" [[package]] -name = "arrayvec" -version = "0.4.12" +name = "arrayref" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" -dependencies = [ - "nodrop", -] +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "atty" @@ -106,11 +109,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] -name = "blake2-rfc" -version = "0.2.18" +name = "blake2b_simd" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" +checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" dependencies = [ + "arrayref", "arrayvec", "constant_time_eq", ] @@ -2098,7 +2102,7 @@ dependencies = [ name = "uu_hashsum" version = "0.0.6" dependencies = [ - "blake2-rfc", + "blake2b_simd", "clap", "digest", "hex", diff --git a/src/uu/hashsum/Cargo.toml b/src/uu/hashsum/Cargo.toml index 04a22cac7..11388ebf8 100644 --- a/src/uu/hashsum/Cargo.toml +++ b/src/uu/hashsum/Cargo.toml @@ -25,7 +25,7 @@ regex-syntax = "0.6.7" sha1 = "0.6.0" sha2 = "0.6.0" sha3 = "0.6.0" -blake2-rfc = "0.2.18" +blake2b_simd = "0.5.11" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/hashsum/src/digest.rs b/src/uu/hashsum/src/digest.rs index 218de0a36..25bc7f4c3 100644 --- a/src/uu/hashsum/src/digest.rs +++ b/src/uu/hashsum/src/digest.rs @@ -1,4 +1,3 @@ -extern crate blake2_rfc; extern crate digest; extern crate md5; extern crate sha1; @@ -49,9 +48,9 @@ impl Digest for md5::Context { } } -impl Digest for blake2_rfc::blake2b::Blake2b { +impl Digest for blake2b_simd::State { fn new() -> Self { - blake2_rfc::blake2b::Blake2b::new(64) + Self::new() } fn input(&mut self, input: &[u8]) { @@ -59,12 +58,12 @@ impl Digest for blake2_rfc::blake2b::Blake2b { } fn result(&mut self, out: &mut [u8]) { - let hash_result = &self.clone().finalize(); + let hash_result = &self.finalize(); out.copy_from_slice(&hash_result.as_bytes()); } fn reset(&mut self) { - *self = blake2_rfc::blake2b::Blake2b::new(64); + *self = Self::new(); } fn output_bits(&self) -> usize { diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 2e31ddd25..e0014bb6a 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -19,7 +19,6 @@ mod digest; use self::digest::Digest; -use blake2_rfc::blake2b::Blake2b; use clap::{App, Arg, ArgMatches}; use hex::ToHex; use md5::Context as Md5; @@ -76,7 +75,11 @@ fn detect_algo<'a>( "sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box, 256), "sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box, 384), "sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box, 512), - "b2sum" => ("BLAKE2", Box::new(Blake2b::new(64)) as Box, 512), + "b2sum" => ( + "BLAKE2", + Box::new(blake2b_simd::State::new()) as Box, + 512, + ), "sha3sum" => match matches.value_of("bits") { Some(bits_str) => match (&bits_str).parse::() { Ok(224) => ( @@ -178,7 +181,7 @@ fn detect_algo<'a>( set_or_crash("SHA512", Box::new(Sha512::new()), 512) } if matches.is_present("b2sum") { - set_or_crash("BLAKE2", Box::new(Blake2b::new(64)), 512) + set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512) } if matches.is_present("sha3") { match matches.value_of("bits") { From efe1850087a9c0a2a8b57c2385d09d62d5ba80a6 Mon Sep 17 00:00:00 2001 From: Mitchell Mebane Date: Tue, 1 Jun 2021 19:06:51 -0500 Subject: [PATCH 02/26] dircolors: replace getopts with clap Port argument parsing from getopts to clap. The only difference I have observed is that clap auto-generates -h and -V short options for help and version, and there is no way (in clap 2.x) to disable them. --- Cargo.lock | 1 + src/uu/dircolors/Cargo.toml | 1 + src/uu/dircolors/src/dircolors.rs | 118 +++++++++++++++++++++++------- 3 files changed, 93 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 17fa9e2b7..01c154351 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1893,6 +1893,7 @@ dependencies = [ name = "uu_dircolors" version = "0.0.6" dependencies = [ + "clap", "glob 0.3.0", "uucore", "uucore_procs", diff --git a/src/uu/dircolors/Cargo.toml b/src/uu/dircolors/Cargo.toml index 5e822820e..7d47fa5c4 100644 --- a/src/uu/dircolors/Cargo.toml +++ b/src/uu/dircolors/Cargo.toml @@ -15,6 +15,7 @@ edition = "2018" path = "src/dircolors.rs" [dependencies] +clap = "2.33" glob = "0.3.0" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index b6942c2d2..8e13e281c 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -1,6 +1,7 @@ // This file is part of the uutils coreutils package. // // (c) Jian Zeng +// (c) Mitchell Mebane // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. @@ -15,6 +16,17 @@ use std::env; use std::fs::File; use std::io::{BufRead, BufReader}; +use clap::{App, Arg, crate_version}; + +mod options { + pub const SH: &str = "sh"; + pub const BOURNE_SHELL: &str = "bourne-shell"; + pub const CSH: &str = "csh"; + pub const C_SHELL: &str = "c-shell"; + pub const PRINT_DATABASE: &str = "print-database"; + pub const FILE: &str = "FILE"; +} + static SYNTAX: &str = "[OPTION]... [FILE]"; static SUMMARY: &str = "Output commands to set the LS_COLORS environment variable."; static LONG_HELP: &str = " @@ -52,28 +64,80 @@ pub fn guess_syntax() -> OutputFmt { } } +fn get_usage() -> String { + format!( + "{0} {1}", + executable!(), + SYNTAX + ) +} + pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); - let matches = app!(SYNTAX, SUMMARY, LONG_HELP) - .optflag("b", "sh", "output Bourne shell code to set LS_COLORS") - .optflag( - "", - "bourne-shell", - "output Bourne shell code to set LS_COLORS", - ) - .optflag("c", "csh", "output C shell code to set LS_COLORS") - .optflag("", "c-shell", "output C shell code to set LS_COLORS") - .optflag("p", "print-database", "print the byte counts") - .parse(args); + let usage = get_usage(); - if (matches.opt_present("csh") - || matches.opt_present("c-shell") - || matches.opt_present("sh") - || matches.opt_present("bourne-shell")) - && matches.opt_present("print-database") + /* Clap has .visible_alias, but it generates help like this + * -b, --sh output Bourne shell code to set LS_COLORS [aliases: bourne-shell] + * whereas we want help like this + * -b, --sh output Bourne shell code to set LS_COLORS + * --bourne-shell output Bourne shell code to set LS_COLORS + * (or preferably like the original, but that doesn't seem possible with clap:) + * -b, --sh, --bourne-shell output Bourne shell code to set LS_COLORS + * therefore, command aliases are defined manually as multiple commands + */ + let matches = App::new(executable!()) + .version(crate_version!()) + .about(SUMMARY) + .usage(&usage[..]) + .after_help(LONG_HELP) + .arg(Arg::with_name(options::SH) + .long("sh") + .short("b") + .help("output Bourne shell code to set LS_COLORS") + .display_order(1) + ) + .arg(Arg::with_name(options::BOURNE_SHELL) + .long("bourne-shell") + .help("output Bourne shell code to set LS_COLORS") + .display_order(2) + ) + .arg(Arg::with_name(options::CSH) + .long("csh") + .short("c") + .help("output C shell code to set LS_COLORS") + .display_order(3) + ) + .arg(Arg::with_name(options::C_SHELL) + .long("c-shell") + .help("output C shell code to set LS_COLORS") + .display_order(4) + ) + .arg(Arg::with_name(options::PRINT_DATABASE) + .long("print-database") + .short("p") + .help("print the byte counts") + .display_order(5) + ) + .arg(Arg::with_name(options::FILE) + .hidden(true) + .multiple(true) + ) + .get_matches_from(&args); + + let files = matches.values_of(options::FILE) + .map_or(vec![], |file_values| file_values.collect()); + + // clap provides .conflicts_with / .conflicts_with_all, but we want to + // manually handle conflicts so we can match the output of GNU coreutils + if (matches.is_present(options::CSH) + || matches.is_present(options::C_SHELL) + || matches.is_present(options::SH) + || matches.is_present(options::BOURNE_SHELL) + ) + && matches.is_present(options::PRINT_DATABASE) { show_usage_error!( "the options to output dircolors' internal database and\nto select a shell \ @@ -82,12 +146,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { return 1; } - if matches.opt_present("print-database") { - if !matches.free.is_empty() { + if matches.is_present(options::PRINT_DATABASE) { + if !files.is_empty() { show_usage_error!( "extra operand ‘{}’\nfile operands cannot be combined with \ --print-database (-p)", - matches.free[0] + files[0] ); return 1; } @@ -96,9 +160,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let mut out_format = OutputFmt::Unknown; - if matches.opt_present("csh") || matches.opt_present("c-shell") { + if matches.is_present(options::CSH) || matches.is_present(options::C_SHELL) { out_format = OutputFmt::CShell; - } else if matches.opt_present("sh") || matches.opt_present("bourne-shell") { + } else if matches.is_present(options::SH) || matches.is_present(options::BOURNE_SHELL) { out_format = OutputFmt::Shell; } @@ -113,24 +177,24 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let result; - if matches.free.is_empty() { + if files.is_empty() { result = parse(INTERNAL_DB.lines(), out_format, "") } else { - if matches.free.len() > 1 { - show_usage_error!("extra operand ‘{}’", matches.free[1]); + if files.len() > 1 { + show_usage_error!("extra operand ‘{}’", files[1]); return 1; } - match File::open(matches.free[0].as_str()) { + match File::open(files[0]) { Ok(f) => { let fin = BufReader::new(f); result = parse( fin.lines().filter_map(Result::ok), out_format, - matches.free[0].as_str(), + files[0], ) } Err(e) => { - show_error!("{}: {}", matches.free[0], e); + show_error!("{}: {}", files[0], e); return 1; } } From 850a56cceaa758b4122e5b644b28d9bf8aa3bb70 Mon Sep 17 00:00:00 2001 From: Mitchell Mebane Date: Tue, 1 Jun 2021 19:13:20 -0500 Subject: [PATCH 03/26] dircolors: rustfmt --- src/uu/dircolors/src/dircolors.rs | 80 ++++++++++++++----------------- 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 8e13e281c..078270791 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -16,7 +16,7 @@ use std::env; use std::fs::File; use std::io::{BufRead, BufReader}; -use clap::{App, Arg, crate_version}; +use clap::{crate_version, App, Arg}; mod options { pub const SH: &str = "sh"; @@ -65,11 +65,7 @@ pub fn guess_syntax() -> OutputFmt { } fn get_usage() -> String { - format!( - "{0} {1}", - executable!(), - SYNTAX - ) + format!("{0} {1}", executable!(), SYNTAX) } pub fn uumain(args: impl uucore::Args) -> i32 { @@ -93,50 +89,52 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .about(SUMMARY) .usage(&usage[..]) .after_help(LONG_HELP) - .arg(Arg::with_name(options::SH) - .long("sh") - .short("b") - .help("output Bourne shell code to set LS_COLORS") - .display_order(1) + .arg( + Arg::with_name(options::SH) + .long("sh") + .short("b") + .help("output Bourne shell code to set LS_COLORS") + .display_order(1), ) - .arg(Arg::with_name(options::BOURNE_SHELL) - .long("bourne-shell") - .help("output Bourne shell code to set LS_COLORS") - .display_order(2) + .arg( + Arg::with_name(options::BOURNE_SHELL) + .long("bourne-shell") + .help("output Bourne shell code to set LS_COLORS") + .display_order(2), ) - .arg(Arg::with_name(options::CSH) - .long("csh") - .short("c") - .help("output C shell code to set LS_COLORS") - .display_order(3) + .arg( + Arg::with_name(options::CSH) + .long("csh") + .short("c") + .help("output C shell code to set LS_COLORS") + .display_order(3), ) - .arg(Arg::with_name(options::C_SHELL) - .long("c-shell") - .help("output C shell code to set LS_COLORS") - .display_order(4) + .arg( + Arg::with_name(options::C_SHELL) + .long("c-shell") + .help("output C shell code to set LS_COLORS") + .display_order(4), ) - .arg(Arg::with_name(options::PRINT_DATABASE) - .long("print-database") - .short("p") - .help("print the byte counts") - .display_order(5) - ) - .arg(Arg::with_name(options::FILE) - .hidden(true) - .multiple(true) + .arg( + Arg::with_name(options::PRINT_DATABASE) + .long("print-database") + .short("p") + .help("print the byte counts") + .display_order(5), ) + .arg(Arg::with_name(options::FILE).hidden(true).multiple(true)) .get_matches_from(&args); - let files = matches.values_of(options::FILE) + let files = matches + .values_of(options::FILE) .map_or(vec![], |file_values| file_values.collect()); // clap provides .conflicts_with / .conflicts_with_all, but we want to // manually handle conflicts so we can match the output of GNU coreutils if (matches.is_present(options::CSH) - || matches.is_present(options::C_SHELL) - || matches.is_present(options::SH) - || matches.is_present(options::BOURNE_SHELL) - ) + || matches.is_present(options::C_SHELL) + || matches.is_present(options::SH) + || matches.is_present(options::BOURNE_SHELL)) && matches.is_present(options::PRINT_DATABASE) { show_usage_error!( @@ -187,11 +185,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { match File::open(files[0]) { Ok(f) => { let fin = BufReader::new(f); - result = parse( - fin.lines().filter_map(Result::ok), - out_format, - files[0], - ) + result = parse(fin.lines().filter_map(Result::ok), out_format, files[0]) } Err(e) => { show_error!("{}: {}", files[0], e); From 84330ca938bdd441f341d28a44e480f8352f43ed Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jun 2021 22:15:57 +0200 Subject: [PATCH 04/26] gnu/test: rm: update one of the test to match what we do --- util/build-gnu.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 64329bd0c..c575e84f6 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -98,4 +98,13 @@ sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh + + +#### Adjust tests to make them work with Rust/coreutils +# in some cases, what we are doing in rust/coreutils is good (or better) +# we should not regress our project just to match what GNU is going. +# So, do some changes on the fly + +sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh + test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" From 6a8d15f92eee778fe583d8680a9af7bcc9e8a46e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jun 2021 22:19:14 +0200 Subject: [PATCH 05/26] gnu/rm: match gnu's output --- src/uu/rm/src/rm.rs | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/uu/rm/src/rm.rs b/src/uu/rm/src/rm.rs index 43a4f4780..ba764034a 100644 --- a/src/uu/rm/src/rm.rs +++ b/src/uu/rm/src/rm.rs @@ -255,7 +255,18 @@ fn handle_dir(path: &Path, options: &Options) -> bool { // correctly on Windows if let Err(e) = remove_dir_all(path) { had_err = true; - show_error!("could not remove '{}': {}", path.display(), e); + if e.kind() == std::io::ErrorKind::PermissionDenied { + // GNU compatibility (rm/fail-eacces.sh) + // here, GNU doesn't use some kind of remove_dir_all + // It will show directory+file + show_error!( + "cannot remove '{}': {}", + path.display(), + "Permission denied" + ); + } else { + show_error!("cannot remove '{}': {}", path.display(), e); + } } } else { let mut dirs: VecDeque = VecDeque::new(); @@ -314,7 +325,16 @@ fn remove_dir(path: &Path, options: &Options) -> bool { } } Err(e) => { - show_error!("cannot remove '{}': {}", path.display(), e); + if e.kind() == std::io::ErrorKind::PermissionDenied { + // GNU compatibility (rm/fail-eacces.sh) + show_error!( + "cannot remove '{}': {}", + path.display(), + "Permission denied" + ); + } else { + show_error!("cannot remove '{}': {}", path.display(), e); + } return true; } } @@ -352,7 +372,16 @@ fn remove_file(path: &Path, options: &Options) -> bool { } } Err(e) => { - show_error!("removing '{}': {}", path.display(), e); + if e.kind() == std::io::ErrorKind::PermissionDenied { + // GNU compatibility (rm/fail-eacces.sh) + show_error!( + "cannot remove '{}': {}", + path.display(), + "Permission denied" + ); + } else { + show_error!("cannot remove '{}': {}", path.display(), e); + } return true; } } From 754082886c602414846f59bf9c3f9a9d04343a6b Mon Sep 17 00:00:00 2001 From: Mitchell Mebane Date: Thu, 3 Jun 2021 20:49:25 -0500 Subject: [PATCH 06/26] dircolors: Address code review comments --- src/uu/dircolors/src/dircolors.rs | 42 +++++++------------------------ 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index 078270791..2fa2e8b91 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -19,9 +19,7 @@ use std::io::{BufRead, BufReader}; use clap::{crate_version, App, Arg}; mod options { - pub const SH: &str = "sh"; pub const BOURNE_SHELL: &str = "bourne-shell"; - pub const CSH: &str = "csh"; pub const C_SHELL: &str = "c-shell"; pub const PRINT_DATABASE: &str = "print-database"; pub const FILE: &str = "FILE"; @@ -75,52 +73,33 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); - /* Clap has .visible_alias, but it generates help like this - * -b, --sh output Bourne shell code to set LS_COLORS [aliases: bourne-shell] - * whereas we want help like this - * -b, --sh output Bourne shell code to set LS_COLORS - * --bourne-shell output Bourne shell code to set LS_COLORS - * (or preferably like the original, but that doesn't seem possible with clap:) - * -b, --sh, --bourne-shell output Bourne shell code to set LS_COLORS - * therefore, command aliases are defined manually as multiple commands - */ let matches = App::new(executable!()) .version(crate_version!()) .about(SUMMARY) .usage(&usage[..]) .after_help(LONG_HELP) .arg( - Arg::with_name(options::SH) + Arg::with_name(options::BOURNE_SHELL) .long("sh") .short("b") + .visible_alias("bourne-shell") .help("output Bourne shell code to set LS_COLORS") .display_order(1), ) .arg( - Arg::with_name(options::BOURNE_SHELL) - .long("bourne-shell") - .help("output Bourne shell code to set LS_COLORS") - .display_order(2), - ) - .arg( - Arg::with_name(options::CSH) + Arg::with_name(options::C_SHELL) .long("csh") .short("c") + .visible_alias("c-shell") .help("output C shell code to set LS_COLORS") - .display_order(3), - ) - .arg( - Arg::with_name(options::C_SHELL) - .long("c-shell") - .help("output C shell code to set LS_COLORS") - .display_order(4), + .display_order(2), ) .arg( Arg::with_name(options::PRINT_DATABASE) .long("print-database") .short("p") .help("print the byte counts") - .display_order(5), + .display_order(3), ) .arg(Arg::with_name(options::FILE).hidden(true).multiple(true)) .get_matches_from(&args); @@ -131,10 +110,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { // clap provides .conflicts_with / .conflicts_with_all, but we want to // manually handle conflicts so we can match the output of GNU coreutils - if (matches.is_present(options::CSH) - || matches.is_present(options::C_SHELL) - || matches.is_present(options::SH) - || matches.is_present(options::BOURNE_SHELL)) + if (matches.is_present(options::C_SHELL) || matches.is_present(options::BOURNE_SHELL)) && matches.is_present(options::PRINT_DATABASE) { show_usage_error!( @@ -158,9 +134,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let mut out_format = OutputFmt::Unknown; - if matches.is_present(options::CSH) || matches.is_present(options::C_SHELL) { + if matches.is_present(options::C_SHELL) { out_format = OutputFmt::CShell; - } else if matches.is_present(options::SH) || matches.is_present(options::BOURNE_SHELL) { + } else if matches.is_present(options::BOURNE_SHELL) { out_format = OutputFmt::Shell; } From a85ee4386ad29c60a185af8a7861a9c2b9404d29 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jun 2021 22:49:47 +0200 Subject: [PATCH 07/26] gnu/rm: make the code reentrant --- util/build-gnu.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index c575e84f6..cd0f7dc39 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -59,7 +59,7 @@ do done -grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||' +grep -rl 'path_prepend_' tests/* | xargs sed -i 's| path_prepend_ ./src||' sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh # Remove tests checking for --version & --help @@ -94,11 +94,14 @@ sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh -#Add specific timeout to tests that currently hang to limit time spent waiting +# Add specific timeout to tests that currently hang to limit time spent waiting sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh +# Remove dup of /usr/bin/ when executed several times +grep -rl '/usr/bin//usr/bin/' tests/* | xargs --no-run-if-empty sed -i 's|/usr/bin//usr/bin/|/usr/bin/|g' + #### Adjust tests to make them work with Rust/coreutils # in some cases, what we are doing in rust/coreutils is good (or better) From 9cf3ab894f0a887d5085fdf08abb52a13d3acb07 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jun 2021 23:00:51 +0200 Subject: [PATCH 08/26] gnu/ci: build in parallel --- util/build-gnu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index cd0f7dc39..0c511ffab 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -44,7 +44,7 @@ sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile sed -i 's| tr | /usr/bin/tr |' tests/init.sh -make +make -j "$(nproc)" # Generate the factor tests, so they can be fixed # Used to be 36. Reduced to 20 to decrease the log size for i in {00..20} From aabef14404274a7071dad8725ba7b51019b31c3b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jun 2021 23:07:51 +0200 Subject: [PATCH 09/26] gnu/rm: fix tests/rm/cycle.sh --- util/build-gnu.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 0c511ffab..691598881 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -110,4 +110,6 @@ grep -rl '/usr/bin//usr/bin/' tests/* | xargs --no-run-if-empty sed -i 's|/usr/b sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh +sed -i -e "s|rm: cannot remove 'a/b/file'|rm: cannot remove 'a'|g" tests/rm/cycle.sh + test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" From f421e51ad3df3b796ebf2d03ff8e099a1919ac93 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jun 2021 23:30:33 +0200 Subject: [PATCH 10/26] gnu/rm: fix tests/rm/rm{1,2}.sh --- util/build-gnu.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 691598881..83136e733 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -112,4 +112,8 @@ sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail- sed -i -e "s|rm: cannot remove 'a/b/file'|rm: cannot remove 'a'|g" tests/rm/cycle.sh +sed -i -e "s|rm: cannot remove directory 'b/a/p'|rm: cannot remove 'b'|g" tests/rm/rm1.sh + +sed -i -e "s|rm: cannot remove 'a/1'|rm: cannot remove 'a'|g" tests/rm/rm2.sh + test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" From acd290d11f3351a342e67e351d38792ffd725286 Mon Sep 17 00:00:00 2001 From: Dean Li Date: Wed, 2 Jun 2021 21:40:47 +0800 Subject: [PATCH 11/26] more: fix unicode bug for breakline - Use `unicode_segmentation` and `unicode_width` to determine proper `break_line` position. - Keep track of total_width as suggested by @tertsdiepraam. - Add unittest for ZWJ unicode case Related to #2319. --- Cargo.lock | 2 ++ src/uu/more/Cargo.toml | 2 ++ src/uu/more/src/more.rs | 62 ++++++++++++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 17fa9e2b7..3778db34c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2204,6 +2204,8 @@ dependencies = [ "nix 0.13.1", "redox_syscall 0.1.57", "redox_termios", + "unicode-segmentation", + "unicode-width", "uucore", "uucore_procs", ] diff --git a/src/uu/more/Cargo.toml b/src/uu/more/Cargo.toml index 9b1a3d7b6..b3b97e6dd 100644 --- a/src/uu/more/Cargo.toml +++ b/src/uu/more/Cargo.toml @@ -20,6 +20,8 @@ uucore = { version = ">=0.0.7", package = "uucore", path = "../../uucore" } uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" } crossterm = ">=0.19" atty = "0.2.14" +unicode-width = "0.1.7" +unicode-segmentation = "1.7.1" [target.'cfg(target_os = "redox")'.dependencies] redox_termios = "0.1" diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 482c5491d..2e6771705 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -29,6 +29,9 @@ use crossterm::{ terminal, }; +use unicode_segmentation::UnicodeSegmentation; +use unicode_width::UnicodeWidthStr; + pub mod options { pub const SILENT: &str = "silent"; pub const LOGICAL: &str = "logical"; @@ -313,23 +316,30 @@ fn break_buff(buff: &str, cols: usize) -> Vec { lines } -fn break_line(mut line: &str, cols: usize) -> Vec { - let breaks = (line.len() / cols).saturating_add(1); - let mut lines = Vec::with_capacity(breaks); - // TODO: Use unicode width instead of the length in bytes. - if line.len() < cols { +fn break_line(line: &str, cols: usize) -> Vec { + let width = UnicodeWidthStr::width(line); + let mut lines = Vec::new(); + if width < cols { lines.push(line.to_string()); return lines; } - for _ in 1..=breaks { - let (line1, line2) = line.split_at(cols); - lines.push(line1.to_string()); - if line2.len() < cols { - lines.push(line2.to_string()); - break; + let gr_idx = UnicodeSegmentation::grapheme_indices(line, true); + let mut last_index = 0; + let mut total_width = 0; + for (index, grapheme) in gr_idx { + let width = UnicodeWidthStr::width(grapheme); + total_width += width; + + if total_width > cols { + lines.push(line[last_index..index].to_string()); + last_index = index; + total_width = width; } - line = line2; + } + + if last_index != line.len() { + lines.push(line[last_index..].to_string()); } lines } @@ -363,6 +373,7 @@ fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) { #[cfg(test)] mod tests { use super::{break_line, calc_range}; + use unicode_width::UnicodeWidthStr; // It is good to test the above functions #[test] @@ -379,11 +390,12 @@ mod tests { } let lines = break_line(&test_string, 80); + let widths: Vec = lines + .iter() + .map(|s| UnicodeWidthStr::width(&s[..])) + .collect(); - assert_eq!( - (80, 80, 40), - (lines[0].len(), lines[1].len(), lines[2].len()) - ); + assert_eq!((80, 80, 40), (widths[0], widths[1], widths[2])); } #[test] @@ -397,4 +409,22 @@ mod tests { assert_eq!(20, lines[0].len()); } + + #[test] + fn test_break_line_zwj() { + let mut test_string = String::with_capacity(1100); + for _ in 0..20 { + test_string.push_str("👩🏻‍🔬"); + } + + let lines = break_line(&test_string, 80); + + let widths: Vec = lines + .iter() + .map(|s| UnicodeWidthStr::width(&s[..])) + .collect(); + + // Each 👩🏻‍🔬 is 6 character width it break line to the closest number to 80 => 6 * 13 = 78 + assert_eq!((78, 42), (widths[0], widths[1])); + } } From b7061d1817e55db3322a5a8a52f2010667288543 Mon Sep 17 00:00:00 2001 From: flip1995 Date: Fri, 4 Jun 2021 16:29:32 +0200 Subject: [PATCH 12/26] README: Cleanup utility list In PR #2300 an old commit got merged putting back utilities that were already implemented into "To Do". This commit reverts this. In addition it moves `numfmt` to Semi-Done and sorts the Semi-Done column alphabetically. This should now be the up-to-date list of utilities. There are 96 utilities in Done or Semi-Done and `ls -1 src/uu | wc -l` also outputs 96. --- README.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index fde01d64a..fd8709b64 100644 --- a/README.md +++ b/README.md @@ -342,22 +342,22 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md). | Done | Semi-Done | To Do | |-----------|-----------|--------| | arch | cp | chcon | -| base32 | expr | csplit | -| base64 | install | dd | -| basename | ls | df | -| cat | more | numfmt | -| chgrp | od (`--strings` and 128-bit data types missing) | runcon | -| chmod | printf | stty | -| chown | sort | | -| chroot | split | | -| cksum | tail | | -| comm | test | | -| csplit | date | | -| cut | join | | -| dircolors | df | | +| base32 | date | dd | +| base64 | df | runcon | +| basename | expr | stty | +| cat | install | | +| chgrp | join | | +| chmod | ls | | +| chown | more | | +| chroot | numfmt | | +| cksum | od (`--strings` and 128-bit data types missing) | | +| comm | pr | | +| csplit | printf | | +| cut | sort | | +| dircolors | split | | | dirname | tac | | -| du | pr | | -| echo | | | +| du | tail | | +| echo | test | | | env | | | | expand | | | | factor | | | @@ -374,12 +374,12 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md). | link | | | | ln | | | | logname | | | -| ~~md5sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | -| ~~sha1sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | -| ~~sha224sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | -| ~~sha256sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | -| ~~sha384sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | -| ~~sha512sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | +| ~~md5sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | | +| ~~sha1sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | | +| ~~sha224sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | | +| ~~sha256sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | | +| ~~sha384sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | | +| ~~sha512sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | | | mkdir | | | | mkfifo | | | | mknod | | | From 14303c524fa744ad824155925fc764209a4f4976 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 4 Jun 2021 23:55:24 +0200 Subject: [PATCH 13/26] gnu/rm: make another test pass --- util/build-gnu.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 83136e733..1ffde8311 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -116,4 +116,6 @@ sed -i -e "s|rm: cannot remove directory 'b/a/p'|rm: cannot remove 'b'|g" tests/ sed -i -e "s|rm: cannot remove 'a/1'|rm: cannot remove 'a'|g" tests/rm/rm2.sh +sed -i -e "s|removed directory 'a/'|removed directory 'a'|g" tests/rm/v-slash.sh + test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" From 285b27c9b311e92833d310c33b77cf617ebc9623 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 5 Jun 2021 11:04:42 +0200 Subject: [PATCH 14/26] du: add --app as alias of --apparent-size to match GNU --- src/uu/du/src/du.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index a46f74100..c5fff2ed7 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -431,6 +431,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { although the apparent size is usually smaller, it may be larger due to holes \ in ('sparse') files, internal fragmentation, indirect blocks, and the like" ) + .alias("app") // The GNU testsuite uses this alias ) .arg( Arg::with_name(options::BLOCK_SIZE) From 420e9322eac301906ea87bb03a3a9304ebb5f846 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sat, 5 Jun 2021 14:07:09 +0200 Subject: [PATCH 15/26] more: do not accidentically hide last line --- src/uu/more/src/more.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 27829d577..d37dd46f4 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -349,7 +349,7 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) { let mut lower_mark = upper_mark.saturating_add(rows); if lower_mark >= line_count { - upper_mark = line_count.saturating_sub(rows); + upper_mark = line_count.saturating_sub(rows).saturating_add(1); lower_mark = line_count; } else { lower_mark = lower_mark.saturating_sub(1) From 2760efb01de4f135287ca3763c6e16f547e1f82e Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sat, 5 Jun 2021 14:42:43 +0200 Subject: [PATCH 16/26] more: fix test --- src/uu/more/src/more.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index d37dd46f4..c1df9afa0 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -380,7 +380,7 @@ mod tests { fn test_calc_range() { assert_eq!((0, 24), calc_range(0, 25, 100)); assert_eq!((50, 74), calc_range(50, 25, 100)); - assert_eq!((75, 100), calc_range(85, 25, 100)); + assert_eq!((76, 100), calc_range(85, 25, 100)); } #[test] fn test_break_lines_long() { From b9fe76ab92b39ea0a4b009277184f151a86cf6ce Mon Sep 17 00:00:00 2001 From: Dean Li Date: Sat, 5 Jun 2021 22:33:30 +0800 Subject: [PATCH 17/26] more: Show next file at bottom line Implement feature requested in #2318. --- src/uu/more/src/more.rs | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 27829d577..4f5b95a48 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -143,7 +143,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if let Some(files) = matches.values_of(options::FILES) { let mut stdout = setup_term(); let length = files.len(); - for (idx, file) in files.enumerate() { + + let mut files_iter = files.peekable(); + while let (Some(file), next_file) = (files_iter.next(), files_iter.peek()) { let file = Path::new(file); if file.is_dir() { terminal::disable_raw_mode().unwrap(); @@ -160,15 +162,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let mut reader = BufReader::new(File::open(file).unwrap()); reader.read_to_string(&mut buff).unwrap(); - let is_last = idx + 1 == length; - more(&buff, &mut stdout, is_last); + more(&buff, &mut stdout, next_file.copied()); buff.clear(); } reset_term(&mut stdout); } else if atty::isnt(atty::Stream::Stdin) { stdin().read_to_string(&mut buff).unwrap(); let mut stdout = setup_term(); - more(&buff, &mut stdout, true); + more(&buff, &mut stdout, None); reset_term(&mut stdout); } else { show_usage_error!("bad usage"); @@ -203,7 +204,7 @@ fn reset_term(stdout: &mut std::io::Stdout) { #[inline(always)] fn reset_term(_: &mut usize) {} -fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) { +fn more(buff: &str, mut stdout: &mut Stdout, next_file: Option<&str>) { let (cols, rows) = terminal::size().unwrap(); let lines = break_buff(buff, usize::from(cols)); let line_count: u16 = lines.len().try_into().unwrap(); @@ -217,8 +218,11 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) { &mut stdout, lines.clone(), line_count, + next_file, ); + let is_last = next_file.is_none(); + // Specifies whether we have reached the end of the file and should // return on the next key press. However, we immediately return when // this is the last file. @@ -270,6 +274,7 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) { &mut stdout, lines.clone(), line_count, + next_file, ); if lines_left == 0 { @@ -288,6 +293,7 @@ fn draw( mut stdout: &mut std::io::Stdout, lines: Vec, lc: u16, + next_file: Option<&str>, ) { execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine)).unwrap(); let (up_mark, lower_mark) = calc_range(*upper_mark, rows, lc); @@ -302,7 +308,7 @@ fn draw( .write_all(format!("\r{}\n", line).as_bytes()) .unwrap(); } - make_prompt_and_flush(&mut stdout, lower_mark, lc); + make_prompt_and_flush(&mut stdout, lower_mark, lc, next_file); *upper_mark = up_mark; } @@ -358,12 +364,20 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) { } // Make a prompt similar to original more -fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) { +fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16, next_file: Option<&str>) { + let status = if lower_mark == lc { + format!("Next file: {}", next_file.unwrap_or_default()) + } else { + format!( + "{}%", + (lower_mark as f64 / lc as f64 * 100.0).round() as u16 + ) + }; write!( stdout, - "\r{}--More--({}%){}", + "\r{}--More--({}){}", Attribute::Reverse, - ((lower_mark as f64 / lc as f64) * 100.0).round() as u16, + status, Attribute::Reset ) .unwrap(); From 7f07bd82d4ee7095e902ce1016e94d96c437f3d9 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 6 Jun 2021 13:25:44 +0200 Subject: [PATCH 18/26] hashsum: document how to benchmark blake2 --- src/uu/hashsum/BENCHMARKING.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/uu/hashsum/BENCHMARKING.md diff --git a/src/uu/hashsum/BENCHMARKING.md b/src/uu/hashsum/BENCHMARKING.md new file mode 100644 index 000000000..cef710a19 --- /dev/null +++ b/src/uu/hashsum/BENCHMARKING.md @@ -0,0 +1,9 @@ +## Benchmarking hashsum + +### To bench blake2 + +Taken from: https://github.com/uutils/coreutils/pull/2296 + +With a large file: +$ hyperfine "./target/release/coreutils hashsum --b2sum large-file" "b2sum large-file" + From 7e41b58845dc887409ad66803f31ac480a0de927 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 6 Jun 2021 13:38:48 +0200 Subject: [PATCH 19/26] ride along: refresh cargo.lock --- Cargo.lock | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c3aae831..43d491cef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,12 +43,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "array-init" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6945cc5422176fc5e602e590c2878d2c2acd9a4fe20a4baa7c28022521698ec6" - [[package]] name = "arrayref" version = "0.3.6" @@ -710,9 +704,9 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" [[package]] name = "heck" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] @@ -1393,12 +1387,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" -dependencies = [ - "byteorder", -] +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" @@ -1511,9 +1502,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ "libc", ] From 2dd6824e7628d9d3fad77ed44d57654cb45ef6d4 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 11:29:32 +0200 Subject: [PATCH 20/26] sort: never use a bigger buffer than requested A min buffer size of 8KB makes sense in practice, but decreases testability. --- src/uu/sort/src/ext_sort.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index 9b1845efa..d344df428 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -30,7 +30,7 @@ use crate::{ compare_by, merge, output_sorted_lines, sort_by, GlobalSettings, }; -const MIN_BUFFER_SIZE: usize = 8_000; +const START_BUFFER_SIZE: usize = 8_000; /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. pub fn ext_sort(files: &mut impl Iterator>, settings: &GlobalSettings) { @@ -132,7 +132,14 @@ fn reader_writer( for _ in 0..2 { chunks::read( &mut sender_option, - vec![0; MIN_BUFFER_SIZE], + vec![ + 0; + if START_BUFFER_SIZE < buffer_size { + START_BUFFER_SIZE + } else { + buffer_size + } + ], Some(buffer_size), &mut carry_over, &mut file, From 66359a0f564561edcba6ce154c7536a1cba7869e Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 11:31:42 +0200 Subject: [PATCH 21/26] sort: insert line separators after non-empty files If files don't end witht a line separator we have to insert one, otherwise the last line will be combined with the first line of the next file. --- src/uu/sort/src/chunks.rs | 20 +++++++++++++++++--- tests/by-util/test_sort.rs | 8 ++++++++ tests/fixtures/sort/no_trailing_newline1.txt | 2 ++ tests/fixtures/sort/no_trailing_newline2.txt | 1 + 4 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/sort/no_trailing_newline1.txt create mode 100644 tests/fixtures/sort/no_trailing_newline2.txt diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 23567833b..3705f2cf7 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -175,6 +175,7 @@ fn read_to_buffer( separator: u8, ) -> (usize, bool) { let mut read_target = &mut buffer[start_offset..]; + let mut last_file_target_size = read_target.len(); loop { match file.read(read_target) { Ok(0) => { @@ -208,14 +209,27 @@ fn read_to_buffer( read_target = &mut buffer[len..]; } } else { - // This file is empty. + // This file has been fully read. + let mut leftover_len = read_target.len(); + if last_file_target_size != leftover_len { + // The file was not empty. + let read_len = buffer.len() - leftover_len; + if buffer[read_len - 1] != separator { + // The file did not end with a separator. We have to insert one. + buffer[read_len] = separator; + leftover_len -= 1; + } + let read_len = buffer.len() - leftover_len; + read_target = &mut buffer[read_len..]; + } if let Some(next_file) = next_files.next() { // There is another file. + last_file_target_size = leftover_len; *file = next_file; } else { // This was the last file. - let leftover_len = read_target.len(); - return (buffer.len() - leftover_len, false); + let read_len = buffer.len() - leftover_len; + return (read_len, false); } } } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 02636b027..97127b995 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -792,3 +792,11 @@ fn test_nonexistent_file() { fn test_blanks() { test_helper("blanks", &["-b", "--ignore-blanks"]); } + +#[test] +fn sort_multiple() { + new_ucmd!() + .args(&["no_trailing_newline1.txt", "no_trailing_newline2.txt"]) + .succeeds() + .stdout_is("a\nb\nb\n"); +} diff --git a/tests/fixtures/sort/no_trailing_newline1.txt b/tests/fixtures/sort/no_trailing_newline1.txt new file mode 100644 index 000000000..0a207c060 --- /dev/null +++ b/tests/fixtures/sort/no_trailing_newline1.txt @@ -0,0 +1,2 @@ +a +b \ No newline at end of file diff --git a/tests/fixtures/sort/no_trailing_newline2.txt b/tests/fixtures/sort/no_trailing_newline2.txt new file mode 100644 index 000000000..63d8dbd40 --- /dev/null +++ b/tests/fixtures/sort/no_trailing_newline2.txt @@ -0,0 +1 @@ +b \ No newline at end of file From 5a5c7c5a346ca0971f7f42ce216cafbb8ae8f334 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 11:33:22 +0200 Subject: [PATCH 22/26] sort: properly check for empty reads --- src/uu/sort/src/chunks.rs | 22 +++++++++++----------- tests/by-util/test_sort.rs | 9 +++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 3705f2cf7..dde6febd3 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -102,17 +102,17 @@ pub fn read( carry_over.clear(); carry_over.extend_from_slice(&buffer[read..]); - let payload = Chunk::new(buffer, |buf| { - let mut lines = unsafe { - // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime, - // because it was only temporarily transmuted to a Vec> to make recycling possible. - std::mem::transmute::>, Vec>>(lines) - }; - let read = crash_if_err!(1, std::str::from_utf8(&buf[..read])); - parse_lines(read, &mut lines, separator, &settings); - lines - }); - if !payload.borrow_lines().is_empty() { + if read != 0 { + let payload = Chunk::new(buffer, |buf| { + let mut lines = unsafe { + // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime, + // because it was only temporarily transmuted to a Vec> to make recycling possible. + std::mem::transmute::>, Vec>>(lines) + }; + let read = crash_if_err!(1, std::str::from_utf8(&buf[..read])); + parse_lines(read, &mut lines, separator, &settings); + lines + }); sender.send(payload).unwrap(); } if !should_continue { diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 97127b995..d100e2141 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -800,3 +800,12 @@ fn sort_multiple() { .succeeds() .stdout_is("a\nb\nb\n"); } + +#[test] +fn sort_empty_chunk() { + new_ucmd!() + .args(&["-S", "40B"]) + .pipe_in("a\na\n") + .succeeds() + .stdout_is("a\na\n"); +} From 8d213219c7c4120bf70c926d79fe517b2daf1b29 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 12:07:06 +0200 Subject: [PATCH 23/26] sort: implement --compress-program --- src/uu/sort/src/ext_sort.rs | 75 ++++++++++++++++++++++++++++++++----- src/uu/sort/src/merge.rs | 10 +++-- src/uu/sort/src/sort.rs | 14 ++++++- tests/by-util/test_sort.rs | 31 +++++++++++++++ 4 files changed, 115 insertions(+), 15 deletions(-) diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index d344df428..2d8513e9f 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -12,8 +12,12 @@ //! The buffers for the individual chunks are recycled. There are two buffers. use std::cmp::Ordering; +use std::fs::File; +use std::io::BufReader; use std::io::{BufWriter, Write}; use std::path::Path; +use std::process::Child; +use std::process::{Command, Stdio}; use std::{ fs::OpenOptions, io::Read, @@ -25,6 +29,7 @@ use itertools::Itertools; use tempfile::TempDir; +use crate::Line; use crate::{ chunks::{self, Chunk}, compare_by, merge, output_sorted_lines, sort_by, GlobalSettings, @@ -63,10 +68,31 @@ pub fn ext_sort(files: &mut impl Iterator>, settings ); match read_result { ReadResult::WroteChunksToFile { chunks_written } => { - let files = (0..chunks_written) - .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string())) - .collect::>(); - let mut merger = merge::merge(&files, settings); + let mut children = Vec::new(); + let files = (0..chunks_written).map(|chunk_num| { + let file_path = tmp_dir.path().join(chunk_num.to_string()); + let file = File::open(file_path).unwrap(); + if let Some(compress_prog) = &settings.compress_prog { + let mut command = Command::new(compress_prog); + command.stdin(file).stdout(Stdio::piped()).arg("-d"); + let mut child = crash_if_err!( + 2, + command.spawn().map_err(|err| format!( + "couldn't execute compress program: errno {}", + err.raw_os_error().unwrap() + )) + ); + let child_stdout = child.stdout.take().unwrap(); + children.push(child); + Box::new(BufReader::new(child_stdout)) as Box + } else { + Box::new(BufReader::new(file)) as Box + } + }); + let mut merger = merge::merge(files, settings); + for child in children { + assert_child_success(child, settings.compress_prog.as_ref().unwrap()); + } merger.write_all(settings); } ReadResult::SortedSingleChunk(chunk) => { @@ -178,6 +204,7 @@ fn reader_writer( write( &mut chunk, &tmp_dir.path().join(file_number.to_string()), + settings.compress_prog.as_deref(), separator, ); @@ -200,14 +227,42 @@ fn reader_writer( } /// Write the lines in `chunk` to `file`, separated by `separator`. -fn write(chunk: &mut Chunk, file: &Path, separator: u8) { +/// `compress_prog` is used to optionally compress file contents. +fn write(chunk: &mut Chunk, file: &Path, compress_prog: Option<&str>, separator: u8) { chunk.with_lines_mut(|lines| { // Write the lines to the file let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file)); - let mut writer = BufWriter::new(file); - for s in lines.iter() { - crash_if_err!(1, writer.write_all(s.line.as_bytes())); - crash_if_err!(1, writer.write_all(&[separator])); - } + if let Some(compress_prog) = compress_prog { + let mut command = Command::new(compress_prog); + command.stdin(Stdio::piped()).stdout(file); + let mut child = crash_if_err!( + 2, + command.spawn().map_err(|err| format!( + "couldn't execute compress program: errno {}", + err.raw_os_error().unwrap() + )) + ); + let mut writer = BufWriter::new(child.stdin.take().unwrap()); + write_lines(lines, &mut writer, separator); + writer.flush().unwrap(); + drop(writer); + assert_child_success(child, compress_prog); + } else { + let mut writer = BufWriter::new(file); + write_lines(lines, &mut writer, separator); + }; }); } + +fn write_lines<'a, T: Write>(lines: &[Line<'a>], writer: &mut T, separator: u8) { + for s in lines { + crash_if_err!(1, writer.write_all(s.line.as_bytes())); + crash_if_err!(1, writer.write_all(&[separator])); + } +} + +fn assert_child_success(mut child: Child, program: &str) { + if !matches!(child.wait().map(|e| e.code()), Ok(Some(0)) | Ok(None)) { + crash!(2, "'{}' terminated abnormally", program) + } +} diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index 696353829..b47c58c08 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -9,7 +9,6 @@ use std::{ cmp::Ordering, - ffi::OsStr, io::{Read, Write}, iter, rc::Rc, @@ -21,15 +20,18 @@ use compare::Compare; use crate::{ chunks::{self, Chunk}, - compare_by, open, GlobalSettings, + compare_by, GlobalSettings, }; // Merge already sorted files. -pub fn merge<'a>(files: &[impl AsRef], settings: &'a GlobalSettings) -> FileMerger<'a> { +pub fn merge>>( + files: F, + settings: &GlobalSettings, +) -> FileMerger { let (request_sender, request_receiver) = channel(); let mut reader_files = Vec::with_capacity(files.len()); let mut loaded_receivers = Vec::with_capacity(files.len()); - for (file_number, file) in files.iter().map(open).enumerate() { + for (file_number, file) in files.enumerate() { let (sender, receiver) = sync_channel(2); loaded_receivers.push(receiver); reader_files.push(ReaderFile { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 5825e73bd..6cdf051c1 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -95,6 +95,7 @@ static OPT_PARALLEL: &str = "parallel"; static OPT_FILES0_FROM: &str = "files0-from"; static OPT_BUF_SIZE: &str = "buffer-size"; static OPT_TMP_DIR: &str = "temporary-directory"; +static OPT_COMPRESS_PROG: &str = "compress-program"; static ARG_FILES: &str = "files"; @@ -155,6 +156,7 @@ pub struct GlobalSettings { zero_terminated: bool, buffer_size: usize, tmp_dir: PathBuf, + compress_prog: Option, } impl GlobalSettings { @@ -223,6 +225,7 @@ impl Default for GlobalSettings { zero_terminated: false, buffer_size: DEFAULT_BUF_SIZE, tmp_dir: PathBuf::new(), + compress_prog: None, } } } @@ -1076,6 +1079,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .takes_value(true) .value_name("DIR"), ) + .arg( + Arg::with_name(OPT_COMPRESS_PROG) + .long(OPT_COMPRESS_PROG) + .help("compress temporary files with PROG, decompress with PROG -d") + .long_help("PROG has to take input from stdin and output to stdout") + .value_name("PROG") + ) .arg( Arg::with_name(OPT_FILES0_FROM) .long(OPT_FILES0_FROM) @@ -1165,6 +1175,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .map(PathBuf::from) .unwrap_or_else(env::temp_dir); + settings.compress_prog = matches.value_of(OPT_COMPRESS_PROG).map(String::from); + settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); settings.merge = matches.is_present(OPT_MERGE); @@ -1240,7 +1252,7 @@ fn output_sorted_lines<'a>(iter: impl Iterator>, settings: & fn exec(files: &[String], settings: &GlobalSettings) -> i32 { if settings.merge { - let mut file_merger = merge::merge(files, settings); + let mut file_merger = merge::merge(files.iter().map(open), settings); file_merger.write_all(settings); } else if settings.check { if files.len() > 1 { diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index d100e2141..e731d5b1d 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -809,3 +809,34 @@ fn sort_empty_chunk() { .succeeds() .stdout_is("a\na\n"); } + +#[test] +#[cfg(target_os = "linux")] +fn test_compress() { + new_ucmd!() + .args(&[ + "ext_sort.txt", + "-n", + "--compress-program", + "gzip", + "-S", + "10", + ]) + .succeeds() + .stdout_only_fixture("ext_sort.expected"); +} + +#[test] +fn test_compress_fail() { + new_ucmd!() + .args(&[ + "ext_sort.txt", + "-n", + "--compress-program", + "nonexistent-program", + "-S", + "10", + ]) + .fails() + .stderr_only("sort: couldn't execute compress program: errno 2"); +} From 7c9da82b394e0be8b640afb631f7d512a5351aab Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 13:50:38 +0200 Subject: [PATCH 24/26] sort: implement --batch-size --- src/uu/sort/src/ext_sort.rs | 2 +- src/uu/sort/src/merge.rs | 63 ++++++++++++++++++++++++++++++++++--- src/uu/sort/src/sort.rs | 17 +++++++++- tests/by-util/test_sort.rs | 13 ++++++++ 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index 2d8513e9f..91a7ca360 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -89,7 +89,7 @@ pub fn ext_sort(files: &mut impl Iterator>, settings Box::new(BufReader::new(file)) as Box } }); - let mut merger = merge::merge(files, settings); + let mut merger = merge::merge_with_file_limit(files, settings); for child in children { assert_child_success(child, settings.compress_prog.as_ref().unwrap()); } diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index b47c58c08..478b454b6 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -9,7 +9,8 @@ use std::{ cmp::Ordering, - io::{Read, Write}, + fs::File, + io::{BufWriter, Read, Write}, iter, rc::Rc, sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender}, @@ -17,6 +18,7 @@ use std::{ }; use compare::Compare; +use itertools::Itertools; use crate::{ chunks::{self, Chunk}, @@ -24,13 +26,60 @@ use crate::{ }; // Merge already sorted files. -pub fn merge>>( +pub fn merge_with_file_limit>>( + files: F, + settings: &GlobalSettings, +) -> FileMerger { + if files.len() > settings.merge_batch_size { + let tmp_dir = tempfile::Builder::new() + .prefix("uutils_sort") + .tempdir_in(&settings.tmp_dir) + .unwrap(); + let mut batch_number = 0; + let mut remaining_files = files.len(); + let batches = files.chunks(settings.merge_batch_size); + let mut batches = batches.into_iter(); + while batch_number + remaining_files > settings.merge_batch_size && remaining_files != 0 { + remaining_files = remaining_files.saturating_sub(settings.merge_batch_size); + let mut merger = merge_without_limit(batches.next().unwrap(), settings); + let tmp_file = File::create(tmp_dir.path().join(batch_number.to_string())).unwrap(); + merger.write_all_to(settings, &mut BufWriter::new(tmp_file)); + batch_number += 1; + } + let batch_files = (0..batch_number).map(|n| { + Box::new(File::open(tmp_dir.path().join(n.to_string())).unwrap()) + as Box + }); + if batch_number > settings.merge_batch_size { + assert!(batches.next().is_none()); + merge_with_file_limit( + Box::new(batch_files) as Box>>, + settings, + ) + } else { + let final_batch = batches.next(); + assert!(batches.next().is_none()); + merge_without_limit( + batch_files.chain(final_batch.into_iter().flatten()), + settings, + ) + } + } else { + merge_without_limit(files, settings) + } +} + +/// Merge files without limiting how many files are concurrently open +/// +/// It is the responsibility of the caller to ensure that `files` yields only +/// as many files as we are allowed to open concurrently. +fn merge_without_limit>>( files: F, settings: &GlobalSettings, ) -> FileMerger { let (request_sender, request_receiver) = channel(); - let mut reader_files = Vec::with_capacity(files.len()); - let mut loaded_receivers = Vec::with_capacity(files.len()); + let mut reader_files = Vec::with_capacity(files.size_hint().0); + let mut loaded_receivers = Vec::with_capacity(files.size_hint().0); for (file_number, file) in files.enumerate() { let (sender, receiver) = sync_channel(2); loaded_receivers.push(receiver); @@ -148,7 +197,11 @@ impl<'a> FileMerger<'a> { /// Write the merged contents to the output file. pub fn write_all(&mut self, settings: &GlobalSettings) { let mut out = settings.out_writer(); - while self.write_next(settings, &mut out) {} + self.write_all_to(settings, &mut out); + } + + pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) { + while self.write_next(settings, out) {} } fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 6cdf051c1..70e3325ad 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -96,6 +96,7 @@ static OPT_FILES0_FROM: &str = "files0-from"; static OPT_BUF_SIZE: &str = "buffer-size"; static OPT_TMP_DIR: &str = "temporary-directory"; static OPT_COMPRESS_PROG: &str = "compress-program"; +static OPT_BATCH_SIZE: &str = "batch-size"; static ARG_FILES: &str = "files"; @@ -157,6 +158,7 @@ pub struct GlobalSettings { buffer_size: usize, tmp_dir: PathBuf, compress_prog: Option, + merge_batch_size: usize, } impl GlobalSettings { @@ -226,6 +228,7 @@ impl Default for GlobalSettings { buffer_size: DEFAULT_BUF_SIZE, tmp_dir: PathBuf::new(), compress_prog: None, + merge_batch_size: 16, } } } @@ -1086,6 +1089,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long_help("PROG has to take input from stdin and output to stdout") .value_name("PROG") ) + .arg( + Arg::with_name(OPT_BATCH_SIZE) + .long(OPT_BATCH_SIZE) + .help("Merge at most N_MERGE inputs at once.") + .value_name("N_MERGE") + ) .arg( Arg::with_name(OPT_FILES0_FROM) .long(OPT_FILES0_FROM) @@ -1177,6 +1186,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.compress_prog = matches.value_of(OPT_COMPRESS_PROG).map(String::from); + if let Some(n_merge) = matches.value_of(OPT_BATCH_SIZE) { + settings.merge_batch_size = n_merge + .parse() + .unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge)); + } + settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); settings.merge = matches.is_present(OPT_MERGE); @@ -1252,7 +1267,7 @@ fn output_sorted_lines<'a>(iter: impl Iterator>, settings: & fn exec(files: &[String], settings: &GlobalSettings) -> i32 { if settings.merge { - let mut file_merger = merge::merge(files.iter().map(open), settings); + let mut file_merger = merge::merge_with_file_limit(files.iter().map(open), settings); file_merger.write_all(settings); } else if settings.check { if files.len() > 1 { diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index e731d5b1d..75611abfc 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -840,3 +840,16 @@ fn test_compress_fail() { .fails() .stderr_only("sort: couldn't execute compress program: errno 2"); } + +#[test] +fn test_merge_batches() { + new_ucmd!() + .args(&[ + "ext_sort.txt", + "-n", + "-S", + "150B", + ]) + .succeeds() + .stdout_only_fixture("ext_sort.expected"); +} From 6ee6082cf88d656f1af30a9240d888594aa7e44f Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 18:04:55 +0200 Subject: [PATCH 25/26] update Cargo.lock --- Cargo.lock | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c3aae831..43d491cef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,12 +43,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "array-init" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6945cc5422176fc5e602e590c2878d2c2acd9a4fe20a4baa7c28022521698ec6" - [[package]] name = "arrayref" version = "0.3.6" @@ -710,9 +704,9 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" [[package]] name = "heck" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] @@ -1393,12 +1387,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" -dependencies = [ - "byteorder", -] +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" @@ -1511,9 +1502,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ "libc", ] From d6da143c4ebffb65221cb5b4a3b5157237aea52b Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 6 Jun 2021 19:53:28 +0200 Subject: [PATCH 26/26] sort: ignore errors when waiting for child --- src/uu/sort/src/ext_sort.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index 91a7ca360..c439adcdc 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -262,7 +262,10 @@ fn write_lines<'a, T: Write>(lines: &[Line<'a>], writer: &mut T, separator: u8) } fn assert_child_success(mut child: Child, program: &str) { - if !matches!(child.wait().map(|e| e.code()), Ok(Some(0)) | Ok(None)) { + if !matches!( + child.wait().map(|e| e.code()), + Ok(Some(0)) | Ok(None) | Err(_) + ) { crash!(2, "'{}' terminated abnormally", program) } }