
Merge branch 'master' of github.com:uutils/coreutils into refactoring_parse_size

Jan Scheer, 2021-06-06 22:54:02 +02:00
Commit: 12de58aec0

20 changed files with 519 additions and 148 deletions

Cargo.lock (generated)

@@ -44,13 +44,16 @@ dependencies = [
 ]
 
 [[package]]
-name = "arrayvec"
-version = "0.4.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
-dependencies = [
- "nodrop",
-]
+name = "arrayref"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
+
+[[package]]
+name = "arrayvec"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
 
 [[package]]
 name = "atty"
@@ -100,11 +103,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 
 [[package]]
-name = "blake2-rfc"
-version = "0.2.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
+name = "blake2b_simd"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
 dependencies = [
+ "arrayref",
  "arrayvec",
  "constant_time_eq",
 ]
@@ -700,9 +704,9 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
 
 [[package]]
 name = "heck"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
+checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
 dependencies = [
  "unicode-segmentation",
 ]
@@ -1383,12 +1387,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.1.9"
+version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
-dependencies = [
- "byteorder",
-]
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
 
 [[package]]
 name = "regex-syntax"
@@ -1501,9 +1502,9 @@ dependencies = [
 
 [[package]]
 name = "signal-hook-registry"
-version = "1.3.0"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
+checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
 dependencies = [
  "libc",
 ]
@@ -1904,6 +1905,7 @@ dependencies = [
 name = "uu_dircolors"
 version = "0.0.6"
 dependencies = [
+ "clap",
  "glob 0.3.0",
  "uucore",
  "uucore_procs",
@@ -2028,7 +2030,7 @@ dependencies = [
 name = "uu_hashsum"
 version = "0.0.6"
 dependencies = [
- "blake2-rfc",
+ "blake2b_simd",
  "clap",
  "digest",
  "hex",
@@ -2215,6 +2217,8 @@ dependencies = [
  "nix 0.13.1",
  "redox_syscall 0.1.57",
  "redox_termios",
+ "unicode-segmentation",
+ "unicode-width",
  "uucore",
  "uucore_procs",
 ]


@@ -342,22 +342,22 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
 | Done | Semi-Done | To Do |
 |-----------|-----------|--------|
 | arch | cp | chcon |
-| base32 | expr | csplit |
-| base64 | install | dd |
-| basename | ls | df |
-| cat | more | numfmt |
-| chgrp | od (`--strings` and 128-bit data types missing) | runcon |
-| chmod | printf | stty |
-| chown | sort | |
-| chroot | split | |
-| cksum | tail | |
-| comm | test | |
-| csplit | date | |
-| cut | join | |
-| dircolors | df | |
+| base32 | date | dd |
+| base64 | df | runcon |
+| basename | expr | stty |
+| cat | install | |
+| chgrp | join | |
+| chmod | ls | |
+| chown | more | |
+| chroot | numfmt | |
+| cksum | od (`--strings` and 128-bit data types missing) | |
+| comm | pr | |
+| csplit | printf | |
+| cut | sort | |
+| dircolors | split | |
 | dirname | tac | |
-| du | pr | |
-| echo | | |
+| du | tail | |
+| echo | test | |
 | env | | |
 | expand | | |
 | factor | | |
@@ -374,12 +374,12 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
 | link | | |
 | ln | | |
 | logname | | |
 | ~~md5sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha1sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha224sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha256sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha384sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | ~~sha512sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
 | mkdir | | |
 | mkfifo | | |
 | mknod | | |


@@ -15,6 +15,7 @@ edition = "2018"
 path = "src/dircolors.rs"
 
 [dependencies]
+clap = "2.33"
 glob = "0.3.0"
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }


@@ -1,6 +1,7 @@
 // This file is part of the uutils coreutils package.
 //
 // (c) Jian Zeng <anonymousknight96@gmail.com>
+// (c) Mitchell Mebane <mitchell.mebane@gmail.com>
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
@@ -15,6 +16,15 @@ use std::env;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
 
+use clap::{crate_version, App, Arg};
+
+mod options {
+    pub const BOURNE_SHELL: &str = "bourne-shell";
+    pub const C_SHELL: &str = "c-shell";
+    pub const PRINT_DATABASE: &str = "print-database";
+    pub const FILE: &str = "FILE";
+}
+
 static SYNTAX: &str = "[OPTION]... [FILE]";
 static SUMMARY: &str = "Output commands to set the LS_COLORS environment variable.";
 static LONG_HELP: &str = "
@@ -52,28 +62,56 @@ pub fn guess_syntax() -> OutputFmt {
     }
 }
 
+fn get_usage() -> String {
+    format!("{0} {1}", executable!(), SYNTAX)
+}
+
 pub fn uumain(args: impl uucore::Args) -> i32 {
     let args = args
         .collect_str(InvalidEncodingHandling::Ignore)
         .accept_any();
 
-    let matches = app!(SYNTAX, SUMMARY, LONG_HELP)
-        .optflag("b", "sh", "output Bourne shell code to set LS_COLORS")
-        .optflag(
-            "",
-            "bourne-shell",
-            "output Bourne shell code to set LS_COLORS",
-        )
-        .optflag("c", "csh", "output C shell code to set LS_COLORS")
-        .optflag("", "c-shell", "output C shell code to set LS_COLORS")
-        .optflag("p", "print-database", "print the byte counts")
-        .parse(args);
-
-    if (matches.opt_present("csh")
-        || matches.opt_present("c-shell")
-        || matches.opt_present("sh")
-        || matches.opt_present("bourne-shell"))
-        && matches.opt_present("print-database")
+    let usage = get_usage();
+
+    let matches = App::new(executable!())
+        .version(crate_version!())
+        .about(SUMMARY)
+        .usage(&usage[..])
+        .after_help(LONG_HELP)
+        .arg(
+            Arg::with_name(options::BOURNE_SHELL)
+                .long("sh")
+                .short("b")
+                .visible_alias("bourne-shell")
+                .help("output Bourne shell code to set LS_COLORS")
+                .display_order(1),
+        )
+        .arg(
+            Arg::with_name(options::C_SHELL)
+                .long("csh")
+                .short("c")
+                .visible_alias("c-shell")
+                .help("output C shell code to set LS_COLORS")
+                .display_order(2),
+        )
+        .arg(
+            Arg::with_name(options::PRINT_DATABASE)
+                .long("print-database")
+                .short("p")
+                .help("print the byte counts")
+                .display_order(3),
+        )
+        .arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
+        .get_matches_from(&args);
+
+    let files = matches
+        .values_of(options::FILE)
+        .map_or(vec![], |file_values| file_values.collect());
+
+    // clap provides .conflicts_with / .conflicts_with_all, but we want to
+    // manually handle conflicts so we can match the output of GNU coreutils
+    if (matches.is_present(options::C_SHELL) || matches.is_present(options::BOURNE_SHELL))
+        && matches.is_present(options::PRINT_DATABASE)
     {
         show_usage_error!(
             "the options to output dircolors' internal database and\nto select a shell \
@@ -82,12 +120,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
         return 1;
     }
 
-    if matches.opt_present("print-database") {
-        if !matches.free.is_empty() {
+    if matches.is_present(options::PRINT_DATABASE) {
+        if !files.is_empty() {
             show_usage_error!(
                 "extra operand {}\nfile operands cannot be combined with \
                  --print-database (-p)",
-                matches.free[0]
+                files[0]
             );
             return 1;
         }
@@ -96,9 +134,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
     }
 
     let mut out_format = OutputFmt::Unknown;
-    if matches.opt_present("csh") || matches.opt_present("c-shell") {
+    if matches.is_present(options::C_SHELL) {
         out_format = OutputFmt::CShell;
-    } else if matches.opt_present("sh") || matches.opt_present("bourne-shell") {
+    } else if matches.is_present(options::BOURNE_SHELL) {
         out_format = OutputFmt::Shell;
     }
 
@@ -113,24 +151,20 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
     }
 
     let result;
-    if matches.free.is_empty() {
+    if files.is_empty() {
         result = parse(INTERNAL_DB.lines(), out_format, "")
     } else {
-        if matches.free.len() > 1 {
-            show_usage_error!("extra operand {}", matches.free[1]);
+        if files.len() > 1 {
+            show_usage_error!("extra operand {}", files[1]);
             return 1;
         }
-        match File::open(matches.free[0].as_str()) {
+        match File::open(files[0]) {
             Ok(f) => {
                 let fin = BufReader::new(f);
-                result = parse(
-                    fin.lines().filter_map(Result::ok),
-                    out_format,
-                    matches.free[0].as_str(),
-                )
+                result = parse(fin.lines().filter_map(Result::ok), out_format, files[0])
             }
             Err(e) => {
-                show_error!("{}: {}", matches.free[0], e);
+                show_error!("{}: {}", files[0], e);
                 return 1;
             }
         }


@@ -393,6 +393,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
                     although the apparent size is usually smaller, it may be larger due to holes \
                     in ('sparse') files, internal fragmentation, indirect blocks, and the like"
                 )
+                .alias("app") // The GNU testsuite uses this alias
         )
         .arg(
             Arg::with_name(options::BLOCK_SIZE)


@@ -0,0 +1,9 @@
+## Benchmarking hashsum
+
+### To bench blake2
+
+Taken from: https://github.com/uutils/coreutils/pull/2296
+
+With a large file:
+
+$ hyperfine "./target/release/coreutils hashsum --b2sum large-file" "b2sum large-file"


@@ -25,7 +25,7 @@ regex-syntax = "0.6.7"
 sha1 = "0.6.0"
 sha2 = "0.6.0"
 sha3 = "0.6.0"
-blake2-rfc = "0.2.18"
+blake2b_simd = "0.5.11"
 
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }


@@ -1,4 +1,3 @@
-extern crate blake2_rfc;
 extern crate digest;
 extern crate md5;
 extern crate sha1;
@@ -49,9 +48,9 @@ impl Digest for md5::Context {
     }
 }
 
-impl Digest for blake2_rfc::blake2b::Blake2b {
+impl Digest for blake2b_simd::State {
     fn new() -> Self {
-        blake2_rfc::blake2b::Blake2b::new(64)
+        Self::new()
     }
 
     fn input(&mut self, input: &[u8]) {
@@ -59,12 +58,12 @@ impl Digest for blake2_rfc::blake2b::Blake2b {
     }
 
     fn result(&mut self, out: &mut [u8]) {
-        let hash_result = &self.clone().finalize();
+        let hash_result = &self.finalize();
         out.copy_from_slice(&hash_result.as_bytes());
     }
 
     fn reset(&mut self) {
-        *self = blake2_rfc::blake2b::Blake2b::new(64);
+        *self = Self::new();
     }
 
     fn output_bits(&self) -> usize {

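The crate swapped in above exposes a small streaming API. As a standalone illustration only (it assumes a blake2b_simd = "0.5" dependency and is not part of the commit), the calls that the new Digest impl wraps look like this:

fn main() {
    // blake2b_simd::State produces a 64-byte (512-bit) BLAKE2b digest by default.
    let mut state = blake2b_simd::State::new();
    state.update(b"hello ");
    state.update(b"world"); // input can be fed incrementally
    // finalize() only borrows the state, which is why the impl above no longer needs .clone().
    let hash = state.finalize();
    println!("{}", hash.to_hex());
}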

@@ -19,7 +19,6 @@ mod digest;
 
 use self::digest::Digest;
 
-use blake2_rfc::blake2b::Blake2b;
 use clap::{App, Arg, ArgMatches};
 use hex::ToHex;
 use md5::Context as Md5;
@@ -85,7 +84,11 @@ fn detect_algo<'a>(
         "sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box<dyn Digest>, 256),
         "sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box<dyn Digest>, 384),
         "sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box<dyn Digest>, 512),
-        "b2sum" => ("BLAKE2", Box::new(Blake2b::new(64)) as Box<dyn Digest>, 512),
+        "b2sum" => (
+            "BLAKE2",
+            Box::new(blake2b_simd::State::new()) as Box<dyn Digest>,
+            512,
+        ),
         "sha3sum" => match matches.value_of("bits") {
             Some(bits_str) => match (&bits_str).parse::<usize>() {
                 Ok(224) => (
@@ -187,7 +190,7 @@ fn detect_algo<'a>(
                     set_or_crash("SHA512", Box::new(Sha512::new()), 512)
                 }
                 if matches.is_present("b2sum") {
-                    set_or_crash("BLAKE2", Box::new(Blake2b::new(64)), 512)
+                    set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512)
                 }
                 if matches.is_present("sha3") {
                     match matches.value_of("bits") {


@@ -20,6 +20,8 @@ uucore = { version = ">=0.0.7", package = "uucore", path = "../../uucore" }
 uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
 crossterm = ">=0.19"
 atty = "0.2.14"
+unicode-width = "0.1.7"
+unicode-segmentation = "1.7.1"
 
 [target.'cfg(target_os = "redox")'.dependencies]
 redox_termios = "0.1"


@@ -29,6 +29,9 @@ use crossterm::{
     terminal,
 };
 
+use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
+
 pub mod options {
     pub const SILENT: &str = "silent";
     pub const LOGICAL: &str = "logical";
@@ -140,7 +143,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
     if let Some(files) = matches.values_of(options::FILES) {
         let mut stdout = setup_term();
         let length = files.len();
-        for (idx, file) in files.enumerate() {
+
+        let mut files_iter = files.peekable();
+        while let (Some(file), next_file) = (files_iter.next(), files_iter.peek()) {
             let file = Path::new(file);
             if file.is_dir() {
                 terminal::disable_raw_mode().unwrap();
@@ -157,15 +162,14 @@
             }
             let mut reader = BufReader::new(File::open(file).unwrap());
             reader.read_to_string(&mut buff).unwrap();
-            let is_last = idx + 1 == length;
-            more(&buff, &mut stdout, is_last);
+            more(&buff, &mut stdout, next_file.copied());
             buff.clear();
         }
         reset_term(&mut stdout);
     } else if atty::isnt(atty::Stream::Stdin) {
         stdin().read_to_string(&mut buff).unwrap();
         let mut stdout = setup_term();
-        more(&buff, &mut stdout, true);
+        more(&buff, &mut stdout, None);
         reset_term(&mut stdout);
     } else {
         show_usage_error!("bad usage");
@@ -200,7 +204,7 @@ fn reset_term(stdout: &mut std::io::Stdout) {
 #[inline(always)]
 fn reset_term(_: &mut usize) {}
 
-fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
+fn more(buff: &str, mut stdout: &mut Stdout, next_file: Option<&str>) {
     let (cols, rows) = terminal::size().unwrap();
     let lines = break_buff(buff, usize::from(cols));
     let line_count: u16 = lines.len().try_into().unwrap();
@@ -214,8 +218,11 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
         &mut stdout,
         lines.clone(),
         line_count,
+        next_file,
     );
 
+    let is_last = next_file.is_none();
+
     // Specifies whether we have reached the end of the file and should
     // return on the next key press. However, we immediately return when
     // this is the last file.
@@ -267,6 +274,7 @@
                 &mut stdout,
                 lines.clone(),
                 line_count,
+                next_file,
             );
 
             if lines_left == 0 {
@@ -285,6 +293,7 @@ fn draw(
     mut stdout: &mut std::io::Stdout,
     lines: Vec<String>,
     lc: u16,
+    next_file: Option<&str>,
 ) {
     execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine)).unwrap();
     let (up_mark, lower_mark) = calc_range(*upper_mark, rows, lc);
@@ -299,7 +308,7 @@
             .write_all(format!("\r{}\n", line).as_bytes())
             .unwrap();
     }
 
-    make_prompt_and_flush(&mut stdout, lower_mark, lc);
+    make_prompt_and_flush(&mut stdout, lower_mark, lc, next_file);
     *upper_mark = up_mark;
 }
@@ -313,23 +322,30 @@ fn break_buff(buff: &str, cols: usize) -> Vec<String> {
     lines
 }
 
-fn break_line(mut line: &str, cols: usize) -> Vec<String> {
-    let breaks = (line.len() / cols).saturating_add(1);
-    let mut lines = Vec::with_capacity(breaks);
-    // TODO: Use unicode width instead of the length in bytes.
-    if line.len() < cols {
+fn break_line(line: &str, cols: usize) -> Vec<String> {
+    let width = UnicodeWidthStr::width(line);
+    let mut lines = Vec::new();
+    if width < cols {
         lines.push(line.to_string());
         return lines;
     }
 
-    for _ in 1..=breaks {
-        let (line1, line2) = line.split_at(cols);
-        lines.push(line1.to_string());
-        if line2.len() < cols {
-            lines.push(line2.to_string());
-            break;
+    let gr_idx = UnicodeSegmentation::grapheme_indices(line, true);
+    let mut last_index = 0;
+    let mut total_width = 0;
+    for (index, grapheme) in gr_idx {
+        let width = UnicodeWidthStr::width(grapheme);
+        total_width += width;
+        if total_width > cols {
+            lines.push(line[last_index..index].to_string());
+            last_index = index;
+            total_width = width;
         }
-        line = line2;
     }
+
+    if last_index != line.len() {
+        lines.push(line[last_index..].to_string());
+    }
+
     lines
 }
@@ -339,7 +355,7 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
     let mut lower_mark = upper_mark.saturating_add(rows);
     if lower_mark >= line_count {
-        upper_mark = line_count.saturating_sub(rows);
+        upper_mark = line_count.saturating_sub(rows).saturating_add(1);
         lower_mark = line_count;
     } else {
         lower_mark = lower_mark.saturating_sub(1)
     }
@@ -348,12 +364,20 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
 }
 
 // Make a prompt similar to original more
-fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
+fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16, next_file: Option<&str>) {
+    let status = if lower_mark == lc {
+        format!("Next file: {}", next_file.unwrap_or_default())
+    } else {
+        format!(
+            "{}%",
+            (lower_mark as f64 / lc as f64 * 100.0).round() as u16
+        )
+    };
     write!(
         stdout,
-        "\r{}--More--({}%){}",
+        "\r{}--More--({}){}",
         Attribute::Reverse,
-        ((lower_mark as f64 / lc as f64) * 100.0).round() as u16,
+        status,
         Attribute::Reset
     )
     .unwrap();
@@ -363,13 +387,14 @@ fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
 #[cfg(test)]
 mod tests {
     use super::{break_line, calc_range};
+    use unicode_width::UnicodeWidthStr;
 
     // It is good to test the above functions
     #[test]
     fn test_calc_range() {
         assert_eq!((0, 24), calc_range(0, 25, 100));
         assert_eq!((50, 74), calc_range(50, 25, 100));
-        assert_eq!((75, 100), calc_range(85, 25, 100));
+        assert_eq!((76, 100), calc_range(85, 25, 100));
     }
     #[test]
     fn test_break_lines_long() {
@@ -379,11 +404,12 @@
         }
 
         let lines = break_line(&test_string, 80);
 
-        assert_eq!(
-            (80, 80, 40),
-            (lines[0].len(), lines[1].len(), lines[2].len())
-        );
+        let widths: Vec<usize> = lines
+            .iter()
+            .map(|s| UnicodeWidthStr::width(&s[..]))
+            .collect();
+        assert_eq!((80, 80, 40), (widths[0], widths[1], widths[2]));
     }
 
     #[test]
@@ -397,4 +423,22 @@
         assert_eq!(20, lines[0].len());
     }
 
+    #[test]
+    fn test_break_line_zwj() {
+        let mut test_string = String::with_capacity(1100);
+        for _ in 0..20 {
+            test_string.push_str("👩🏻‍🔬");
+        }
+
+        let lines = break_line(&test_string, 80);
+
+        let widths: Vec<usize> = lines
+            .iter()
+            .map(|s| UnicodeWidthStr::width(&s[..]))
+            .collect();
+
+        // Each 👩🏻‍🔬 is 6 character width it break line to the closest number to 80 => 6 * 13 = 78
+        assert_eq!((78, 42), (widths[0], widths[1]));
+    }
 }

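The two crates added above distinguish user-perceived characters (grapheme clusters) from display width, which is what the rewritten break_line and the "6 * 13 = 78" comment in test_break_line_zwj rely on. A standalone sketch for illustration only (it assumes unicode-width = "0.1.7" and unicode-segmentation = "1.7.1", as in the Cargo.toml change, and is not part of the commit):

use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;

fn main() {
    // Woman-scientist emoji: several code points joined by a zero-width joiner.
    let s = "👩🏻‍🔬";
    // One grapheme cluster as far as segmentation is concerned...
    assert_eq!(s.graphemes(true).count(), 1);
    // ...but six terminal columns wide, so 13 of them fit in an 80-column line (13 * 6 = 78).
    assert_eq!(UnicodeWidthStr::width(s), 6);
    println!("graphemes: 1, width: {}", UnicodeWidthStr::width(s));
}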

@@ -255,7 +255,18 @@ fn handle_dir(path: &Path, options: &Options) -> bool {
         // correctly on Windows
         if let Err(e) = remove_dir_all(path) {
             had_err = true;
-            show_error!("could not remove '{}': {}", path.display(), e);
+            if e.kind() == std::io::ErrorKind::PermissionDenied {
+                // GNU compatibility (rm/fail-eacces.sh)
+                // here, GNU doesn't use some kind of remove_dir_all
+                // It will show directory+file
+                show_error!(
+                    "cannot remove '{}': {}",
+                    path.display(),
+                    "Permission denied"
+                );
+            } else {
+                show_error!("cannot remove '{}': {}", path.display(), e);
+            }
         }
     } else {
         let mut dirs: VecDeque<DirEntry> = VecDeque::new();
@@ -314,7 +325,16 @@ fn remove_dir(path: &Path, options: &Options) -> bool {
             }
         }
         Err(e) => {
-            show_error!("cannot remove '{}': {}", path.display(), e);
+            if e.kind() == std::io::ErrorKind::PermissionDenied {
+                // GNU compatibility (rm/fail-eacces.sh)
+                show_error!(
+                    "cannot remove '{}': {}",
+                    path.display(),
+                    "Permission denied"
+                );
+            } else {
+                show_error!("cannot remove '{}': {}", path.display(), e);
+            }
             return true;
         }
     }
@@ -352,7 +372,16 @@ fn remove_file(path: &Path, options: &Options) -> bool {
             }
         }
         Err(e) => {
-            show_error!("removing '{}': {}", path.display(), e);
+            if e.kind() == std::io::ErrorKind::PermissionDenied {
+                // GNU compatibility (rm/fail-eacces.sh)
+                show_error!(
+                    "cannot remove '{}': {}",
+                    path.display(),
+                    "Permission denied"
+                );
+            } else {
+                show_error!("cannot remove '{}': {}", path.display(), e);
+            }
             return true;
         }
     }


@@ -102,17 +102,17 @@ pub fn read(
             carry_over.clear();
             carry_over.extend_from_slice(&buffer[read..]);
 
-            let payload = Chunk::new(buffer, |buf| {
-                let mut lines = unsafe {
-                    // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
-                    // because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
-                    std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
-                };
-                let read = crash_if_err!(1, std::str::from_utf8(&buf[..read]));
-                parse_lines(read, &mut lines, separator, &settings);
-                lines
-            });
-            if !payload.borrow_lines().is_empty() {
+            if read != 0 {
+                let payload = Chunk::new(buffer, |buf| {
+                    let mut lines = unsafe {
+                        // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
+                        // because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
+                        std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
+                    };
+                    let read = crash_if_err!(1, std::str::from_utf8(&buf[..read]));
+                    parse_lines(read, &mut lines, separator, &settings);
+                    lines
+                });
                 sender.send(payload).unwrap();
             }
             if !should_continue {
@@ -175,6 +175,7 @@
     separator: u8,
 ) -> (usize, bool) {
     let mut read_target = &mut buffer[start_offset..];
+    let mut last_file_target_size = read_target.len();
     loop {
         match file.read(read_target) {
             Ok(0) => {
@@ -208,14 +209,27 @@
                         read_target = &mut buffer[len..];
                     }
                 } else {
-                    // This file is empty.
+                    // This file has been fully read.
+                    let mut leftover_len = read_target.len();
+                    if last_file_target_size != leftover_len {
+                        // The file was not empty.
+                        let read_len = buffer.len() - leftover_len;
+                        if buffer[read_len - 1] != separator {
+                            // The file did not end with a separator. We have to insert one.
+                            buffer[read_len] = separator;
+                            leftover_len -= 1;
+                        }
+                        let read_len = buffer.len() - leftover_len;
+                        read_target = &mut buffer[read_len..];
+                    }
                     if let Some(next_file) = next_files.next() {
                         // There is another file.
+                        last_file_target_size = leftover_len;
                         *file = next_file;
                     } else {
                         // This was the last file.
-                        let leftover_len = read_target.len();
-                        return (buffer.len() - leftover_len, false);
+                        let read_len = buffer.len() - leftover_len;
+                        return (read_len, false);
                     }
                 }
             }


@@ -12,8 +12,12 @@
 //! The buffers for the individual chunks are recycled. There are two buffers.
 
 use std::cmp::Ordering;
+use std::fs::File;
+use std::io::BufReader;
 use std::io::{BufWriter, Write};
 use std::path::Path;
+use std::process::Child;
+use std::process::{Command, Stdio};
 use std::{
     fs::OpenOptions,
     io::Read,
@@ -25,12 +29,13 @@ use itertools::Itertools;
 
 use tempfile::TempDir;
 
+use crate::Line;
 use crate::{
     chunks::{self, Chunk},
     compare_by, merge, output_sorted_lines, sort_by, GlobalSettings,
 };
 
-const MIN_BUFFER_SIZE: usize = 8_000;
+const START_BUFFER_SIZE: usize = 8_000;
 
 /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
 pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) {
@@ -63,10 +68,31 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
     );
     match read_result {
         ReadResult::WroteChunksToFile { chunks_written } => {
-            let files = (0..chunks_written)
-                .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string()))
-                .collect::<Vec<_>>();
-            let mut merger = merge::merge(&files, settings);
+            let mut children = Vec::new();
+            let files = (0..chunks_written).map(|chunk_num| {
+                let file_path = tmp_dir.path().join(chunk_num.to_string());
+                let file = File::open(file_path).unwrap();
+                if let Some(compress_prog) = &settings.compress_prog {
+                    let mut command = Command::new(compress_prog);
+                    command.stdin(file).stdout(Stdio::piped()).arg("-d");
+                    let mut child = crash_if_err!(
+                        2,
+                        command.spawn().map_err(|err| format!(
+                            "couldn't execute compress program: errno {}",
+                            err.raw_os_error().unwrap()
+                        ))
+                    );
+                    let child_stdout = child.stdout.take().unwrap();
+                    children.push(child);
+                    Box::new(BufReader::new(child_stdout)) as Box<dyn Read + Send>
+                } else {
+                    Box::new(BufReader::new(file)) as Box<dyn Read + Send>
+                }
+            });
+            let mut merger = merge::merge_with_file_limit(files, settings);
+            for child in children {
+                assert_child_success(child, settings.compress_prog.as_ref().unwrap());
+            }
             merger.write_all(settings);
         }
         ReadResult::SortedSingleChunk(chunk) => {
@@ -132,7 +158,14 @@ fn reader_writer(
     for _ in 0..2 {
         chunks::read(
             &mut sender_option,
-            vec![0; MIN_BUFFER_SIZE],
+            vec![
+                0;
+                if START_BUFFER_SIZE < buffer_size {
+                    START_BUFFER_SIZE
+                } else {
+                    buffer_size
+                }
+            ],
             Some(buffer_size),
             &mut carry_over,
             &mut file,
@@ -171,6 +204,7 @@
             write(
                 &mut chunk,
                 &tmp_dir.path().join(file_number.to_string()),
+                settings.compress_prog.as_deref(),
                 separator,
             );
 
@@ -193,14 +227,45 @@
 }
 
 /// Write the lines in `chunk` to `file`, separated by `separator`.
-fn write(chunk: &mut Chunk, file: &Path, separator: u8) {
+/// `compress_prog` is used to optionally compress file contents.
+fn write(chunk: &mut Chunk, file: &Path, compress_prog: Option<&str>, separator: u8) {
     chunk.with_lines_mut(|lines| {
         // Write the lines to the file
         let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file));
-        let mut writer = BufWriter::new(file);
-        for s in lines.iter() {
-            crash_if_err!(1, writer.write_all(s.line.as_bytes()));
-            crash_if_err!(1, writer.write_all(&[separator]));
-        }
+        if let Some(compress_prog) = compress_prog {
+            let mut command = Command::new(compress_prog);
+            command.stdin(Stdio::piped()).stdout(file);
+            let mut child = crash_if_err!(
+                2,
+                command.spawn().map_err(|err| format!(
+                    "couldn't execute compress program: errno {}",
+                    err.raw_os_error().unwrap()
+                ))
+            );
+            let mut writer = BufWriter::new(child.stdin.take().unwrap());
+            write_lines(lines, &mut writer, separator);
+            writer.flush().unwrap();
+            drop(writer);
+            assert_child_success(child, compress_prog);
+        } else {
+            let mut writer = BufWriter::new(file);
+            write_lines(lines, &mut writer, separator);
+        };
     });
 }
+
+fn write_lines<'a, T: Write>(lines: &[Line<'a>], writer: &mut T, separator: u8) {
+    for s in lines {
+        crash_if_err!(1, writer.write_all(s.line.as_bytes()));
+        crash_if_err!(1, writer.write_all(&[separator]));
+    }
+}
+
+fn assert_child_success(mut child: Child, program: &str) {
+    if !matches!(
+        child.wait().map(|e| e.code()),
+        Ok(Some(0)) | Ok(None) | Err(_)
+    ) {
+        crash!(2, "'{}' terminated abnormally", program)
+    }
+}


@@ -9,8 +9,8 @@
 
 use std::{
     cmp::Ordering,
-    ffi::OsStr,
-    io::{Read, Write},
+    fs::File,
+    io::{BufWriter, Read, Write},
     iter,
     rc::Rc,
     sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
@@ -18,18 +18,69 @@
 };
 
 use compare::Compare;
+use itertools::Itertools;
 
 use crate::{
     chunks::{self, Chunk},
-    compare_by, open, GlobalSettings,
+    compare_by, GlobalSettings,
 };
 
 // Merge already sorted files.
-pub fn merge<'a>(files: &[impl AsRef<OsStr>], settings: &'a GlobalSettings) -> FileMerger<'a> {
+pub fn merge_with_file_limit<F: ExactSizeIterator<Item = Box<dyn Read + Send>>>(
+    files: F,
+    settings: &GlobalSettings,
+) -> FileMerger {
+    if files.len() > settings.merge_batch_size {
+        let tmp_dir = tempfile::Builder::new()
+            .prefix("uutils_sort")
+            .tempdir_in(&settings.tmp_dir)
+            .unwrap();
+        let mut batch_number = 0;
+        let mut remaining_files = files.len();
+        let batches = files.chunks(settings.merge_batch_size);
+        let mut batches = batches.into_iter();
+        while batch_number + remaining_files > settings.merge_batch_size && remaining_files != 0 {
+            remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
+            let mut merger = merge_without_limit(batches.next().unwrap(), settings);
+            let tmp_file = File::create(tmp_dir.path().join(batch_number.to_string())).unwrap();
+            merger.write_all_to(settings, &mut BufWriter::new(tmp_file));
+            batch_number += 1;
+        }
+        let batch_files = (0..batch_number).map(|n| {
+            Box::new(File::open(tmp_dir.path().join(n.to_string())).unwrap())
+                as Box<dyn Read + Send>
+        });
+        if batch_number > settings.merge_batch_size {
+            assert!(batches.next().is_none());
+            merge_with_file_limit(
+                Box::new(batch_files) as Box<dyn ExactSizeIterator<Item = Box<dyn Read + Send>>>,
+                settings,
+            )
+        } else {
+            let final_batch = batches.next();
+            assert!(batches.next().is_none());
+            merge_without_limit(
+                batch_files.chain(final_batch.into_iter().flatten()),
+                settings,
+            )
+        }
+    } else {
+        merge_without_limit(files, settings)
+    }
+}
+
+/// Merge files without limiting how many files are concurrently open
+///
+/// It is the responsibility of the caller to ensure that `files` yields only
+/// as many files as we are allowed to open concurrently.
+fn merge_without_limit<F: Iterator<Item = Box<dyn Read + Send>>>(
+    files: F,
+    settings: &GlobalSettings,
+) -> FileMerger {
     let (request_sender, request_receiver) = channel();
-    let mut reader_files = Vec::with_capacity(files.len());
-    let mut loaded_receivers = Vec::with_capacity(files.len());
-    for (file_number, file) in files.iter().map(open).enumerate() {
+    let mut reader_files = Vec::with_capacity(files.size_hint().0);
+    let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
+    for (file_number, file) in files.enumerate() {
         let (sender, receiver) = sync_channel(2);
         loaded_receivers.push(receiver);
         reader_files.push(ReaderFile {
@@ -146,7 +197,11 @@ impl<'a> FileMerger<'a> {
     /// Write the merged contents to the output file.
     pub fn write_all(&mut self, settings: &GlobalSettings) {
         let mut out = settings.out_writer();
-        while self.write_next(settings, &mut out) {}
+        self.write_all_to(settings, &mut out);
+    }
+
+    pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
+        while self.write_next(settings, out) {}
     }
 
     fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {


@@ -96,6 +96,8 @@ static OPT_PARALLEL: &str = "parallel";
 static OPT_FILES0_FROM: &str = "files0-from";
 static OPT_BUF_SIZE: &str = "buffer-size";
 static OPT_TMP_DIR: &str = "temporary-directory";
+static OPT_COMPRESS_PROG: &str = "compress-program";
+static OPT_BATCH_SIZE: &str = "batch-size";
 
 static ARG_FILES: &str = "files";
 
@@ -156,6 +158,8 @@ pub struct GlobalSettings {
     zero_terminated: bool,
     buffer_size: usize,
     tmp_dir: PathBuf,
+    compress_prog: Option<String>,
+    merge_batch_size: usize,
 }
 
 impl GlobalSettings {
@@ -223,6 +227,8 @@ impl Default for GlobalSettings {
             zero_terminated: false,
             buffer_size: DEFAULT_BUF_SIZE,
             tmp_dir: PathBuf::new(),
+            compress_prog: None,
+            merge_batch_size: 16,
         }
     }
 }
@@ -1076,6 +1082,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
                 .takes_value(true)
                 .value_name("DIR"),
         )
+        .arg(
+            Arg::with_name(OPT_COMPRESS_PROG)
+                .long(OPT_COMPRESS_PROG)
+                .help("compress temporary files with PROG, decompress with PROG -d")
+                .long_help("PROG has to take input from stdin and output to stdout")
+                .value_name("PROG")
+        )
+        .arg(
+            Arg::with_name(OPT_BATCH_SIZE)
+                .long(OPT_BATCH_SIZE)
+                .help("Merge at most N_MERGE inputs at once.")
+                .value_name("N_MERGE")
+        )
         .arg(
             Arg::with_name(OPT_FILES0_FROM)
                 .long(OPT_FILES0_FROM)
@@ -1167,6 +1186,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
         .map(PathBuf::from)
         .unwrap_or_else(env::temp_dir);
 
+    settings.compress_prog = matches.value_of(OPT_COMPRESS_PROG).map(String::from);
+
+    if let Some(n_merge) = matches.value_of(OPT_BATCH_SIZE) {
+        settings.merge_batch_size = n_merge
+            .parse()
+            .unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
+    }
+
     settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED);
     settings.merge = matches.is_present(OPT_MERGE);
 
@@ -1242,7 +1269,7 @@ fn output_sorted_lines<'a>(iter: impl Iterator<Item = &'a Line<'a>>, settings: &
 
 fn exec(files: &[String], settings: &GlobalSettings) -> i32 {
     if settings.merge {
-        let mut file_merger = merge::merge(files, settings);
+        let mut file_merger = merge::merge_with_file_limit(files.iter().map(open), settings);
         file_merger.write_all(settings);
     } else if settings.check {
         if files.len() > 1 {


@@ -837,3 +837,64 @@ fn test_nonexistent_file() {
 fn test_blanks() {
     test_helper("blanks", &["-b", "--ignore-blanks"]);
 }
+
+#[test]
+fn sort_multiple() {
+    new_ucmd!()
+        .args(&["no_trailing_newline1.txt", "no_trailing_newline2.txt"])
+        .succeeds()
+        .stdout_is("a\nb\nb\n");
+}
+
+#[test]
+fn sort_empty_chunk() {
+    new_ucmd!()
+        .args(&["-S", "40B"])
+        .pipe_in("a\na\n")
+        .succeeds()
+        .stdout_is("a\na\n");
+}
+
+#[test]
+#[cfg(target_os = "linux")]
+fn test_compress() {
+    new_ucmd!()
+        .args(&[
+            "ext_sort.txt",
+            "-n",
+            "--compress-program",
+            "gzip",
+            "-S",
+            "10",
+        ])
+        .succeeds()
+        .stdout_only_fixture("ext_sort.expected");
+}
+
+#[test]
+fn test_compress_fail() {
+    new_ucmd!()
+        .args(&[
+            "ext_sort.txt",
+            "-n",
+            "--compress-program",
+            "nonexistent-program",
+            "-S",
+            "10",
+        ])
+        .fails()
+        .stderr_only("sort: couldn't execute compress program: errno 2");
+}
+
+#[test]
+fn test_merge_batches() {
+    new_ucmd!()
+        .args(&[
+            "ext_sort.txt",
+            "-n",
+            "-S",
+            "150B",
+        ])
+        .succeeds()
+        .stdout_only_fixture("ext_sort.expected");
+}


@@ -0,0 +1,2 @@
+a
+b


@@ -0,0 +1 @@
+b


@@ -44,7 +44,7 @@ sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver
 # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils
 sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile
 sed -i 's| tr | /usr/bin/tr |' tests/init.sh
-make
+make -j "$(nproc)"
 # Generate the factor tests, so they can be fixed
 # Used to be 36. Reduced to 20 to decrease the log size
 for i in {00..20}
@@ -59,7 +59,7 @@
 done
 
-grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||'
+grep -rl 'path_prepend_' tests/* | xargs sed -i 's| path_prepend_ ./src||'
 sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh
 
 # Remove tests checking for --version & --help
@@ -94,8 +94,28 @@ sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
 sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
 sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh
 
-#Add specific timeout to tests that currently hang to limit time spent waiting
+# Add specific timeout to tests that currently hang to limit time spent waiting
 sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
 sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh
 
+# Remove dup of /usr/bin/ when executed several times
+grep -rl '/usr/bin//usr/bin/' tests/* | xargs --no-run-if-empty sed -i 's|/usr/bin//usr/bin/|/usr/bin/|g'
+
+#### Adjust tests to make them work with Rust/coreutils
+# in some cases, what we are doing in rust/coreutils is good (or better)
+# we should not regress our project just to match what GNU is going.
+# So, do some changes on the fly
+sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh
+sed -i -e "s|rm: cannot remove 'a/b/file'|rm: cannot remove 'a'|g" tests/rm/cycle.sh
+sed -i -e "s|rm: cannot remove directory 'b/a/p'|rm: cannot remove 'b'|g" tests/rm/rm1.sh
+sed -i -e "s|rm: cannot remove 'a/1'|rm: cannot remove 'a'|g" tests/rm/rm2.sh
+sed -i -e "s|removed directory 'a/'|removed directory 'a'|g" tests/rm/v-slash.sh
+
 test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}"