mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 12:37:49 +00:00
Merge branch 'master' of github.com:uutils/coreutils into refactoring_parse_size
This commit is contained in:
commit
12de58aec0
20 changed files with 519 additions and 148 deletions
42
Cargo.lock
generated
42
Cargo.lock
generated
|
@ -44,13 +44,16 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.4.12"
|
||||
name = "arrayref"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
|
||||
dependencies = [
|
||||
"nodrop",
|
||||
]
|
||||
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
|
@ -100,11 +103,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
|
||||
[[package]]
|
||||
name = "blake2-rfc"
|
||||
version = "0.2.18"
|
||||
name = "blake2b_simd"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
|
||||
checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"constant_time_eq",
|
||||
]
|
||||
|
@ -700,9 +704,9 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
|
|||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.2"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
|
||||
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
@ -1383,12 +1387,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.9"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
|
@ -1501,9 +1502,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.3.0"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
|
||||
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -1904,6 +1905,7 @@ dependencies = [
|
|||
name = "uu_dircolors"
|
||||
version = "0.0.6"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"glob 0.3.0",
|
||||
"uucore",
|
||||
"uucore_procs",
|
||||
|
@ -2028,7 +2030,7 @@ dependencies = [
|
|||
name = "uu_hashsum"
|
||||
version = "0.0.6"
|
||||
dependencies = [
|
||||
"blake2-rfc",
|
||||
"blake2b_simd",
|
||||
"clap",
|
||||
"digest",
|
||||
"hex",
|
||||
|
@ -2215,6 +2217,8 @@ dependencies = [
|
|||
"nix 0.13.1",
|
||||
"redox_syscall 0.1.57",
|
||||
"redox_termios",
|
||||
"unicode-segmentation",
|
||||
"unicode-width",
|
||||
"uucore",
|
||||
"uucore_procs",
|
||||
]
|
||||
|
|
42
README.md
42
README.md
|
@ -342,22 +342,22 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
|
|||
| Done | Semi-Done | To Do |
|
||||
|-----------|-----------|--------|
|
||||
| arch | cp | chcon |
|
||||
| base32 | expr | csplit |
|
||||
| base64 | install | dd |
|
||||
| basename | ls | df |
|
||||
| cat | more | numfmt |
|
||||
| chgrp | od (`--strings` and 128-bit data types missing) | runcon |
|
||||
| chmod | printf | stty |
|
||||
| chown | sort | |
|
||||
| chroot | split | |
|
||||
| cksum | tail | |
|
||||
| comm | test | |
|
||||
| csplit | date | |
|
||||
| cut | join | |
|
||||
| dircolors | df | |
|
||||
| base32 | date | dd |
|
||||
| base64 | df | runcon |
|
||||
| basename | expr | stty |
|
||||
| cat | install | |
|
||||
| chgrp | join | |
|
||||
| chmod | ls | |
|
||||
| chown | more | |
|
||||
| chroot | numfmt | |
|
||||
| cksum | od (`--strings` and 128-bit data types missing) | |
|
||||
| comm | pr | |
|
||||
| csplit | printf | |
|
||||
| cut | sort | |
|
||||
| dircolors | split | |
|
||||
| dirname | tac | |
|
||||
| du | pr | |
|
||||
| echo | | |
|
||||
| du | tail | |
|
||||
| echo | test | |
|
||||
| env | | |
|
||||
| expand | | |
|
||||
| factor | | |
|
||||
|
@ -374,12 +374,12 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
|
|||
| link | | |
|
||||
| ln | | |
|
||||
| logname | | |
|
||||
| ~~md5sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | |
|
||||
| ~~sha1sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | |
|
||||
| ~~sha224sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | |
|
||||
| ~~sha256sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | |
|
||||
| ~~sha384sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | |
|
||||
| ~~sha512sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | |
|
||||
| ~~md5sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
|
||||
| ~~sha1sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
|
||||
| ~~sha224sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
|
||||
| ~~sha256sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
|
||||
| ~~sha384sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
|
||||
| ~~sha512sum~~ (replaced by [hashsum](https://github.com/uutils/coreutils/blob/master/src/uu/hashsum/src/hashsum.rs)) | | |
|
||||
| mkdir | | |
|
||||
| mkfifo | | |
|
||||
| mknod | | |
|
||||
|
|
|
@ -15,6 +15,7 @@ edition = "2018"
|
|||
path = "src/dircolors.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = "2.33"
|
||||
glob = "0.3.0"
|
||||
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
|
||||
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// (c) Jian Zeng <anonymousknight96@gmail.com>
|
||||
// (c) Mitchell Mebane <mitchell.mebane@gmail.com>
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
@ -15,6 +16,15 @@ use std::env;
|
|||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
|
||||
use clap::{crate_version, App, Arg};
|
||||
|
||||
mod options {
|
||||
pub const BOURNE_SHELL: &str = "bourne-shell";
|
||||
pub const C_SHELL: &str = "c-shell";
|
||||
pub const PRINT_DATABASE: &str = "print-database";
|
||||
pub const FILE: &str = "FILE";
|
||||
}
|
||||
|
||||
static SYNTAX: &str = "[OPTION]... [FILE]";
|
||||
static SUMMARY: &str = "Output commands to set the LS_COLORS environment variable.";
|
||||
static LONG_HELP: &str = "
|
||||
|
@ -52,28 +62,56 @@ pub fn guess_syntax() -> OutputFmt {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_usage() -> String {
|
||||
format!("{0} {1}", executable!(), SYNTAX)
|
||||
}
|
||||
|
||||
pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||
let args = args
|
||||
.collect_str(InvalidEncodingHandling::Ignore)
|
||||
.accept_any();
|
||||
|
||||
let matches = app!(SYNTAX, SUMMARY, LONG_HELP)
|
||||
.optflag("b", "sh", "output Bourne shell code to set LS_COLORS")
|
||||
.optflag(
|
||||
"",
|
||||
"bourne-shell",
|
||||
"output Bourne shell code to set LS_COLORS",
|
||||
)
|
||||
.optflag("c", "csh", "output C shell code to set LS_COLORS")
|
||||
.optflag("", "c-shell", "output C shell code to set LS_COLORS")
|
||||
.optflag("p", "print-database", "print the byte counts")
|
||||
.parse(args);
|
||||
let usage = get_usage();
|
||||
|
||||
if (matches.opt_present("csh")
|
||||
|| matches.opt_present("c-shell")
|
||||
|| matches.opt_present("sh")
|
||||
|| matches.opt_present("bourne-shell"))
|
||||
&& matches.opt_present("print-database")
|
||||
let matches = App::new(executable!())
|
||||
.version(crate_version!())
|
||||
.about(SUMMARY)
|
||||
.usage(&usage[..])
|
||||
.after_help(LONG_HELP)
|
||||
.arg(
|
||||
Arg::with_name(options::BOURNE_SHELL)
|
||||
.long("sh")
|
||||
.short("b")
|
||||
.visible_alias("bourne-shell")
|
||||
.help("output Bourne shell code to set LS_COLORS")
|
||||
.display_order(1),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(options::C_SHELL)
|
||||
.long("csh")
|
||||
.short("c")
|
||||
.visible_alias("c-shell")
|
||||
.help("output C shell code to set LS_COLORS")
|
||||
.display_order(2),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(options::PRINT_DATABASE)
|
||||
.long("print-database")
|
||||
.short("p")
|
||||
.help("print the byte counts")
|
||||
.display_order(3),
|
||||
)
|
||||
.arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
|
||||
.get_matches_from(&args);
|
||||
|
||||
let files = matches
|
||||
.values_of(options::FILE)
|
||||
.map_or(vec![], |file_values| file_values.collect());
|
||||
|
||||
// clap provides .conflicts_with / .conflicts_with_all, but we want to
|
||||
// manually handle conflicts so we can match the output of GNU coreutils
|
||||
if (matches.is_present(options::C_SHELL) || matches.is_present(options::BOURNE_SHELL))
|
||||
&& matches.is_present(options::PRINT_DATABASE)
|
||||
{
|
||||
show_usage_error!(
|
||||
"the options to output dircolors' internal database and\nto select a shell \
|
||||
|
@ -82,12 +120,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if matches.opt_present("print-database") {
|
||||
if !matches.free.is_empty() {
|
||||
if matches.is_present(options::PRINT_DATABASE) {
|
||||
if !files.is_empty() {
|
||||
show_usage_error!(
|
||||
"extra operand ‘{}’\nfile operands cannot be combined with \
|
||||
--print-database (-p)",
|
||||
matches.free[0]
|
||||
files[0]
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
@ -96,9 +134,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
}
|
||||
|
||||
let mut out_format = OutputFmt::Unknown;
|
||||
if matches.opt_present("csh") || matches.opt_present("c-shell") {
|
||||
if matches.is_present(options::C_SHELL) {
|
||||
out_format = OutputFmt::CShell;
|
||||
} else if matches.opt_present("sh") || matches.opt_present("bourne-shell") {
|
||||
} else if matches.is_present(options::BOURNE_SHELL) {
|
||||
out_format = OutputFmt::Shell;
|
||||
}
|
||||
|
||||
|
@ -113,24 +151,20 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
}
|
||||
|
||||
let result;
|
||||
if matches.free.is_empty() {
|
||||
if files.is_empty() {
|
||||
result = parse(INTERNAL_DB.lines(), out_format, "")
|
||||
} else {
|
||||
if matches.free.len() > 1 {
|
||||
show_usage_error!("extra operand ‘{}’", matches.free[1]);
|
||||
if files.len() > 1 {
|
||||
show_usage_error!("extra operand ‘{}’", files[1]);
|
||||
return 1;
|
||||
}
|
||||
match File::open(matches.free[0].as_str()) {
|
||||
match File::open(files[0]) {
|
||||
Ok(f) => {
|
||||
let fin = BufReader::new(f);
|
||||
result = parse(
|
||||
fin.lines().filter_map(Result::ok),
|
||||
out_format,
|
||||
matches.free[0].as_str(),
|
||||
)
|
||||
result = parse(fin.lines().filter_map(Result::ok), out_format, files[0])
|
||||
}
|
||||
Err(e) => {
|
||||
show_error!("{}: {}", matches.free[0], e);
|
||||
show_error!("{}: {}", files[0], e);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -393,6 +393,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
although the apparent size is usually smaller, it may be larger due to holes \
|
||||
in ('sparse') files, internal fragmentation, indirect blocks, and the like"
|
||||
)
|
||||
.alias("app") // The GNU testsuite uses this alias
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(options::BLOCK_SIZE)
|
||||
|
|
9
src/uu/hashsum/BENCHMARKING.md
Normal file
9
src/uu/hashsum/BENCHMARKING.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
## Benchmarking hashsum
|
||||
|
||||
### To bench blake2
|
||||
|
||||
Taken from: https://github.com/uutils/coreutils/pull/2296
|
||||
|
||||
With a large file:
|
||||
$ hyperfine "./target/release/coreutils hashsum --b2sum large-file" "b2sum large-file"
|
||||
|
|
@ -25,7 +25,7 @@ regex-syntax = "0.6.7"
|
|||
sha1 = "0.6.0"
|
||||
sha2 = "0.6.0"
|
||||
sha3 = "0.6.0"
|
||||
blake2-rfc = "0.2.18"
|
||||
blake2b_simd = "0.5.11"
|
||||
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
|
||||
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
extern crate blake2_rfc;
|
||||
extern crate digest;
|
||||
extern crate md5;
|
||||
extern crate sha1;
|
||||
|
@ -49,9 +48,9 @@ impl Digest for md5::Context {
|
|||
}
|
||||
}
|
||||
|
||||
impl Digest for blake2_rfc::blake2b::Blake2b {
|
||||
impl Digest for blake2b_simd::State {
|
||||
fn new() -> Self {
|
||||
blake2_rfc::blake2b::Blake2b::new(64)
|
||||
Self::new()
|
||||
}
|
||||
|
||||
fn input(&mut self, input: &[u8]) {
|
||||
|
@ -59,12 +58,12 @@ impl Digest for blake2_rfc::blake2b::Blake2b {
|
|||
}
|
||||
|
||||
fn result(&mut self, out: &mut [u8]) {
|
||||
let hash_result = &self.clone().finalize();
|
||||
let hash_result = &self.finalize();
|
||||
out.copy_from_slice(&hash_result.as_bytes());
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
*self = blake2_rfc::blake2b::Blake2b::new(64);
|
||||
*self = Self::new();
|
||||
}
|
||||
|
||||
fn output_bits(&self) -> usize {
|
||||
|
|
|
@ -19,7 +19,6 @@ mod digest;
|
|||
|
||||
use self::digest::Digest;
|
||||
|
||||
use blake2_rfc::blake2b::Blake2b;
|
||||
use clap::{App, Arg, ArgMatches};
|
||||
use hex::ToHex;
|
||||
use md5::Context as Md5;
|
||||
|
@ -85,7 +84,11 @@ fn detect_algo<'a>(
|
|||
"sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box<dyn Digest>, 256),
|
||||
"sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box<dyn Digest>, 384),
|
||||
"sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box<dyn Digest>, 512),
|
||||
"b2sum" => ("BLAKE2", Box::new(Blake2b::new(64)) as Box<dyn Digest>, 512),
|
||||
"b2sum" => (
|
||||
"BLAKE2",
|
||||
Box::new(blake2b_simd::State::new()) as Box<dyn Digest>,
|
||||
512,
|
||||
),
|
||||
"sha3sum" => match matches.value_of("bits") {
|
||||
Some(bits_str) => match (&bits_str).parse::<usize>() {
|
||||
Ok(224) => (
|
||||
|
@ -187,7 +190,7 @@ fn detect_algo<'a>(
|
|||
set_or_crash("SHA512", Box::new(Sha512::new()), 512)
|
||||
}
|
||||
if matches.is_present("b2sum") {
|
||||
set_or_crash("BLAKE2", Box::new(Blake2b::new(64)), 512)
|
||||
set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512)
|
||||
}
|
||||
if matches.is_present("sha3") {
|
||||
match matches.value_of("bits") {
|
||||
|
|
|
@ -20,6 +20,8 @@ uucore = { version = ">=0.0.7", package = "uucore", path = "../../uucore" }
|
|||
uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
|
||||
crossterm = ">=0.19"
|
||||
atty = "0.2.14"
|
||||
unicode-width = "0.1.7"
|
||||
unicode-segmentation = "1.7.1"
|
||||
|
||||
[target.'cfg(target_os = "redox")'.dependencies]
|
||||
redox_termios = "0.1"
|
||||
|
|
|
@ -29,6 +29,9 @@ use crossterm::{
|
|||
terminal,
|
||||
};
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
pub mod options {
|
||||
pub const SILENT: &str = "silent";
|
||||
pub const LOGICAL: &str = "logical";
|
||||
|
@ -140,7 +143,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
if let Some(files) = matches.values_of(options::FILES) {
|
||||
let mut stdout = setup_term();
|
||||
let length = files.len();
|
||||
for (idx, file) in files.enumerate() {
|
||||
|
||||
let mut files_iter = files.peekable();
|
||||
while let (Some(file), next_file) = (files_iter.next(), files_iter.peek()) {
|
||||
let file = Path::new(file);
|
||||
if file.is_dir() {
|
||||
terminal::disable_raw_mode().unwrap();
|
||||
|
@ -157,15 +162,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
}
|
||||
let mut reader = BufReader::new(File::open(file).unwrap());
|
||||
reader.read_to_string(&mut buff).unwrap();
|
||||
let is_last = idx + 1 == length;
|
||||
more(&buff, &mut stdout, is_last);
|
||||
more(&buff, &mut stdout, next_file.copied());
|
||||
buff.clear();
|
||||
}
|
||||
reset_term(&mut stdout);
|
||||
} else if atty::isnt(atty::Stream::Stdin) {
|
||||
stdin().read_to_string(&mut buff).unwrap();
|
||||
let mut stdout = setup_term();
|
||||
more(&buff, &mut stdout, true);
|
||||
more(&buff, &mut stdout, None);
|
||||
reset_term(&mut stdout);
|
||||
} else {
|
||||
show_usage_error!("bad usage");
|
||||
|
@ -200,7 +204,7 @@ fn reset_term(stdout: &mut std::io::Stdout) {
|
|||
#[inline(always)]
|
||||
fn reset_term(_: &mut usize) {}
|
||||
|
||||
fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
|
||||
fn more(buff: &str, mut stdout: &mut Stdout, next_file: Option<&str>) {
|
||||
let (cols, rows) = terminal::size().unwrap();
|
||||
let lines = break_buff(buff, usize::from(cols));
|
||||
let line_count: u16 = lines.len().try_into().unwrap();
|
||||
|
@ -214,8 +218,11 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
|
|||
&mut stdout,
|
||||
lines.clone(),
|
||||
line_count,
|
||||
next_file,
|
||||
);
|
||||
|
||||
let is_last = next_file.is_none();
|
||||
|
||||
// Specifies whether we have reached the end of the file and should
|
||||
// return on the next key press. However, we immediately return when
|
||||
// this is the last file.
|
||||
|
@ -267,6 +274,7 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
|
|||
&mut stdout,
|
||||
lines.clone(),
|
||||
line_count,
|
||||
next_file,
|
||||
);
|
||||
|
||||
if lines_left == 0 {
|
||||
|
@ -285,6 +293,7 @@ fn draw(
|
|||
mut stdout: &mut std::io::Stdout,
|
||||
lines: Vec<String>,
|
||||
lc: u16,
|
||||
next_file: Option<&str>,
|
||||
) {
|
||||
execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine)).unwrap();
|
||||
let (up_mark, lower_mark) = calc_range(*upper_mark, rows, lc);
|
||||
|
@ -299,7 +308,7 @@ fn draw(
|
|||
.write_all(format!("\r{}\n", line).as_bytes())
|
||||
.unwrap();
|
||||
}
|
||||
make_prompt_and_flush(&mut stdout, lower_mark, lc);
|
||||
make_prompt_and_flush(&mut stdout, lower_mark, lc, next_file);
|
||||
*upper_mark = up_mark;
|
||||
}
|
||||
|
||||
|
@ -313,23 +322,30 @@ fn break_buff(buff: &str, cols: usize) -> Vec<String> {
|
|||
lines
|
||||
}
|
||||
|
||||
fn break_line(mut line: &str, cols: usize) -> Vec<String> {
|
||||
let breaks = (line.len() / cols).saturating_add(1);
|
||||
let mut lines = Vec::with_capacity(breaks);
|
||||
// TODO: Use unicode width instead of the length in bytes.
|
||||
if line.len() < cols {
|
||||
fn break_line(line: &str, cols: usize) -> Vec<String> {
|
||||
let width = UnicodeWidthStr::width(line);
|
||||
let mut lines = Vec::new();
|
||||
if width < cols {
|
||||
lines.push(line.to_string());
|
||||
return lines;
|
||||
}
|
||||
|
||||
for _ in 1..=breaks {
|
||||
let (line1, line2) = line.split_at(cols);
|
||||
lines.push(line1.to_string());
|
||||
if line2.len() < cols {
|
||||
lines.push(line2.to_string());
|
||||
break;
|
||||
let gr_idx = UnicodeSegmentation::grapheme_indices(line, true);
|
||||
let mut last_index = 0;
|
||||
let mut total_width = 0;
|
||||
for (index, grapheme) in gr_idx {
|
||||
let width = UnicodeWidthStr::width(grapheme);
|
||||
total_width += width;
|
||||
|
||||
if total_width > cols {
|
||||
lines.push(line[last_index..index].to_string());
|
||||
last_index = index;
|
||||
total_width = width;
|
||||
}
|
||||
line = line2;
|
||||
}
|
||||
|
||||
if last_index != line.len() {
|
||||
lines.push(line[last_index..].to_string());
|
||||
}
|
||||
lines
|
||||
}
|
||||
|
@ -339,7 +355,7 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
|
|||
let mut lower_mark = upper_mark.saturating_add(rows);
|
||||
|
||||
if lower_mark >= line_count {
|
||||
upper_mark = line_count.saturating_sub(rows);
|
||||
upper_mark = line_count.saturating_sub(rows).saturating_add(1);
|
||||
lower_mark = line_count;
|
||||
} else {
|
||||
lower_mark = lower_mark.saturating_sub(1)
|
||||
|
@ -348,12 +364,20 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
|
|||
}
|
||||
|
||||
// Make a prompt similar to original more
|
||||
fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
|
||||
fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16, next_file: Option<&str>) {
|
||||
let status = if lower_mark == lc {
|
||||
format!("Next file: {}", next_file.unwrap_or_default())
|
||||
} else {
|
||||
format!(
|
||||
"{}%",
|
||||
(lower_mark as f64 / lc as f64 * 100.0).round() as u16
|
||||
)
|
||||
};
|
||||
write!(
|
||||
stdout,
|
||||
"\r{}--More--({}%){}",
|
||||
"\r{}--More--({}){}",
|
||||
Attribute::Reverse,
|
||||
((lower_mark as f64 / lc as f64) * 100.0).round() as u16,
|
||||
status,
|
||||
Attribute::Reset
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -363,13 +387,14 @@ fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{break_line, calc_range};
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
// It is good to test the above functions
|
||||
#[test]
|
||||
fn test_calc_range() {
|
||||
assert_eq!((0, 24), calc_range(0, 25, 100));
|
||||
assert_eq!((50, 74), calc_range(50, 25, 100));
|
||||
assert_eq!((75, 100), calc_range(85, 25, 100));
|
||||
assert_eq!((76, 100), calc_range(85, 25, 100));
|
||||
}
|
||||
#[test]
|
||||
fn test_break_lines_long() {
|
||||
|
@ -379,11 +404,12 @@ mod tests {
|
|||
}
|
||||
|
||||
let lines = break_line(&test_string, 80);
|
||||
let widths: Vec<usize> = lines
|
||||
.iter()
|
||||
.map(|s| UnicodeWidthStr::width(&s[..]))
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
(80, 80, 40),
|
||||
(lines[0].len(), lines[1].len(), lines[2].len())
|
||||
);
|
||||
assert_eq!((80, 80, 40), (widths[0], widths[1], widths[2]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -397,4 +423,22 @@ mod tests {
|
|||
|
||||
assert_eq!(20, lines[0].len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_break_line_zwj() {
|
||||
let mut test_string = String::with_capacity(1100);
|
||||
for _ in 0..20 {
|
||||
test_string.push_str("👩🏻🔬");
|
||||
}
|
||||
|
||||
let lines = break_line(&test_string, 80);
|
||||
|
||||
let widths: Vec<usize> = lines
|
||||
.iter()
|
||||
.map(|s| UnicodeWidthStr::width(&s[..]))
|
||||
.collect();
|
||||
|
||||
// Each 👩🏻🔬 has a display width of 6, so the line breaks at the largest multiple of 6 that fits in 80 columns: 6 * 13 = 78
|
||||
assert_eq!((78, 42), (widths[0], widths[1]));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -255,7 +255,18 @@ fn handle_dir(path: &Path, options: &Options) -> bool {
|
|||
// correctly on Windows
|
||||
if let Err(e) = remove_dir_all(path) {
|
||||
had_err = true;
|
||||
show_error!("could not remove '{}': {}", path.display(), e);
|
||||
if e.kind() == std::io::ErrorKind::PermissionDenied {
|
||||
// GNU compatibility (rm/fail-eacces.sh)
|
||||
// Here, GNU does not use anything like remove_dir_all;
|
||||
// its message shows the directory and the file.
|
||||
show_error!(
|
||||
"cannot remove '{}': {}",
|
||||
path.display(),
|
||||
"Permission denied"
|
||||
);
|
||||
} else {
|
||||
show_error!("cannot remove '{}': {}", path.display(), e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let mut dirs: VecDeque<DirEntry> = VecDeque::new();
|
||||
|
@ -314,7 +325,16 @@ fn remove_dir(path: &Path, options: &Options) -> bool {
|
|||
}
|
||||
}
|
||||
Err(e) => {
|
||||
show_error!("cannot remove '{}': {}", path.display(), e);
|
||||
if e.kind() == std::io::ErrorKind::PermissionDenied {
|
||||
// GNU compatibility (rm/fail-eacces.sh)
|
||||
show_error!(
|
||||
"cannot remove '{}': {}",
|
||||
path.display(),
|
||||
"Permission denied"
|
||||
);
|
||||
} else {
|
||||
show_error!("cannot remove '{}': {}", path.display(), e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -352,7 +372,16 @@ fn remove_file(path: &Path, options: &Options) -> bool {
|
|||
}
|
||||
}
|
||||
Err(e) => {
|
||||
show_error!("removing '{}': {}", path.display(), e);
|
||||
if e.kind() == std::io::ErrorKind::PermissionDenied {
|
||||
// GNU compatibility (rm/fail-eacces.sh)
|
||||
show_error!(
|
||||
"cannot remove '{}': {}",
|
||||
path.display(),
|
||||
"Permission denied"
|
||||
);
|
||||
} else {
|
||||
show_error!("cannot remove '{}': {}", path.display(), e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,17 +102,17 @@ pub fn read(
|
|||
carry_over.clear();
|
||||
carry_over.extend_from_slice(&buffer[read..]);
|
||||
|
||||
let payload = Chunk::new(buffer, |buf| {
|
||||
let mut lines = unsafe {
|
||||
// SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
|
||||
// because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
|
||||
std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
|
||||
};
|
||||
let read = crash_if_err!(1, std::str::from_utf8(&buf[..read]));
|
||||
parse_lines(read, &mut lines, separator, &settings);
|
||||
lines
|
||||
});
|
||||
if !payload.borrow_lines().is_empty() {
|
||||
if read != 0 {
|
||||
let payload = Chunk::new(buffer, |buf| {
|
||||
let mut lines = unsafe {
|
||||
// SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
|
||||
// because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
|
||||
std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
|
||||
};
|
||||
let read = crash_if_err!(1, std::str::from_utf8(&buf[..read]));
|
||||
parse_lines(read, &mut lines, separator, &settings);
|
||||
lines
|
||||
});
|
||||
sender.send(payload).unwrap();
|
||||
}
|
||||
if !should_continue {
|
||||
|
@ -175,6 +175,7 @@ fn read_to_buffer(
|
|||
separator: u8,
|
||||
) -> (usize, bool) {
|
||||
let mut read_target = &mut buffer[start_offset..];
|
||||
let mut last_file_target_size = read_target.len();
|
||||
loop {
|
||||
match file.read(read_target) {
|
||||
Ok(0) => {
|
||||
|
@ -208,14 +209,27 @@ fn read_to_buffer(
|
|||
read_target = &mut buffer[len..];
|
||||
}
|
||||
} else {
|
||||
// This file is empty.
|
||||
// This file has been fully read.
|
||||
let mut leftover_len = read_target.len();
|
||||
if last_file_target_size != leftover_len {
|
||||
// The file was not empty.
|
||||
let read_len = buffer.len() - leftover_len;
|
||||
if buffer[read_len - 1] != separator {
|
||||
// The file did not end with a separator. We have to insert one.
|
||||
buffer[read_len] = separator;
|
||||
leftover_len -= 1;
|
||||
}
|
||||
let read_len = buffer.len() - leftover_len;
|
||||
read_target = &mut buffer[read_len..];
|
||||
}
|
||||
if let Some(next_file) = next_files.next() {
|
||||
// There is another file.
|
||||
last_file_target_size = leftover_len;
|
||||
*file = next_file;
|
||||
} else {
|
||||
// This was the last file.
|
||||
let leftover_len = read_target.len();
|
||||
return (buffer.len() - leftover_len, false);
|
||||
let read_len = buffer.len() - leftover_len;
|
||||
return (read_len, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,8 +12,12 @@
|
|||
//! The buffers for the individual chunks are recycled. There are two buffers.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::Path;
|
||||
use std::process::Child;
|
||||
use std::process::{Command, Stdio};
|
||||
use std::{
|
||||
fs::OpenOptions,
|
||||
io::Read,
|
||||
|
@ -25,12 +29,13 @@ use itertools::Itertools;
|
|||
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::Line;
|
||||
use crate::{
|
||||
chunks::{self, Chunk},
|
||||
compare_by, merge, output_sorted_lines, sort_by, GlobalSettings,
|
||||
};
|
||||
|
||||
const MIN_BUFFER_SIZE: usize = 8_000;
|
||||
const START_BUFFER_SIZE: usize = 8_000;
|
||||
|
||||
/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
|
||||
pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) {
|
||||
|
@ -63,10 +68,31 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
|
|||
);
|
||||
match read_result {
|
||||
ReadResult::WroteChunksToFile { chunks_written } => {
|
||||
let files = (0..chunks_written)
|
||||
.map(|chunk_num| tmp_dir.path().join(chunk_num.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let mut merger = merge::merge(&files, settings);
|
||||
let mut children = Vec::new();
|
||||
let files = (0..chunks_written).map(|chunk_num| {
|
||||
let file_path = tmp_dir.path().join(chunk_num.to_string());
|
||||
let file = File::open(file_path).unwrap();
|
||||
if let Some(compress_prog) = &settings.compress_prog {
|
||||
let mut command = Command::new(compress_prog);
|
||||
command.stdin(file).stdout(Stdio::piped()).arg("-d");
|
||||
let mut child = crash_if_err!(
|
||||
2,
|
||||
command.spawn().map_err(|err| format!(
|
||||
"couldn't execute compress program: errno {}",
|
||||
err.raw_os_error().unwrap()
|
||||
))
|
||||
);
|
||||
let child_stdout = child.stdout.take().unwrap();
|
||||
children.push(child);
|
||||
Box::new(BufReader::new(child_stdout)) as Box<dyn Read + Send>
|
||||
} else {
|
||||
Box::new(BufReader::new(file)) as Box<dyn Read + Send>
|
||||
}
|
||||
});
|
||||
let mut merger = merge::merge_with_file_limit(files, settings);
|
||||
for child in children {
|
||||
assert_child_success(child, settings.compress_prog.as_ref().unwrap());
|
||||
}
|
||||
merger.write_all(settings);
|
||||
}
|
||||
ReadResult::SortedSingleChunk(chunk) => {
|
||||
|
@ -132,7 +158,14 @@ fn reader_writer(
|
|||
for _ in 0..2 {
|
||||
chunks::read(
|
||||
&mut sender_option,
|
||||
vec![0; MIN_BUFFER_SIZE],
|
||||
vec![
|
||||
0;
|
||||
if START_BUFFER_SIZE < buffer_size {
|
||||
START_BUFFER_SIZE
|
||||
} else {
|
||||
buffer_size
|
||||
}
|
||||
],
|
||||
Some(buffer_size),
|
||||
&mut carry_over,
|
||||
&mut file,
|
||||
|
@ -171,6 +204,7 @@ fn reader_writer(
|
|||
write(
|
||||
&mut chunk,
|
||||
&tmp_dir.path().join(file_number.to_string()),
|
||||
settings.compress_prog.as_deref(),
|
||||
separator,
|
||||
);
|
||||
|
||||
|
@ -193,14 +227,45 @@ fn reader_writer(
|
|||
}
|
||||
|
||||
/// Write the lines in `chunk` to `file`, separated by `separator`.
|
||||
fn write(chunk: &mut Chunk, file: &Path, separator: u8) {
|
||||
/// `compress_prog` is used to optionally compress file contents.
|
||||
fn write(chunk: &mut Chunk, file: &Path, compress_prog: Option<&str>, separator: u8) {
|
||||
chunk.with_lines_mut(|lines| {
|
||||
// Write the lines to the file
|
||||
let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file));
|
||||
let mut writer = BufWriter::new(file);
|
||||
for s in lines.iter() {
|
||||
crash_if_err!(1, writer.write_all(s.line.as_bytes()));
|
||||
crash_if_err!(1, writer.write_all(&[separator]));
|
||||
}
|
||||
if let Some(compress_prog) = compress_prog {
|
||||
let mut command = Command::new(compress_prog);
|
||||
command.stdin(Stdio::piped()).stdout(file);
|
||||
let mut child = crash_if_err!(
|
||||
2,
|
||||
command.spawn().map_err(|err| format!(
|
||||
"couldn't execute compress program: errno {}",
|
||||
err.raw_os_error().unwrap()
|
||||
))
|
||||
);
|
||||
let mut writer = BufWriter::new(child.stdin.take().unwrap());
|
||||
write_lines(lines, &mut writer, separator);
|
||||
writer.flush().unwrap();
|
||||
drop(writer);
|
||||
assert_child_success(child, compress_prog);
|
||||
} else {
|
||||
let mut writer = BufWriter::new(file);
|
||||
write_lines(lines, &mut writer, separator);
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
fn write_lines<'a, T: Write>(lines: &[Line<'a>], writer: &mut T, separator: u8) {
|
||||
for s in lines {
|
||||
crash_if_err!(1, writer.write_all(s.line.as_bytes()));
|
||||
crash_if_err!(1, writer.write_all(&[separator]));
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_child_success(mut child: Child, program: &str) {
|
||||
if !matches!(
|
||||
child.wait().map(|e| e.code()),
|
||||
Ok(Some(0)) | Ok(None) | Err(_)
|
||||
) {
|
||||
crash!(2, "'{}' terminated abnormally", program)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
ffi::OsStr,
|
||||
io::{Read, Write},
|
||||
fs::File,
|
||||
io::{BufWriter, Read, Write},
|
||||
iter,
|
||||
rc::Rc,
|
||||
sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
|
||||
|
@ -18,18 +18,69 @@ use std::{
|
|||
};
|
||||
|
||||
use compare::Compare;
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::{
|
||||
chunks::{self, Chunk},
|
||||
compare_by, open, GlobalSettings,
|
||||
compare_by, GlobalSettings,
|
||||
};
|
||||
|
||||
// Merge already sorted files.
|
||||
pub fn merge<'a>(files: &[impl AsRef<OsStr>], settings: &'a GlobalSettings) -> FileMerger<'a> {
|
||||
pub fn merge_with_file_limit<F: ExactSizeIterator<Item = Box<dyn Read + Send>>>(
|
||||
files: F,
|
||||
settings: &GlobalSettings,
|
||||
) -> FileMerger {
|
||||
if files.len() > settings.merge_batch_size {
|
||||
let tmp_dir = tempfile::Builder::new()
|
||||
.prefix("uutils_sort")
|
||||
.tempdir_in(&settings.tmp_dir)
|
||||
.unwrap();
|
||||
let mut batch_number = 0;
|
||||
let mut remaining_files = files.len();
|
||||
let batches = files.chunks(settings.merge_batch_size);
|
||||
let mut batches = batches.into_iter();
|
||||
while batch_number + remaining_files > settings.merge_batch_size && remaining_files != 0 {
|
||||
remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
|
||||
let mut merger = merge_without_limit(batches.next().unwrap(), settings);
|
||||
let tmp_file = File::create(tmp_dir.path().join(batch_number.to_string())).unwrap();
|
||||
merger.write_all_to(settings, &mut BufWriter::new(tmp_file));
|
||||
batch_number += 1;
|
||||
}
|
||||
let batch_files = (0..batch_number).map(|n| {
|
||||
Box::new(File::open(tmp_dir.path().join(n.to_string())).unwrap())
|
||||
as Box<dyn Read + Send>
|
||||
});
|
||||
if batch_number > settings.merge_batch_size {
|
||||
assert!(batches.next().is_none());
|
||||
merge_with_file_limit(
|
||||
Box::new(batch_files) as Box<dyn ExactSizeIterator<Item = Box<dyn Read + Send>>>,
|
||||
settings,
|
||||
)
|
||||
} else {
|
||||
let final_batch = batches.next();
|
||||
assert!(batches.next().is_none());
|
||||
merge_without_limit(
|
||||
batch_files.chain(final_batch.into_iter().flatten()),
|
||||
settings,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
merge_without_limit(files, settings)
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge files without limiting how many files are concurrently open
|
||||
///
|
||||
/// It is the responsibility of the caller to ensure that `files` yields only
|
||||
/// as many files as we are allowed to open concurrently.
|
||||
fn merge_without_limit<F: Iterator<Item = Box<dyn Read + Send>>>(
|
||||
files: F,
|
||||
settings: &GlobalSettings,
|
||||
) -> FileMerger {
|
||||
let (request_sender, request_receiver) = channel();
|
||||
let mut reader_files = Vec::with_capacity(files.len());
|
||||
let mut loaded_receivers = Vec::with_capacity(files.len());
|
||||
for (file_number, file) in files.iter().map(open).enumerate() {
|
||||
let mut reader_files = Vec::with_capacity(files.size_hint().0);
|
||||
let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
|
||||
for (file_number, file) in files.enumerate() {
|
||||
let (sender, receiver) = sync_channel(2);
|
||||
loaded_receivers.push(receiver);
|
||||
reader_files.push(ReaderFile {
|
||||
|
@ -146,7 +197,11 @@ impl<'a> FileMerger<'a> {
|
|||
/// Write the merged contents to the output file.
|
||||
pub fn write_all(&mut self, settings: &GlobalSettings) {
|
||||
let mut out = settings.out_writer();
|
||||
while self.write_next(settings, &mut out) {}
|
||||
self.write_all_to(settings, &mut out);
|
||||
}
|
||||
|
||||
pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
|
||||
while self.write_next(settings, out) {}
|
||||
}
|
||||
|
||||
fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {
|
||||
|
|
|
@ -96,6 +96,8 @@ static OPT_PARALLEL: &str = "parallel";
|
|||
static OPT_FILES0_FROM: &str = "files0-from";
|
||||
static OPT_BUF_SIZE: &str = "buffer-size";
|
||||
static OPT_TMP_DIR: &str = "temporary-directory";
|
||||
static OPT_COMPRESS_PROG: &str = "compress-program";
|
||||
static OPT_BATCH_SIZE: &str = "batch-size";
|
||||
|
||||
static ARG_FILES: &str = "files";
|
||||
|
||||
|
@ -156,6 +158,8 @@ pub struct GlobalSettings {
|
|||
zero_terminated: bool,
|
||||
buffer_size: usize,
|
||||
tmp_dir: PathBuf,
|
||||
compress_prog: Option<String>,
|
||||
merge_batch_size: usize,
|
||||
}
|
||||
|
||||
impl GlobalSettings {
|
||||
|
@ -223,6 +227,8 @@ impl Default for GlobalSettings {
|
|||
zero_terminated: false,
|
||||
buffer_size: DEFAULT_BUF_SIZE,
|
||||
tmp_dir: PathBuf::new(),
|
||||
compress_prog: None,
|
||||
merge_batch_size: 16,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1076,6 +1082,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
.takes_value(true)
|
||||
.value_name("DIR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(OPT_COMPRESS_PROG)
|
||||
.long(OPT_COMPRESS_PROG)
|
||||
.help("compress temporary files with PROG, decompress with PROG -d")
|
||||
.long_help("PROG has to take input from stdin and output to stdout")
|
||||
.value_name("PROG")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(OPT_BATCH_SIZE)
|
||||
.long(OPT_BATCH_SIZE)
|
||||
.help("Merge at most N_MERGE inputs at once.")
|
||||
.value_name("N_MERGE")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(OPT_FILES0_FROM)
|
||||
.long(OPT_FILES0_FROM)
|
||||
|
@ -1167,6 +1186,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
.map(PathBuf::from)
|
||||
.unwrap_or_else(env::temp_dir);
|
||||
|
||||
settings.compress_prog = matches.value_of(OPT_COMPRESS_PROG).map(String::from);
|
||||
|
||||
if let Some(n_merge) = matches.value_of(OPT_BATCH_SIZE) {
|
||||
settings.merge_batch_size = n_merge
|
||||
.parse()
|
||||
.unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
|
||||
}
|
||||
|
||||
settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED);
|
||||
settings.merge = matches.is_present(OPT_MERGE);
|
||||
|
||||
|
@ -1242,7 +1269,7 @@ fn output_sorted_lines<'a>(iter: impl Iterator<Item = &'a Line<'a>>, settings: &
|
|||
|
||||
fn exec(files: &[String], settings: &GlobalSettings) -> i32 {
|
||||
if settings.merge {
|
||||
let mut file_merger = merge::merge(files, settings);
|
||||
let mut file_merger = merge::merge_with_file_limit(files.iter().map(open), settings);
|
||||
file_merger.write_all(settings);
|
||||
} else if settings.check {
|
||||
if files.len() > 1 {
|
||||
|
|
|
@ -837,3 +837,64 @@ fn test_nonexistent_file() {
|
|||
fn test_blanks() {
|
||||
test_helper("blanks", &["-b", "--ignore-blanks"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sort_multiple() {
|
||||
new_ucmd!()
|
||||
.args(&["no_trailing_newline1.txt", "no_trailing_newline2.txt"])
|
||||
.succeeds()
|
||||
.stdout_is("a\nb\nb\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sort_empty_chunk() {
|
||||
new_ucmd!()
|
||||
.args(&["-S", "40B"])
|
||||
.pipe_in("a\na\n")
|
||||
.succeeds()
|
||||
.stdout_is("a\na\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_compress() {
|
||||
new_ucmd!()
|
||||
.args(&[
|
||||
"ext_sort.txt",
|
||||
"-n",
|
||||
"--compress-program",
|
||||
"gzip",
|
||||
"-S",
|
||||
"10",
|
||||
])
|
||||
.succeeds()
|
||||
.stdout_only_fixture("ext_sort.expected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_fail() {
|
||||
new_ucmd!()
|
||||
.args(&[
|
||||
"ext_sort.txt",
|
||||
"-n",
|
||||
"--compress-program",
|
||||
"nonexistent-program",
|
||||
"-S",
|
||||
"10",
|
||||
])
|
||||
.fails()
|
||||
.stderr_only("sort: couldn't execute compress program: errno 2");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_batches() {
|
||||
new_ucmd!()
|
||||
.args(&[
|
||||
"ext_sort.txt",
|
||||
"-n",
|
||||
"-S",
|
||||
"150B",
|
||||
])
|
||||
.succeeds()
|
||||
.stdout_only_fixture("ext_sort.expected");
|
||||
}
|
||||
|
|
2
tests/fixtures/sort/no_trailing_newline1.txt
vendored
Normal file
2
tests/fixtures/sort/no_trailing_newline1.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
a
|
||||
b
|
1
tests/fixtures/sort/no_trailing_newline2.txt
vendored
Normal file
1
tests/fixtures/sort/no_trailing_newline2.txt
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
b
|
|
@ -44,7 +44,7 @@ sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver
|
|||
# Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils
|
||||
sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile
|
||||
sed -i 's| tr | /usr/bin/tr |' tests/init.sh
|
||||
make
|
||||
make -j "$(nproc)"
|
||||
# Generate the factor tests, so they can be fixed
|
||||
# Used to be 36. Reduced to 20 to decrease the log size
|
||||
for i in {00..20}
|
||||
|
@ -59,7 +59,7 @@ do
|
|||
done
|
||||
|
||||
|
||||
grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||'
|
||||
grep -rl 'path_prepend_' tests/* | xargs sed -i 's| path_prepend_ ./src||'
|
||||
sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh
|
||||
|
||||
# Remove tests checking for --version & --help
|
||||
|
@ -94,8 +94,28 @@ sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh
|
|||
sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh
|
||||
sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh
|
||||
|
||||
#Add specific timeout to tests that currently hang to limit time spent waiting
|
||||
# Add specific timeout to tests that currently hang to limit time spent waiting
|
||||
sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh
|
||||
sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh
|
||||
|
||||
|
||||
# Remove dup of /usr/bin/ when executed several times
|
||||
grep -rl '/usr/bin//usr/bin/' tests/* | xargs --no-run-if-empty sed -i 's|/usr/bin//usr/bin/|/usr/bin/|g'
|
||||
|
||||
|
||||
#### Adjust tests to make them work with Rust/coreutils
|
||||
# in some cases, what we are doing in rust/coreutils is good (or better)
|
||||
# we should not regress our project just to match what GNU is doing.
|
||||
# So, do some changes on the fly
|
||||
|
||||
sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh
|
||||
|
||||
sed -i -e "s|rm: cannot remove 'a/b/file'|rm: cannot remove 'a'|g" tests/rm/cycle.sh
|
||||
|
||||
sed -i -e "s|rm: cannot remove directory 'b/a/p'|rm: cannot remove 'b'|g" tests/rm/rm1.sh
|
||||
|
||||
sed -i -e "s|rm: cannot remove 'a/1'|rm: cannot remove 'a'|g" tests/rm/rm2.sh
|
||||
|
||||
sed -i -e "s|removed directory 'a/'|removed directory 'a'|g" tests/rm/v-slash.sh
|
||||
|
||||
test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue