1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge branch 'master' into id_zero_2351

This commit is contained in:
Jan Scheer 2021-06-08 22:45:27 +02:00
commit babf6ecae4
19 changed files with 461 additions and 165 deletions

39
Cargo.lock generated
View file

@ -44,13 +44,16 @@ dependencies = [
] ]
[[package]] [[package]]
name = "arrayvec" name = "arrayref"
version = "0.4.12" version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
dependencies = [
"nodrop", [[package]]
] name = "arrayvec"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]] [[package]]
name = "atty" name = "atty"
@ -100,11 +103,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]] [[package]]
name = "blake2-rfc" name = "blake2b_simd"
version = "0.2.18" version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
dependencies = [ dependencies = [
"arrayref",
"arrayvec", "arrayvec",
"constant_time_eq", "constant_time_eq",
] ]
@ -700,9 +704,9 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.3.2" version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [ dependencies = [
"unicode-segmentation", "unicode-segmentation",
] ]
@ -1383,12 +1387,9 @@ dependencies = [
[[package]] [[package]]
name = "regex-automata" name = "regex-automata"
version = "0.1.9" version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"byteorder",
]
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
@ -1501,9 +1502,9 @@ dependencies = [
[[package]] [[package]]
name = "signal-hook-registry" name = "signal-hook-registry"
version = "1.3.0" version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
dependencies = [ dependencies = [
"libc", "libc",
] ]
@ -2029,7 +2030,7 @@ dependencies = [
name = "uu_hashsum" name = "uu_hashsum"
version = "0.0.6" version = "0.0.6"
dependencies = [ dependencies = [
"blake2-rfc", "blake2b_simd",
"clap", "clap",
"digest", "digest",
"hex", "hex",

View file

@ -349,7 +349,7 @@ sha1 = { version="0.6", features=["std"] }
tempfile = "3.2.0" tempfile = "3.2.0"
time = "0.1" time = "0.1"
unindent = "0.1" unindent = "0.1"
uucore = { version=">=0.0.8", package="uucore", path="src/uucore", features=["entries"] } uucore = { version=">=0.0.8", package="uucore", path="src/uucore", features=["entries", "process"] }
walkdir = "2.2" walkdir = "2.2"
atty = "0.2.14" atty = "0.2.14"

View file

@ -10,7 +10,7 @@
#[macro_use] #[macro_use]
extern crate uucore; extern crate uucore;
use uucore::entries::{get_groups, gid2grp, Locate, Passwd}; use uucore::entries::{get_groups_gnu, gid2grp, Locate, Passwd};
use clap::{crate_version, App, Arg}; use clap::{crate_version, App, Arg};
@ -35,7 +35,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
None => { None => {
println!( println!(
"{}", "{}",
get_groups() get_groups_gnu(None)
.unwrap() .unwrap()
.iter() .iter()
.map(|&g| gid2grp(g).unwrap()) .map(|&g| gid2grp(g).unwrap())

View file

@ -0,0 +1,9 @@
## Benchmarking hashsum
### To bench blake2
Taken from: https://github.com/uutils/coreutils/pull/2296
With a large file:
$ hyperfine "./target/release/coreutils hashsum --b2sum large-file" "b2sum large-file"

View file

@ -25,7 +25,7 @@ regex-syntax = "0.6.7"
sha1 = "0.6.0" sha1 = "0.6.0"
sha2 = "0.6.0" sha2 = "0.6.0"
sha3 = "0.6.0" sha3 = "0.6.0"
blake2-rfc = "0.2.18" blake2b_simd = "0.5.11"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -1,4 +1,3 @@
extern crate blake2_rfc;
extern crate digest; extern crate digest;
extern crate md5; extern crate md5;
extern crate sha1; extern crate sha1;
@ -49,9 +48,9 @@ impl Digest for md5::Context {
} }
} }
impl Digest for blake2_rfc::blake2b::Blake2b { impl Digest for blake2b_simd::State {
fn new() -> Self { fn new() -> Self {
blake2_rfc::blake2b::Blake2b::new(64) Self::new()
} }
fn input(&mut self, input: &[u8]) { fn input(&mut self, input: &[u8]) {
@ -59,12 +58,12 @@ impl Digest for blake2_rfc::blake2b::Blake2b {
} }
fn result(&mut self, out: &mut [u8]) { fn result(&mut self, out: &mut [u8]) {
let hash_result = &self.clone().finalize(); let hash_result = &self.finalize();
out.copy_from_slice(&hash_result.as_bytes()); out.copy_from_slice(&hash_result.as_bytes());
} }
fn reset(&mut self) { fn reset(&mut self) {
*self = blake2_rfc::blake2b::Blake2b::new(64); *self = Self::new();
} }
fn output_bits(&self) -> usize { fn output_bits(&self) -> usize {

View file

@ -19,7 +19,6 @@ mod digest;
use self::digest::Digest; use self::digest::Digest;
use blake2_rfc::blake2b::Blake2b;
use clap::{App, Arg, ArgMatches}; use clap::{App, Arg, ArgMatches};
use hex::ToHex; use hex::ToHex;
use md5::Context as Md5; use md5::Context as Md5;
@ -85,7 +84,11 @@ fn detect_algo<'a>(
"sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box<dyn Digest>, 256), "sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box<dyn Digest>, 256),
"sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box<dyn Digest>, 384), "sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box<dyn Digest>, 384),
"sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box<dyn Digest>, 512), "sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box<dyn Digest>, 512),
"b2sum" => ("BLAKE2", Box::new(Blake2b::new(64)) as Box<dyn Digest>, 512), "b2sum" => (
"BLAKE2",
Box::new(blake2b_simd::State::new()) as Box<dyn Digest>,
512,
),
"sha3sum" => match matches.value_of("bits") { "sha3sum" => match matches.value_of("bits") {
Some(bits_str) => match (&bits_str).parse::<usize>() { Some(bits_str) => match (&bits_str).parse::<usize>() {
Ok(224) => ( Ok(224) => (
@ -187,7 +190,7 @@ fn detect_algo<'a>(
set_or_crash("SHA512", Box::new(Sha512::new()), 512) set_or_crash("SHA512", Box::new(Sha512::new()), 512)
} }
if matches.is_present("b2sum") { if matches.is_present("b2sum") {
set_or_crash("BLAKE2", Box::new(Blake2b::new(64)), 512) set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512)
} }
if matches.is_present("sha3") { if matches.is_present("sha3") {
match matches.value_of("bits") { match matches.value_of("bits") {

View file

@ -148,7 +148,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
Arg::with_name(options::OPT_REAL_ID) Arg::with_name(options::OPT_REAL_ID)
.short("r") .short("r")
.long(options::OPT_REAL_ID) .long(options::OPT_REAL_ID)
.help("Display the real ID for the -g and -u options instead of the effective ID."), .help("Display the real ID for the -G, -g and -u options instead of the effective ID."),
) )
.arg( .arg(
Arg::with_name(options::OPT_ZERO) Arg::with_name(options::OPT_ZERO)
@ -234,26 +234,23 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
} }
if gsflag { if gsflag {
let delimiter = if zflag { "" } else { " " }; let delimiter = if zflag { "\0" } else { " " };
let id = possible_pw
.map(|p| p.gid())
.unwrap_or(if rflag { getgid() } else { getegid() });
print!( print!(
"{}{}", "{}{}",
if nflag {
possible_pw possible_pw
.map(|p| p.belongs_to()) .map(|p| p.belongs_to())
.unwrap_or_else(|| entries::get_groups().unwrap()) .unwrap_or_else(|| entries::get_groups_gnu(Some(id)).unwrap())
.iter() .iter()
.map(|&id| entries::gid2grp(id).unwrap()) .map(|&id| if nflag {
.collect::<Vec<_>>() entries::gid2grp(id).unwrap_or_else(|_| id.to_string())
.join(delimiter)
} else { } else {
possible_pw id.to_string()
.map(|p| p.belongs_to()) })
.unwrap_or_else(|| entries::get_groups().unwrap())
.iter()
.map(|&id| id.to_string())
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(delimiter) .join(delimiter),
},
line_ending line_ending
); );
return 0; return 0;
@ -321,7 +318,7 @@ fn pretty(possible_pw: Option<Passwd>) {
println!( println!(
"groups\t{}", "groups\t{}",
entries::get_groups() entries::get_groups_gnu(None)
.unwrap() .unwrap()
.iter() .iter()
.map(|&gr| entries::gid2grp(gr).unwrap()) .map(|&gr| entries::gid2grp(gr).unwrap())
@ -420,5 +417,3 @@ fn id_print(possible_pw: Option<Passwd>, p_euid: bool, p_egid: bool) {
.join(",") .join(",")
); );
} }
fn get_groups() ->

View file

@ -143,7 +143,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if let Some(files) = matches.values_of(options::FILES) { if let Some(files) = matches.values_of(options::FILES) {
let mut stdout = setup_term(); let mut stdout = setup_term();
let length = files.len(); let length = files.len();
for (idx, file) in files.enumerate() {
let mut files_iter = files.peekable();
while let (Some(file), next_file) = (files_iter.next(), files_iter.peek()) {
let file = Path::new(file); let file = Path::new(file);
if file.is_dir() { if file.is_dir() {
terminal::disable_raw_mode().unwrap(); terminal::disable_raw_mode().unwrap();
@ -160,15 +162,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
} }
let mut reader = BufReader::new(File::open(file).unwrap()); let mut reader = BufReader::new(File::open(file).unwrap());
reader.read_to_string(&mut buff).unwrap(); reader.read_to_string(&mut buff).unwrap();
let is_last = idx + 1 == length; more(&buff, &mut stdout, next_file.copied());
more(&buff, &mut stdout, is_last);
buff.clear(); buff.clear();
} }
reset_term(&mut stdout); reset_term(&mut stdout);
} else if atty::isnt(atty::Stream::Stdin) { } else if atty::isnt(atty::Stream::Stdin) {
stdin().read_to_string(&mut buff).unwrap(); stdin().read_to_string(&mut buff).unwrap();
let mut stdout = setup_term(); let mut stdout = setup_term();
more(&buff, &mut stdout, true); more(&buff, &mut stdout, None);
reset_term(&mut stdout); reset_term(&mut stdout);
} else { } else {
show_usage_error!("bad usage"); show_usage_error!("bad usage");
@ -203,7 +204,7 @@ fn reset_term(stdout: &mut std::io::Stdout) {
#[inline(always)] #[inline(always)]
fn reset_term(_: &mut usize) {} fn reset_term(_: &mut usize) {}
fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) { fn more(buff: &str, mut stdout: &mut Stdout, next_file: Option<&str>) {
let (cols, rows) = terminal::size().unwrap(); let (cols, rows) = terminal::size().unwrap();
let lines = break_buff(buff, usize::from(cols)); let lines = break_buff(buff, usize::from(cols));
let line_count: u16 = lines.len().try_into().unwrap(); let line_count: u16 = lines.len().try_into().unwrap();
@ -217,8 +218,11 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
&mut stdout, &mut stdout,
lines.clone(), lines.clone(),
line_count, line_count,
next_file,
); );
let is_last = next_file.is_none();
// Specifies whether we have reached the end of the file and should // Specifies whether we have reached the end of the file and should
// return on the next key press. However, we immediately return when // return on the next key press. However, we immediately return when
// this is the last file. // this is the last file.
@ -270,6 +274,7 @@ fn more(buff: &str, mut stdout: &mut Stdout, is_last: bool) {
&mut stdout, &mut stdout,
lines.clone(), lines.clone(),
line_count, line_count,
next_file,
); );
if lines_left == 0 { if lines_left == 0 {
@ -288,6 +293,7 @@ fn draw(
mut stdout: &mut std::io::Stdout, mut stdout: &mut std::io::Stdout,
lines: Vec<String>, lines: Vec<String>,
lc: u16, lc: u16,
next_file: Option<&str>,
) { ) {
execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine)).unwrap(); execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine)).unwrap();
let (up_mark, lower_mark) = calc_range(*upper_mark, rows, lc); let (up_mark, lower_mark) = calc_range(*upper_mark, rows, lc);
@ -302,7 +308,7 @@ fn draw(
.write_all(format!("\r{}\n", line).as_bytes()) .write_all(format!("\r{}\n", line).as_bytes())
.unwrap(); .unwrap();
} }
make_prompt_and_flush(&mut stdout, lower_mark, lc); make_prompt_and_flush(&mut stdout, lower_mark, lc, next_file);
*upper_mark = up_mark; *upper_mark = up_mark;
} }
@ -358,12 +364,20 @@ fn calc_range(mut upper_mark: u16, rows: u16, line_count: u16) -> (u16, u16) {
} }
// Make a prompt similar to original more // Make a prompt similar to original more
fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) { fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16, next_file: Option<&str>) {
let status = if lower_mark == lc {
format!("Next file: {}", next_file.unwrap_or_default())
} else {
format!(
"{}%",
(lower_mark as f64 / lc as f64 * 100.0).round() as u16
)
};
write!( write!(
stdout, stdout,
"\r{}--More--({}%){}", "\r{}--More--({}){}",
Attribute::Reverse, Attribute::Reverse,
((lower_mark as f64 / lc as f64) * 100.0).round() as u16, status,
Attribute::Reset Attribute::Reset
) )
.unwrap(); .unwrap();

View file

@ -102,6 +102,7 @@ pub fn read(
carry_over.clear(); carry_over.clear();
carry_over.extend_from_slice(&buffer[read..]); carry_over.extend_from_slice(&buffer[read..]);
if read != 0 {
let payload = Chunk::new(buffer, |buf| { let payload = Chunk::new(buffer, |buf| {
let mut lines = unsafe { let mut lines = unsafe {
// SAFETY: It is safe to transmute to a vector of lines with shorter lifetime, // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime,
@ -112,7 +113,6 @@ pub fn read(
parse_lines(read, &mut lines, separator, &settings); parse_lines(read, &mut lines, separator, &settings);
lines lines
}); });
if !payload.borrow_lines().is_empty() {
sender.send(payload).unwrap(); sender.send(payload).unwrap();
} }
if !should_continue { if !should_continue {
@ -175,6 +175,7 @@ fn read_to_buffer(
separator: u8, separator: u8,
) -> (usize, bool) { ) -> (usize, bool) {
let mut read_target = &mut buffer[start_offset..]; let mut read_target = &mut buffer[start_offset..];
let mut last_file_target_size = read_target.len();
loop { loop {
match file.read(read_target) { match file.read(read_target) {
Ok(0) => { Ok(0) => {
@ -208,14 +209,27 @@ fn read_to_buffer(
read_target = &mut buffer[len..]; read_target = &mut buffer[len..];
} }
} else { } else {
// This file is empty. // This file has been fully read.
let mut leftover_len = read_target.len();
if last_file_target_size != leftover_len {
// The file was not empty.
let read_len = buffer.len() - leftover_len;
if buffer[read_len - 1] != separator {
// The file did not end with a separator. We have to insert one.
buffer[read_len] = separator;
leftover_len -= 1;
}
let read_len = buffer.len() - leftover_len;
read_target = &mut buffer[read_len..];
}
if let Some(next_file) = next_files.next() { if let Some(next_file) = next_files.next() {
// There is another file. // There is another file.
last_file_target_size = leftover_len;
*file = next_file; *file = next_file;
} else { } else {
// This was the last file. // This was the last file.
let leftover_len = read_target.len(); let read_len = buffer.len() - leftover_len;
return (buffer.len() - leftover_len, false); return (read_len, false);
} }
} }
} }

View file

@ -12,8 +12,12 @@
//! The buffers for the individual chunks are recycled. There are two buffers. //! The buffers for the individual chunks are recycled. There are two buffers.
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs::File;
use std::io::BufReader;
use std::io::{BufWriter, Write}; use std::io::{BufWriter, Write};
use std::path::Path; use std::path::Path;
use std::process::Child;
use std::process::{Command, Stdio};
use std::{ use std::{
fs::OpenOptions, fs::OpenOptions,
io::Read, io::Read,
@ -25,12 +29,13 @@ use itertools::Itertools;
use tempfile::TempDir; use tempfile::TempDir;
use crate::Line;
use crate::{ use crate::{
chunks::{self, Chunk}, chunks::{self, Chunk},
compare_by, merge, output_sorted_lines, sort_by, GlobalSettings, compare_by, merge, output_sorted_lines, sort_by, GlobalSettings,
}; };
const MIN_BUFFER_SIZE: usize = 8_000; const START_BUFFER_SIZE: usize = 8_000;
/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) { pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) {
@ -63,10 +68,31 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
); );
match read_result { match read_result {
ReadResult::WroteChunksToFile { chunks_written } => { ReadResult::WroteChunksToFile { chunks_written } => {
let files = (0..chunks_written) let mut children = Vec::new();
.map(|chunk_num| tmp_dir.path().join(chunk_num.to_string())) let files = (0..chunks_written).map(|chunk_num| {
.collect::<Vec<_>>(); let file_path = tmp_dir.path().join(chunk_num.to_string());
let mut merger = merge::merge(&files, settings); let file = File::open(file_path).unwrap();
if let Some(compress_prog) = &settings.compress_prog {
let mut command = Command::new(compress_prog);
command.stdin(file).stdout(Stdio::piped()).arg("-d");
let mut child = crash_if_err!(
2,
command.spawn().map_err(|err| format!(
"couldn't execute compress program: errno {}",
err.raw_os_error().unwrap()
))
);
let child_stdout = child.stdout.take().unwrap();
children.push(child);
Box::new(BufReader::new(child_stdout)) as Box<dyn Read + Send>
} else {
Box::new(BufReader::new(file)) as Box<dyn Read + Send>
}
});
let mut merger = merge::merge_with_file_limit(files, settings);
for child in children {
assert_child_success(child, settings.compress_prog.as_ref().unwrap());
}
merger.write_all(settings); merger.write_all(settings);
} }
ReadResult::SortedSingleChunk(chunk) => { ReadResult::SortedSingleChunk(chunk) => {
@ -132,7 +158,14 @@ fn reader_writer(
for _ in 0..2 { for _ in 0..2 {
chunks::read( chunks::read(
&mut sender_option, &mut sender_option,
vec![0; MIN_BUFFER_SIZE], vec![
0;
if START_BUFFER_SIZE < buffer_size {
START_BUFFER_SIZE
} else {
buffer_size
}
],
Some(buffer_size), Some(buffer_size),
&mut carry_over, &mut carry_over,
&mut file, &mut file,
@ -171,6 +204,7 @@ fn reader_writer(
write( write(
&mut chunk, &mut chunk,
&tmp_dir.path().join(file_number.to_string()), &tmp_dir.path().join(file_number.to_string()),
settings.compress_prog.as_deref(),
separator, separator,
); );
@ -193,14 +227,45 @@ fn reader_writer(
} }
/// Write the lines in `chunk` to `file`, separated by `separator`. /// Write the lines in `chunk` to `file`, separated by `separator`.
fn write(chunk: &mut Chunk, file: &Path, separator: u8) { /// `compress_prog` is used to optionally compress file contents.
fn write(chunk: &mut Chunk, file: &Path, compress_prog: Option<&str>, separator: u8) {
chunk.with_lines_mut(|lines| { chunk.with_lines_mut(|lines| {
// Write the lines to the file // Write the lines to the file
let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file)); let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file));
if let Some(compress_prog) = compress_prog {
let mut command = Command::new(compress_prog);
command.stdin(Stdio::piped()).stdout(file);
let mut child = crash_if_err!(
2,
command.spawn().map_err(|err| format!(
"couldn't execute compress program: errno {}",
err.raw_os_error().unwrap()
))
);
let mut writer = BufWriter::new(child.stdin.take().unwrap());
write_lines(lines, &mut writer, separator);
writer.flush().unwrap();
drop(writer);
assert_child_success(child, compress_prog);
} else {
let mut writer = BufWriter::new(file); let mut writer = BufWriter::new(file);
for s in lines.iter() { write_lines(lines, &mut writer, separator);
};
});
}
fn write_lines<'a, T: Write>(lines: &[Line<'a>], writer: &mut T, separator: u8) {
for s in lines {
crash_if_err!(1, writer.write_all(s.line.as_bytes())); crash_if_err!(1, writer.write_all(s.line.as_bytes()));
crash_if_err!(1, writer.write_all(&[separator])); crash_if_err!(1, writer.write_all(&[separator]));
} }
}); }
fn assert_child_success(mut child: Child, program: &str) {
if !matches!(
child.wait().map(|e| e.code()),
Ok(Some(0)) | Ok(None) | Err(_)
) {
crash!(2, "'{}' terminated abnormally", program)
}
} }

View file

@ -9,8 +9,8 @@
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
ffi::OsStr, fs::File,
io::{Read, Write}, io::{BufWriter, Read, Write},
iter, iter,
rc::Rc, rc::Rc,
sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender}, sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
@ -18,18 +18,69 @@ use std::{
}; };
use compare::Compare; use compare::Compare;
use itertools::Itertools;
use crate::{ use crate::{
chunks::{self, Chunk}, chunks::{self, Chunk},
compare_by, open, GlobalSettings, compare_by, GlobalSettings,
}; };
// Merge already sorted files. // Merge already sorted files.
pub fn merge<'a>(files: &[impl AsRef<OsStr>], settings: &'a GlobalSettings) -> FileMerger<'a> { pub fn merge_with_file_limit<F: ExactSizeIterator<Item = Box<dyn Read + Send>>>(
files: F,
settings: &GlobalSettings,
) -> FileMerger {
if files.len() > settings.merge_batch_size {
let tmp_dir = tempfile::Builder::new()
.prefix("uutils_sort")
.tempdir_in(&settings.tmp_dir)
.unwrap();
let mut batch_number = 0;
let mut remaining_files = files.len();
let batches = files.chunks(settings.merge_batch_size);
let mut batches = batches.into_iter();
while batch_number + remaining_files > settings.merge_batch_size && remaining_files != 0 {
remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
let mut merger = merge_without_limit(batches.next().unwrap(), settings);
let tmp_file = File::create(tmp_dir.path().join(batch_number.to_string())).unwrap();
merger.write_all_to(settings, &mut BufWriter::new(tmp_file));
batch_number += 1;
}
let batch_files = (0..batch_number).map(|n| {
Box::new(File::open(tmp_dir.path().join(n.to_string())).unwrap())
as Box<dyn Read + Send>
});
if batch_number > settings.merge_batch_size {
assert!(batches.next().is_none());
merge_with_file_limit(
Box::new(batch_files) as Box<dyn ExactSizeIterator<Item = Box<dyn Read + Send>>>,
settings,
)
} else {
let final_batch = batches.next();
assert!(batches.next().is_none());
merge_without_limit(
batch_files.chain(final_batch.into_iter().flatten()),
settings,
)
}
} else {
merge_without_limit(files, settings)
}
}
/// Merge files without limiting how many files are concurrently open
///
/// It is the responsibility of the caller to ensure that `files` yields only
/// as many files as we are allowed to open concurrently.
fn merge_without_limit<F: Iterator<Item = Box<dyn Read + Send>>>(
files: F,
settings: &GlobalSettings,
) -> FileMerger {
let (request_sender, request_receiver) = channel(); let (request_sender, request_receiver) = channel();
let mut reader_files = Vec::with_capacity(files.len()); let mut reader_files = Vec::with_capacity(files.size_hint().0);
let mut loaded_receivers = Vec::with_capacity(files.len()); let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
for (file_number, file) in files.iter().map(open).enumerate() { for (file_number, file) in files.enumerate() {
let (sender, receiver) = sync_channel(2); let (sender, receiver) = sync_channel(2);
loaded_receivers.push(receiver); loaded_receivers.push(receiver);
reader_files.push(ReaderFile { reader_files.push(ReaderFile {
@ -146,7 +197,11 @@ impl<'a> FileMerger<'a> {
/// Write the merged contents to the output file. /// Write the merged contents to the output file.
pub fn write_all(&mut self, settings: &GlobalSettings) { pub fn write_all(&mut self, settings: &GlobalSettings) {
let mut out = settings.out_writer(); let mut out = settings.out_writer();
while self.write_next(settings, &mut out) {} self.write_all_to(settings, &mut out);
}
pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
while self.write_next(settings, out) {}
} }
fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool { fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {

View file

@ -95,6 +95,8 @@ static OPT_PARALLEL: &str = "parallel";
static OPT_FILES0_FROM: &str = "files0-from"; static OPT_FILES0_FROM: &str = "files0-from";
static OPT_BUF_SIZE: &str = "buffer-size"; static OPT_BUF_SIZE: &str = "buffer-size";
static OPT_TMP_DIR: &str = "temporary-directory"; static OPT_TMP_DIR: &str = "temporary-directory";
static OPT_COMPRESS_PROG: &str = "compress-program";
static OPT_BATCH_SIZE: &str = "batch-size";
static ARG_FILES: &str = "files"; static ARG_FILES: &str = "files";
@ -155,6 +157,8 @@ pub struct GlobalSettings {
zero_terminated: bool, zero_terminated: bool,
buffer_size: usize, buffer_size: usize,
tmp_dir: PathBuf, tmp_dir: PathBuf,
compress_prog: Option<String>,
merge_batch_size: usize,
} }
impl GlobalSettings { impl GlobalSettings {
@ -223,6 +227,8 @@ impl Default for GlobalSettings {
zero_terminated: false, zero_terminated: false,
buffer_size: DEFAULT_BUF_SIZE, buffer_size: DEFAULT_BUF_SIZE,
tmp_dir: PathBuf::new(), tmp_dir: PathBuf::new(),
compress_prog: None,
merge_batch_size: 16,
} }
} }
} }
@ -1076,6 +1082,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
.takes_value(true) .takes_value(true)
.value_name("DIR"), .value_name("DIR"),
) )
.arg(
Arg::with_name(OPT_COMPRESS_PROG)
.long(OPT_COMPRESS_PROG)
.help("compress temporary files with PROG, decompress with PROG -d")
.long_help("PROG has to take input from stdin and output to stdout")
.value_name("PROG")
)
.arg(
Arg::with_name(OPT_BATCH_SIZE)
.long(OPT_BATCH_SIZE)
.help("Merge at most N_MERGE inputs at once.")
.value_name("N_MERGE")
)
.arg( .arg(
Arg::with_name(OPT_FILES0_FROM) Arg::with_name(OPT_FILES0_FROM)
.long(OPT_FILES0_FROM) .long(OPT_FILES0_FROM)
@ -1165,6 +1184,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
.map(PathBuf::from) .map(PathBuf::from)
.unwrap_or_else(env::temp_dir); .unwrap_or_else(env::temp_dir);
settings.compress_prog = matches.value_of(OPT_COMPRESS_PROG).map(String::from);
if let Some(n_merge) = matches.value_of(OPT_BATCH_SIZE) {
settings.merge_batch_size = n_merge
.parse()
.unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
}
settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED);
settings.merge = matches.is_present(OPT_MERGE); settings.merge = matches.is_present(OPT_MERGE);
@ -1240,7 +1267,7 @@ fn output_sorted_lines<'a>(iter: impl Iterator<Item = &'a Line<'a>>, settings: &
fn exec(files: &[String], settings: &GlobalSettings) -> i32 { fn exec(files: &[String], settings: &GlobalSettings) -> i32 {
if settings.merge { if settings.merge {
let mut file_merger = merge::merge(files, settings); let mut file_merger = merge::merge_with_file_limit(files.iter().map(open), settings);
file_merger.write_all(settings); file_merger.write_all(settings);
} else if settings.check { } else if settings.check {
if files.len() > 1 { if files.len() > 1 {

View file

@ -5,7 +5,7 @@
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore (vars) Passwd cstr fnam gecos ngroups // spell-checker:ignore (vars) Passwd cstr fnam gecos ngroups egid
//! Get password/group file entry //! Get password/group file entry
//! //!
@ -72,6 +72,41 @@ pub fn get_groups() -> IOResult<Vec<gid_t>> {
} }
} }
/// The list of group IDs returned from GNU's `groups` and GNU's `id --groups`
/// starts with the effective group ID (egid).
/// This is a wrapper for `get_groups()` to mimic this behavior.
///
/// If `arg_id` is `None` (default), `get_groups_gnu` moves the effective
/// group id (egid) to the first entry in the returned Vector.
/// If `arg_id` is `Some(x)`, `get_groups_gnu` moves the id with value `x`
/// to the first entry in the returned Vector. This might be necessary
/// for `id --groups --real` if `gid` and `egid` are not equal.
///
/// From: https://www.man7.org/linux/man-pages/man3/getgroups.3p.html
/// As implied by the definition of supplementary groups, the
/// effective group ID may appear in the array returned by
/// getgroups() or it may be returned only by getegid(). Duplication
/// may exist, but the application needs to call getegid() to be sure
/// of getting all of the information. Various implementation
/// variations and administrative sequences cause the set of groups
/// appearing in the result of getgroups() to vary in order and as to
/// whether the effective group ID is included, even when the set of
/// groups is the same (in the mathematical sense of ``set''). (The
/// history of a process and its parents could affect the details of
/// the result.)
pub fn get_groups_gnu(arg_id: Option<u32>) -> IOResult<Vec<gid_t>> {
    let mut groups = get_groups()?;
    // The id that has to lead the list: the caller's choice, or the egid by default.
    let leading_id = arg_id.unwrap_or_else(crate::features::process::getegid);
    match groups.iter().position(|&gid| gid == leading_id) {
        // Already listed: rotate its first occurrence to the front. The ids
        // that preceded it shift right by one; everything after it is untouched.
        // (When it is already first, this rotates a one-element slice: a no-op.)
        Some(index) => groups[..=index].rotate_right(1),
        // Not listed at all: prepend it.
        None => groups.insert(0, leading_id),
    }
    Ok(groups)
}
#[derive(Copy, Clone)]
pub struct Passwd { pub struct Passwd {
inner: passwd, inner: passwd,
} }
@ -268,3 +303,18 @@ pub fn usr2uid(name: &str) -> IOResult<uid_t> {
pub fn grp2gid(name: &str) -> IOResult<gid_t> { pub fn grp2gid(name: &str) -> IOResult<gid_t> {
Group::locate(name).map(|p| p.gid()) Group::locate(name).map(|p| p.gid())
} }
#[cfg(test)]
mod test {
    use super::*;

    /// Asks `get_groups_gnu` to lead with the last id reported by `get_groups`
    /// and checks that the result equals the original list with that id moved
    /// to the front. Does nothing when the group list is unavailable or empty.
    #[test]
    fn test_entries_get_groups_gnu() {
        let mut expected = match get_groups() {
            Ok(groups) => groups,
            Err(_) => return,
        };
        let last = match expected.pop() {
            Some(gid) => gid,
            None => return,
        };
        expected.insert(0, last);
        assert_eq!(get_groups_gnu(Some(last)).unwrap(), expected);
    }
}

View file

@ -1,41 +1,53 @@
use crate::common::util::*; use crate::common::util::*;
#[test] #[test]
#[cfg(any(target_vendor = "apple", target_os = "linux"))]
fn test_groups() { fn test_groups() {
let result = new_ucmd!().run(); if !is_ci() {
println!("result.stdout = {}", result.stdout_str()); new_ucmd!().succeeds().stdout_is(expected_result(&[]));
println!("result.stderr = {}", result.stderr_str()); } else {
if is_ci() && result.stdout_str().trim().is_empty() { // TODO: investigate how this could be tested in CI
// In the CI, some server are failing to return the group. // stderr = groups: cannot find name for group ID 116
// As seems to be a configuration issue, ignoring it println!("test skipped:");
return;
} }
result.success();
assert!(!result.stdout_str().trim().is_empty());
} }
#[test] #[test]
fn test_groups_arg() { #[cfg(any(target_os = "linux"))]
// get the username with the "id -un" command #[ignore = "fixme: 'groups USERNAME' needs more debugging"]
let result = TestScenario::new("id").ucmd_keepenv().arg("-un").run(); fn test_groups_username() {
println!("result.stdout = {}", result.stdout_str()); let scene = TestScenario::new(util_name!());
println!("result.stderr = {}", result.stderr_str()); let whoami_result = scene.cmd("whoami").run();
let s1 = String::from(result.stdout_str().trim());
if is_ci() && s1.parse::<f64>().is_ok() { let username = if whoami_result.succeeded() {
// In the CI, some server are failing to return id -un. whoami_result.stdout_move_str()
// So, if we are getting a uid, just skip this test } else if is_ci() {
// As seems to be a configuration issue, ignoring it String::from("docker")
} else {
println!("test skipped:");
return; return;
};
// TODO: stdout should be in the form: "username : group1 group2 group3"
scene
.ucmd()
.arg(&username)
.succeeds()
.stdout_is(expected_result(&[&username]));
} }
println!("result.stdout = {}", result.stdout_str()); #[cfg(any(target_vendor = "apple", target_os = "linux"))]
println!("result.stderr = {}", result.stderr_str()); fn expected_result(args: &[&str]) -> String {
result.success(); #[cfg(target_os = "linux")]
assert!(!result.stdout_str().is_empty()); let util_name = util_name!();
let username = result.stdout_str().trim(); #[cfg(target_vendor = "apple")]
let util_name = format!("g{}", util_name!());
// call groups with the user name to check that we TestScenario::new(&util_name)
// are getting something .cmd_keepenv(util_name)
new_ucmd!().arg(username).succeeds(); .env("LANGUAGE", "C")
assert!(!result.stdout_str().is_empty()); .args(args)
.succeeds()
.stdout_move_str()
} }

View file

@ -112,28 +112,23 @@ fn test_id_group() {
} }
#[test] #[test]
#[cfg(any(target_vendor = "apple", target_os = "linux"))]
fn test_id_groups() { fn test_id_groups() {
let scene = TestScenario::new(util_name!()); let scene = TestScenario::new(util_name!());
for g_flag in &["-G", "--groups"] {
let result = scene.ucmd().arg("-G").succeeds(); scene
let groups = result.stdout_str().trim().split_whitespace(); .ucmd()
for s in groups { .arg(g_flag)
assert!(s.parse::<u64>().is_ok()); .succeeds()
.stdout_is(expected_result(&[g_flag], false));
for &r_flag in &["-r", "--real"] {
let args = [g_flag, r_flag];
scene
.ucmd()
.args(&args)
.succeeds()
.stdout_is(expected_result(&args, false));
} }
let result = scene.ucmd().arg("--groups").succeeds();
let groups = result.stdout_str().trim().split_whitespace();
for s in groups {
assert!(s.parse::<u64>().is_ok());
}
#[cfg(any(target_vendor = "apple", target_os = "linux"))]
for args in &["-G", "--groups"] {
let expect = expected_result(&[args], false);
let actual = new_ucmd!().arg(&args).succeeds().stdout_move_str();
let mut v_actual: Vec<&str> = actual.split_whitespace().collect();
let mut v_expect: Vec<&str> = expect.split_whitespace().collect();
assert_eq!(v_actual.sort_unstable(), v_expect.sort_unstable());
} }
} }
@ -196,26 +191,28 @@ fn test_id_password_style() {
#[test] #[test]
#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[cfg(any(target_vendor = "apple", target_os = "linux"))]
fn test_id_default_format() { fn test_id_default_format() {
let scene = TestScenario::new(util_name!());
// -ugG // -ugG
for flag in &["--name", "--real"] { for flag in &["--name", "--real"] {
new_ucmd!() scene
.ucmd()
.arg(flag) .arg(flag)
.fails() .fails()
.stderr_is(expected_result(&[flag], true)); .stderr_is(expected_result(&[flag], true));
for &opt in &["--user", "--group", "--groups"] { for &opt in &["--user", "--group", "--groups"] {
if is_ci() && *flag == "--name" { if is_ci() && *flag == "--name" {
// '--name' does not work in CI: // '--name' does not work on CICD ubuntu-16/ubuntu-18
// id: cannot find name for user ID 1001 // id: cannot find name for user ID 1001
// id: cannot find name for group ID 116 // id: cannot find name for group ID 116
println!("test skipped:"); println!("test skipped");
continue; continue;
} }
let args = [opt, flag]; let args = [opt, flag];
let expect = expected_result(&args, false); scene
let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); .ucmd()
let mut v_actual: Vec<&str> = actual.split_whitespace().collect(); .args(&args)
let mut v_expect: Vec<&str> = expect.split_whitespace().collect(); .succeeds()
assert_eq!(v_actual.sort_unstable(), v_expect.sort_unstable()); .stdout_is(expected_result(&args, false));
} }
} }
} }
@ -231,21 +228,13 @@ fn test_id_zero() {
.fails() .fails()
.stderr_is(expected_result(&args, true)); .stderr_is(expected_result(&args, true));
} }
for &opt in &["-u", "--user", "-g", "--group"] { for &opt in &["-u", "--user", "-g", "--group", "-G", "--groups"] {
let args = [opt, z_flag]; let args = [opt, z_flag];
new_ucmd!() new_ucmd!()
.args(&args) .args(&args)
.succeeds() .succeeds()
.stdout_is(expected_result(&args, false)); .stdout_is(expected_result(&args, false));
} }
// '--groups' ids are in no particular order and when paired with '--zero' there's no
// delimiter which makes the split_whitespace-collect-into-vector comparison impossible.
for opt in &["-G", "--groups"] {
let args = [opt, z_flag];
let result = new_ucmd!().args(&args).succeeds().stdout_move_str();
assert!(!result.contains(" "));
assert!(result.ends_with('\0'));
}
} }
} }
@ -271,7 +260,6 @@ fn expected_result(args: &[&str], exp_fail: bool) -> String {
.fails() .fails()
.stderr_move_str() .stderr_move_str()
}; };
// #[cfg(target_vendor = "apple")]
return if cfg!(target_os = "macos") && result.starts_with("gid") { return if cfg!(target_os = "macos") && result.starts_with("gid") {
result[1..].to_string() result[1..].to_string()
} else { } else {

View file

@ -792,3 +792,64 @@ fn test_nonexistent_file() {
fn test_blanks() { fn test_blanks() {
test_helper("blanks", &["-b", "--ignore-blanks"]); test_helper("blanks", &["-b", "--ignore-blanks"]);
} }
/// Sorts two fixture files in a single invocation and checks the combined output.
#[test]
fn sort_multiple() {
    let inputs = ["no_trailing_newline1.txt", "no_trailing_newline2.txt"];
    new_ucmd!().args(&inputs).succeeds().stdout_is("a\nb\nb\n");
}
/// Pipes two identical lines through `sort -S 40B` and expects them back unchanged.
#[test]
fn sort_empty_chunk() {
    // Input and expected output are the same text.
    let text = "a\na\n";
    new_ucmd!()
        .args(&["-S", "40B"])
        .pipe_in(text)
        .succeeds()
        .stdout_is(text);
}
/// Numeric sort of `ext_sort.txt` with `gzip` as the compress program; the
/// output must match the `ext_sort.expected` fixture.
#[test]
#[cfg(target_os = "linux")]
fn test_compress() {
    // NOTE(review): the tiny `-S` value presumably forces the use of temporary
    // files so that the compress program is actually exercised; confirm.
    let args = ["ext_sort.txt", "-n", "--compress-program", "gzip", "-S", "10"];
    new_ucmd!()
        .args(&args)
        .succeeds()
        .stdout_only_fixture("ext_sort.expected");
}
/// Same invocation as `test_compress`, but with a compress program that does
/// not exist: the command must fail with the expected message on stderr.
#[test]
fn test_compress_fail() {
    let args = [
        "ext_sort.txt",
        "-n",
        "--compress-program",
        "nonexistent-program",
        "-S",
        "10",
    ];
    new_ucmd!()
        .args(&args)
        .fails()
        .stderr_only("sort: couldn't execute compress program: errno 2");
}
/// Numeric sort of `ext_sort.txt` with `-S 150B`; the output must match the
/// `ext_sort.expected` fixture.
#[test]
fn test_merge_batches() {
    // NOTE(review): the small `-S` value presumably makes the sort run in
    // several batches that are merged afterwards; confirm.
    let args = ["ext_sort.txt", "-n", "-S", "150B"];
    new_ucmd!()
        .args(&args)
        .succeeds()
        .stdout_only_fixture("ext_sort.expected");
}

View file

@ -0,0 +1,2 @@
a
b

View file

@ -0,0 +1 @@
b