1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #2520 from miDeb/sort/fixes

sort: fix some exit codes and error messages
This commit is contained in:
Sylvestre Ledru 2021-07-31 10:11:12 +02:00 committed by GitHub
commit 638b31a877
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 198 additions and 121 deletions

View file

@ -56,7 +56,7 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 {
) == Ordering::Greater
{
if !settings.check_silent {
println!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line);
eprintln!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line);
}
return 1;
}
@ -68,7 +68,7 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 {
if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) == Ordering::Greater
{
if !settings.check_silent {
println!("sort: {}:{}: disorder: {}", path, line_idx, b.line);
eprintln!("sort: {}:{}: disorder: {}", path, line_idx, b.line);
}
return 1;
}

View file

@ -175,7 +175,7 @@ pub fn read<T: Read>(
// because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
};
let read = crash_if_err!(1, std::str::from_utf8(&buffer[..read]));
let read = crash_if_err!(2, std::str::from_utf8(&buffer[..read]));
let mut line_data = LineData {
selections,
num_infos,
@ -313,7 +313,7 @@ fn read_to_buffer<T: Read>(
Err(e) if e.kind() == ErrorKind::Interrupted => {
// retry
}
Err(e) => crash!(1, "{}", e),
Err(e) => crash!(2, "{}", e),
}
}
}

View file

@ -28,6 +28,7 @@ use crate::merge::ClosedTmpFile;
use crate::merge::WriteableCompressedTmpFile;
use crate::merge::WriteablePlainTmpFile;
use crate::merge::WriteableTmpFile;
use crate::Output;
use crate::{
chunks::{self, Chunk},
compare_by, merge, sort_by, GlobalSettings,
@ -38,7 +39,11 @@ use tempfile::TempDir;
const START_BUFFER_SIZE: usize = 8_000;
/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) {
pub fn ext_sort(
files: &mut impl Iterator<Item = Box<dyn Read + Send>>,
settings: &GlobalSettings,
output: Output,
) {
let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1);
let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1);
thread::spawn({
@ -51,6 +56,7 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
settings,
sorted_receiver,
recycled_sender,
output,
);
} else {
reader_writer::<_, WriteablePlainTmpFile>(
@ -58,6 +64,7 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
settings,
sorted_receiver,
recycled_sender,
output,
);
}
}
@ -67,6 +74,7 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
settings: &GlobalSettings,
receiver: Receiver<Chunk>,
sender: SyncSender<Chunk>,
output: Output,
) {
let separator = if settings.zero_terminated {
b'\0'
@ -96,7 +104,7 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
settings,
Some((tmp_dir, tmp_dir_size)),
);
merger.write_all(settings);
merger.write_all(settings, output);
}
ReadResult::SortedSingleChunk(chunk) => {
if settings.unique {
@ -106,9 +114,10 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
== Ordering::Equal
}),
settings,
output,
);
} else {
print_sorted(chunk.lines().iter(), settings);
print_sorted(chunk.lines().iter(), settings, output);
}
}
ReadResult::SortedTwoChunks([a, b]) => {
@ -128,9 +137,10 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
})
.map(|(line, _)| line),
settings,
output,
);
} else {
print_sorted(merged_iter.map(|(line, _)| line), settings);
print_sorted(merged_iter.map(|(line, _)| line), settings, output);
}
}
ReadResult::EmptyInput => {
@ -211,7 +221,7 @@ fn read_write_loop<I: WriteableTmpFile>(
}
let tmp_dir = crash_if_err!(
1,
2,
tempfile::Builder::new()
.prefix("uutils_sort")
.tempdir_in(tmp_dir_parent)

View file

@ -25,7 +25,7 @@ use tempfile::TempDir;
use crate::{
chunks::{self, Chunk, RecycledChunk},
compare_by, GlobalSettings,
compare_by, GlobalSettings, Output,
};
/// Merge pre-sorted `Box<dyn Read>`s.
@ -238,8 +238,8 @@ pub struct FileMerger<'a> {
impl<'a> FileMerger<'a> {
/// Write the merged contents to the output file.
pub fn write_all(&mut self, settings: &GlobalSettings) {
let mut out = settings.out_writer();
pub fn write_all(&mut self, settings: &GlobalSettings, output: Output) {
let mut out = output.into_write();
self.write_all_to(settings, &mut out);
}

View file

@ -37,7 +37,7 @@ use std::env;
use std::ffi::OsStr;
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
use std::ops::Range;
use std::path::Path;
use std::path::PathBuf;
@ -146,6 +146,29 @@ impl SortMode {
}
}
pub struct Output {
file: Option<File>,
}
impl Output {
fn new(name: Option<&str>) -> Self {
Self {
file: name.map(|name| {
File::create(name).unwrap_or_else(|e| {
crash!(2, "open failed: {}: {}", name, strip_errno(&e.to_string()))
})
}),
}
}
fn into_write(self) -> Box<dyn Write> {
match self.file {
Some(file) => Box::new(file),
None => Box::new(stdout()),
}
}
}
#[derive(Clone)]
pub struct GlobalSettings {
mode: SortMode,
@ -156,7 +179,6 @@ pub struct GlobalSettings {
ignore_non_printing: bool,
merge: bool,
reverse: bool,
output_file: Option<String>,
stable: bool,
unique: bool,
check: bool,
@ -209,19 +231,6 @@ impl GlobalSettings {
}
}
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
match self.output_file {
Some(ref filename) => match File::create(Path::new(&filename)) {
Ok(f) => BufWriter::new(Box::new(f) as Box<dyn Write>),
Err(e) => {
show_error!("{0}: {1}", filename, e.to_string());
panic!("Could not open output file");
}
},
None => BufWriter::new(Box::new(stdout()) as Box<dyn Write>),
}
}
/// Precompute some data needed for sorting.
/// This function **must** be called before starting to sort, and `GlobalSettings` may not be altered
/// afterwards.
@ -253,7 +262,6 @@ impl Default for GlobalSettings {
ignore_non_printing: false,
merge: false,
reverse: false,
output_file: None,
stable: false,
unique: false,
check: false,
@ -942,7 +950,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let usage = get_usage();
let mut settings: GlobalSettings = Default::default();
let matches = uu_app().usage(&usage[..]).get_matches_from(args);
let matches = uu_app().usage(&usage[..]).get_matches_from_safe(args);
let matches = crash_if_err!(2, matches);
settings.debug = matches.is_present(options::DEBUG);
@ -1051,7 +1061,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.ignore_leading_blanks = matches.is_present(options::IGNORE_LEADING_BLANKS);
settings.output_file = matches.value_of(options::OUTPUT).map(String::from);
settings.reverse = matches.is_present(options::REVERSE);
settings.stable = matches.is_present(options::STABLE);
settings.unique = matches.is_present(options::UNIQUE);
@ -1060,7 +1069,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
/* if no file, default to stdin */
files.push("-".to_owned());
} else if settings.check && files.len() != 1 {
crash!(1, "extra operand `{}' not allowed with -c", files[1])
crash!(2, "extra operand `{}' not allowed with -c", files[1])
}
if let Some(arg) = matches.args.get(options::SEPARATOR) {
@ -1070,7 +1079,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
separator = "\0";
}
if separator.len() != 1 {
crash!(1, "separator must be exactly one character long");
crash!(2, "separator must be exactly one character long");
}
settings.separator = Some(separator.chars().next().unwrap())
}
@ -1100,9 +1109,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
);
}
// Verify that we can open all input files.
// It is the correct behavior to close all files afterwards,
// and to reopen them at a later point. This is different from how the output file is handled,
// probably to prevent running out of file descriptors.
for file in &files {
open(file);
}
let output = Output::new(matches.value_of(options::OUTPUT));
settings.init_precomputed();
exec(&files, &settings)
exec(&files, &settings, output)
}
pub fn uu_app() -> App<'static, 'static> {
@ -1113,73 +1132,57 @@ pub fn uu_app() -> App<'static, 'static> {
Arg::with_name(options::modes::SORT)
.long(options::modes::SORT)
.takes_value(true)
.possible_values(
&[
"general-numeric",
"human-numeric",
"month",
"numeric",
"version",
"random",
]
)
.conflicts_with_all(&options::modes::ALL_SORT_MODES)
)
.arg(
make_sort_mode_arg(
options::modes::HUMAN_NUMERIC,
"h",
"compare according to human readable sizes, eg 1M > 100k"
),
)
.arg(
make_sort_mode_arg(
options::modes::MONTH,
"M",
"compare according to month name abbreviation"
),
)
.arg(
make_sort_mode_arg(
options::modes::NUMERIC,
"n",
"compare according to string numerical value"
),
)
.arg(
make_sort_mode_arg(
options::modes::GENERAL_NUMERIC,
"g",
"compare according to string general numerical value"
),
)
.arg(
make_sort_mode_arg(
options::modes::VERSION,
"V",
"Sort by SemVer version number, eg 1.12.2 > 1.1.2",
),
)
.arg(
make_sort_mode_arg(
options::modes::RANDOM,
"R",
"shuffle in random order",
),
.possible_values(&[
"general-numeric",
"human-numeric",
"month",
"numeric",
"version",
"random",
])
.conflicts_with_all(&options::modes::ALL_SORT_MODES),
)
.arg(make_sort_mode_arg(
options::modes::HUMAN_NUMERIC,
"h",
"compare according to human readable sizes, eg 1M > 100k",
))
.arg(make_sort_mode_arg(
options::modes::MONTH,
"M",
"compare according to month name abbreviation",
))
.arg(make_sort_mode_arg(
options::modes::NUMERIC,
"n",
"compare according to string numerical value",
))
.arg(make_sort_mode_arg(
options::modes::GENERAL_NUMERIC,
"g",
"compare according to string general numerical value",
))
.arg(make_sort_mode_arg(
options::modes::VERSION,
"V",
"Sort by SemVer version number, eg 1.12.2 > 1.1.2",
))
.arg(make_sort_mode_arg(
options::modes::RANDOM,
"R",
"shuffle in random order",
))
.arg(
Arg::with_name(options::DICTIONARY_ORDER)
.short("d")
.long(options::DICTIONARY_ORDER)
.help("consider only blanks and alphanumeric characters")
.conflicts_with_all(
&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
]
),
.conflicts_with_all(&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
]),
)
.arg(
Arg::with_name(options::MERGE)
@ -1207,7 +1210,10 @@ pub fn uu_app() -> App<'static, 'static> {
.short("C")
.long(options::check::CHECK_SILENT)
.conflicts_with(options::OUTPUT)
.help("exit successfully if the given file is already sorted, and exit with status 1 otherwise."),
.help(
"exit successfully if the given file is already sorted,\
and exit with status 1 otherwise.",
),
)
.arg(
Arg::with_name(options::IGNORE_CASE)
@ -1220,14 +1226,12 @@ pub fn uu_app() -> App<'static, 'static> {
.short("i")
.long(options::IGNORE_NONPRINTING)
.help("ignore nonprinting characters")
.conflicts_with_all(
&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH
]
),
.conflicts_with_all(&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
]),
)
.arg(
Arg::with_name(options::IGNORE_LEADING_BLANKS)
@ -1276,7 +1280,8 @@ pub fn uu_app() -> App<'static, 'static> {
.short("t")
.long(options::SEPARATOR)
.help("custom separator for -k")
.takes_value(true))
.takes_value(true),
)
.arg(
Arg::with_name(options::ZERO_TERMINATED)
.short("z")
@ -1311,13 +1316,13 @@ pub fn uu_app() -> App<'static, 'static> {
.long(options::COMPRESS_PROG)
.help("compress temporary files with PROG, decompress with PROG -d")
.long_help("PROG has to take input from stdin and output to stdout")
.value_name("PROG")
.value_name("PROG"),
)
.arg(
Arg::with_name(options::BATCH_SIZE)
.long(options::BATCH_SIZE)
.help("Merge at most N_MERGE inputs at once.")
.value_name("N_MERGE")
.value_name("N_MERGE"),
)
.arg(
Arg::with_name(options::FILES0_FROM)
@ -1332,22 +1337,26 @@ pub fn uu_app() -> App<'static, 'static> {
.long(options::DEBUG)
.help("underline the parts of the line that are actually used for sorting"),
)
.arg(Arg::with_name(options::FILES).multiple(true).takes_value(true))
.arg(
Arg::with_name(options::FILES)
.multiple(true)
.takes_value(true),
)
}
fn exec(files: &[String], settings: &GlobalSettings) -> i32 {
fn exec(files: &[String], settings: &GlobalSettings, output: Output) -> i32 {
if settings.merge {
let mut file_merger = merge::merge(files.iter().map(open), settings);
file_merger.write_all(settings);
file_merger.write_all(settings, output);
} else if settings.check {
if files.len() > 1 {
crash!(1, "only one file allowed with -c");
crash!(2, "only one file allowed with -c");
}
return check::check(files.first().unwrap(), settings);
} else {
let mut lines = files.iter().map(open);
ext_sort(&mut lines, settings);
ext_sort(&mut lines, settings, output);
}
0
}
@ -1619,14 +1628,22 @@ fn month_compare(a: &str, b: &str) -> Ordering {
}
}
fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(iter: T, settings: &GlobalSettings) {
let mut writer = settings.out_writer();
fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
iter: T,
settings: &GlobalSettings,
output: Output,
) {
let mut writer = output.into_write();
for line in iter {
line.print(&mut writer, settings);
}
}
// from cat.rs
/// Strips the trailing " (os error XX)" from io error strings.
fn strip_errno(err: &str) -> &str {
&err[..err.find(" (os error ").unwrap_or(err.len())]
}
fn open(path: impl AsRef<OsStr>) -> Box<dyn Read + Send> {
let path = path.as_ref();
if path == "-" {
@ -1634,10 +1651,17 @@ fn open(path: impl AsRef<OsStr>) -> Box<dyn Read + Send> {
return Box::new(stdin) as Box<dyn Read + Send>;
}
match File::open(Path::new(path)) {
let path = Path::new(path);
match File::open(path) {
Ok(f) => Box::new(f) as Box<dyn Read + Send>,
Err(e) => {
crash!(2, "cannot read: {0:?}: {1}", path, e);
crash!(
2,
"cannot read: {0}: {1}",
path.to_string_lossy(),
strip_errno(&e.to_string())
);
}
}
}

View file

@ -181,7 +181,7 @@ fn test_check_zero_terminated_failure() {
.arg("-c")
.arg("zero-terminated.txt")
.fails()
.stdout_is("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n");
.stderr_only("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n");
}
#[test]
@ -775,13 +775,13 @@ fn test_check() {
.arg(diagnose_arg)
.arg("check_fail.txt")
.fails()
.stdout_is("sort: check_fail.txt:6: disorder: 5\n");
.stderr_only("sort: check_fail.txt:6: disorder: 5\n");
new_ucmd!()
.arg(diagnose_arg)
.arg("multiple_files.expected")
.succeeds()
.stdout_is("");
.stderr_is("");
}
}
@ -839,9 +839,9 @@ fn test_nonexistent_file() {
.status_code(2)
.stderr_only(
#[cfg(not(windows))]
"sort: cannot read: \"nonexistent.txt\": No such file or directory (os error 2)",
"sort: cannot read: nonexistent.txt: No such file or directory",
#[cfg(windows)]
"sort: cannot read: \"nonexistent.txt\": The system cannot find the file specified. (os error 2)",
"sort: cannot read: nonexistent.txt: The system cannot find the file specified.",
);
}
@ -960,6 +960,49 @@ fn test_key_takes_one_arg() {
.stdout_is_fixture("keys_open_ended.expected");
}
#[test]
fn test_verifies_out_file() {
let inputs = ["" /* no input */, "some input"];
for &input in &inputs {
new_ucmd!()
.args(&["-o", "nonexistent_dir/nonexistent_file"])
.pipe_in(input)
.fails()
.status_code(2)
.stderr_only(
#[cfg(not(windows))]
"sort: open failed: nonexistent_dir/nonexistent_file: No such file or directory",
#[cfg(windows)]
"sort: open failed: nonexistent_dir/nonexistent_file: The system cannot find the path specified.",
);
}
}
#[test]
fn test_verifies_files_after_keys() {
new_ucmd!()
.args(&[
"-o",
"nonexistent_dir/nonexistent_file",
"-k",
"0",
"nonexistent_dir/input_file",
])
.fails()
.status_code(2)
.stderr_contains("failed to parse key");
}
#[test]
#[cfg(unix)]
fn test_verifies_input_files() {
new_ucmd!()
.args(&["/dev/random", "nonexistent_file"])
.fails()
.status_code(2)
.stderr_is("sort: cannot read: nonexistent_file: No such file or directory");
}
#[test]
fn test_separator_null() {
new_ucmd!()