From b2893db5a45a465469539a531f74ad2bdccd3645 Mon Sep 17 00:00:00 2001 From: Charlie-Zheng Date: Mon, 2 Jun 2025 03:14:49 -0600 Subject: [PATCH] sort: Implement the last changes to make sort-files0-from pass (#8011) --- src/uu/sort/src/merge.rs | 4 +- src/uu/sort/src/sort.rs | 118 +++++++++++++++++++++-------- tests/by-util/test_sort.rs | 149 +++++++++++++++++++++++++++++++++++++ 3 files changed, 240 insertions(+), 31 deletions(-) diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index fb7e2c8bf..bbd0c2c83 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -13,7 +13,7 @@ use std::{ cmp::Ordering, - ffi::OsString, + ffi::{OsStr, OsString}, fs::{self, File}, io::{BufWriter, Read, Write}, iter, @@ -38,7 +38,7 @@ use crate::{ /// and replace its occurrences in the inputs with that copy. fn replace_output_file_in_input_files( files: &mut [OsString], - output: Option<&str>, + output: Option<&OsStr>, tmp_dir: &mut TmpDirWrapper, ) -> UResult<()> { let mut copy: Option = None; diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 2e44fba29..7de7eb1ed 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -131,7 +131,10 @@ pub enum SortError { }, #[error("open failed: {}: {}", .path.maybe_quote(), strip_errno(.error))] - OpenFailed { path: String, error: std::io::Error }, + OpenFailed { + path: PathBuf, + error: std::io::Error, + }, #[error("failed to parse key {}: {}", .key.quote(), .msg)] ParseKeyError { key: String, msg: String }, @@ -154,11 +157,23 @@ pub enum SortError { #[error("cannot create temporary file in '{}':", .path.display())] TmpFileCreationFailed { path: PathBuf }, + #[error("extra operand '{}'\nfile operands cannot be combined with --files0-from\nTry '{} --help' for more information.", .file.display(), uucore::execution_phrase())] + FileOperandsCombined { file: PathBuf }, + #[error("{error}")] Uft8Error { error: Utf8Error }, #[error("multiple output files specified")] MultipleOutputFiles, + + #[error("when reading file names from stdin, no file name of '-' allowed")] + MinusInStdIn, + + #[error("no input from '{}'", .file.display())] + EmptyInputFile { file: PathBuf }, + + #[error("{}:{}: invalid zero-length file name", .file.display(), .line_num)] + ZeroLengthFileName { file: PathBuf, line_num: usize }, } impl UError for SortError { @@ -204,24 +219,25 @@ impl SortMode { } pub struct Output { - file: Option<(String, File)>, + file: Option<(OsString, File)>, } impl Output { - fn new(name: Option<&str>) -> UResult { + fn new(name: Option<&OsStr>) -> UResult { let file = if let Some(name) = name { + let path = Path::new(name); // This is different from `File::create()` because we don't truncate the output yet. // This allows using the output file as an input file. #[allow(clippy::suspicious_open_options)] let file = OpenOptions::new() .write(true) .create(true) - .open(name) + .open(path) .map_err(|e| SortError::OpenFailed { - path: name.to_owned(), + path: path.to_owned(), error: e, })?; - Some((name.to_owned(), file)) + Some((name.to_os_string(), file)) } else { None }; @@ -239,9 +255,9 @@ impl Output { }) } - fn as_output_name(&self) -> Option<&str> { + fn as_output_name(&self) -> Option<&OsStr> { match &self.file { - Some((name, _file)) => Some(name), + Some((name, _file)) => Some(name.as_os_str()), None => None, } } @@ -1016,6 +1032,8 @@ fn get_rlimit() -> UResult { } } +const STDIN_FILE: &str = "-"; + #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { @@ -1039,7 +1057,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Prevent -o/--output to be specified multiple times if matches - .get_occurrences::(options::OUTPUT) + .get_occurrences::(options::OUTPUT) .is_some_and(|out| out.len() > 1) { return Err(SortError::MultipleOutputFiles.into()); @@ -1049,21 +1067,45 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // check whether user specified a zero terminated list of files for input, otherwise read files from args let mut files: Vec = if matches.contains_id(options::FILES0_FROM) { - let files0_from: Vec = matches - .get_many::(options::FILES0_FROM) - .map(|v| v.map(ToOwned::to_owned).collect()) + let files0_from: PathBuf = matches + .get_one::(options::FILES0_FROM) + .map(|v| v.into()) .unwrap_or_default(); + // Cannot combine FILES with FILES0_FROM + if let Some(s) = matches.get_one::(options::FILES) { + return Err(SortError::FileOperandsCombined { file: s.into() }.into()); + } + let mut files = Vec::new(); - for path in &files0_from { - let reader = open(path)?; - let buf_reader = BufReader::new(reader); - for line in buf_reader.split(b'\0').flatten() { - files.push(OsString::from( - std::str::from_utf8(&line) - .expect("Could not parse string from zero terminated input."), - )); + + // sort errors with "cannot open: [...]" instead of "cannot read: [...]" here + let reader = open_with_open_failed_error(&files0_from)?; + let buf_reader = BufReader::new(reader); + for (line_num, line) in buf_reader.split(b'\0').flatten().enumerate() { + let f = std::str::from_utf8(&line) + .expect("Could not parse string from zero terminated input."); + match f { + STDIN_FILE => { + return Err(SortError::MinusInStdIn.into()); + } + "" => { + return Err(SortError::ZeroLengthFileName { + file: files0_from, + line_num: line_num + 1, + } + .into()); + } + _ => {} } + + files.push(OsString::from( + std::str::from_utf8(&line) + .expect("Could not parse string from zero terminated input."), + )); + } + if files.is_empty() { + return Err(SortError::EmptyInputFile { file: files0_from }.into()); } files } else { @@ -1212,7 +1254,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if files.is_empty() { /* if no file, default to stdin */ - files.push("-".to_string().into()); + files.push(OsString::from(STDIN_FILE)); } else if settings.check && files.len() != 1 { return Err(UUsageError::new( 2, @@ -1282,8 +1324,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let output = Output::new( matches - .get_one::(options::OUTPUT) - .map(|s| s.as_str()), + .get_one::(options::OUTPUT) + .map(|s| s.as_os_str()), )?; settings.init_precomputed(); @@ -1437,6 +1479,7 @@ pub fn uu_app() -> Command { .short('o') .long(options::OUTPUT) .help("write output to FILENAME instead of stdout") + .value_parser(ValueParser::os_string()) .value_name("FILENAME") .value_hint(clap::ValueHint::FilePath) // To detect multiple occurrences and raise an error @@ -1522,9 +1565,8 @@ pub fn uu_app() -> Command { .arg( Arg::new(options::FILES0_FROM) .long(options::FILES0_FROM) - .help("read input from the files specified by NUL-terminated NUL_FILES") - .value_name("NUL_FILES") - .action(ArgAction::Append) + .help("read input from the files specified by NUL-terminated NUL_FILE") + .value_name("NUL_FILE") .value_parser(ValueParser::os_string()) .value_hint(clap::ValueHint::FilePath), ) @@ -1865,7 +1907,7 @@ fn print_sorted<'a, T: Iterator>>( ) -> UResult<()> { let output_name = output .as_output_name() - .unwrap_or("standard output") + .unwrap_or(OsStr::new("standard output")) .to_owned(); let ctx = || format!("write failed: {}", output_name.maybe_quote()); @@ -1879,13 +1921,12 @@ fn print_sorted<'a, T: Iterator>>( fn open(path: impl AsRef) -> UResult> { let path = path.as_ref(); - if path == "-" { + if path == STDIN_FILE { let stdin = stdin(); return Ok(Box::new(stdin) as Box); } let path = Path::new(path); - match File::open(path) { Ok(f) => Ok(Box::new(f) as Box), Err(error) => Err(SortError::ReadFailed { @@ -1896,6 +1937,25 @@ fn open(path: impl AsRef) -> UResult> { } } +fn open_with_open_failed_error(path: impl AsRef) -> UResult> { + // On error, returns an OpenFailed error instead of a ReadFailed error + let path = path.as_ref(); + if path == STDIN_FILE { + let stdin = stdin(); + return Ok(Box::new(stdin) as Box); + } + + let path = Path::new(path); + match File::open(path) { + Ok(f) => Ok(Box::new(f) as Box), + Err(error) => Err(SortError::OpenFailed { + path: path.to_owned(), + error, + } + .into()), + } +} + fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String { // NOTE: // GNU's sort echos affected flag, -S or --buffer-size, depending on user's selection diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 988360ca6..0e20f1c9e 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1354,3 +1354,152 @@ fn test_multiple_output_files() { .fails_with_code(2) .stderr_is("sort: multiple output files specified\n"); } + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "f-extra-arg" +fn test_files0_from_extra_arg() { + new_ucmd!() + .args(&["--files0-from", "-", "foo"]) + .fails_with_code(2) + .stderr_contains( + "sort: extra operand 'foo'\nfile operands cannot be combined with --files0-from\n", + ) + .no_stdout(); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "missing" +fn test_files0_from_missing() { + new_ucmd!() + .args(&["--files0-from", "missing_file"]) + .fails_with_code(2) + .stderr_only( + #[cfg(not(windows))] + "sort: open failed: missing_file: No such file or directory\n", + #[cfg(windows)] + "sort: open failed: missing_file: The system cannot find the file specified.\n", + ); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "minus-in-stdin" +fn test_files0_from_minus_in_stdin() { + new_ucmd!() + .args(&["--files0-from", "-"]) + .pipe_in("-") + .fails_with_code(2) + .stderr_only("sort: when reading file names from stdin, no file name of '-' allowed\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "empty" +fn test_files0_from_empty() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + scene + .ucmd() + .args(&["--files0-from", "file"]) + .fails_with_code(2) + .stderr_only("sort: no input from 'file'\n"); +} + +#[cfg(target_os = "linux")] +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "empty-non-regular" +fn test_files0_from_empty_non_regular() { + new_ucmd!() + .args(&["--files0-from", "/dev/null"]) + .fails_with_code(2) + .stderr_only("sort: no input from '/dev/null'\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "nul-1" +fn test_files0_from_nul() { + new_ucmd!() + .args(&["--files0-from", "-"]) + .pipe_in("\0") + .fails_with_code(2) + .stderr_only("sort: -:1: invalid zero-length file name\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "nul-2" +fn test_files0_from_nul2() { + new_ucmd!() + .args(&["--files0-from", "-"]) + .pipe_in("\0\0") + .fails_with_code(2) + .stderr_only("sort: -:1: invalid zero-length file name\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "1" +fn test_files0_from_1() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + at.append("file", "a"); + scene + .ucmd() + .args(&["--files0-from", "-"]) + .pipe_in("file") + .succeeds() + .stdout_only("a\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "1a" +fn test_files0_from_1a() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + at.append("file", "a"); + scene + .ucmd() + .args(&["--files0-from", "-"]) + .pipe_in("file\0") + .succeeds() + .stdout_only("a\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "2" +fn test_files0_from_2() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + at.append("file", "a"); + scene + .ucmd() + .args(&["--files0-from", "-"]) + .pipe_in("file\0file") + .succeeds() + .stdout_only("a\na\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "2a" +fn test_files0_from_2a() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + at.append("file", "a"); + scene + .ucmd() + .args(&["--files0-from", "-"]) + .pipe_in("file\0file\0") + .succeeds() + .stdout_only("a\na\n"); +} + +#[test] +// Test for GNU tests/sort/sort-files0-from.pl "zero-len" +fn test_files0_from_zero_length() { + new_ucmd!() + .args(&["--files0-from", "-"]) + .pipe_in("g\0\0b\0\0") + .fails_with_code(2) + .stderr_only("sort: -:2: invalid zero-length file name\n"); +}