1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

sort: Implement the last changes to make sort-files0-from pass (#8011)

This commit is contained in:
Charlie-Zheng 2025-06-02 03:14:49 -06:00 committed by GitHub
parent dfc4072181
commit b2893db5a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 240 additions and 31 deletions

View file

@ -13,7 +13,7 @@
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
ffi::OsString, ffi::{OsStr, OsString},
fs::{self, File}, fs::{self, File},
io::{BufWriter, Read, Write}, io::{BufWriter, Read, Write},
iter, iter,
@ -38,7 +38,7 @@ use crate::{
/// and replace its occurrences in the inputs with that copy. /// and replace its occurrences in the inputs with that copy.
fn replace_output_file_in_input_files( fn replace_output_file_in_input_files(
files: &mut [OsString], files: &mut [OsString],
output: Option<&str>, output: Option<&OsStr>,
tmp_dir: &mut TmpDirWrapper, tmp_dir: &mut TmpDirWrapper,
) -> UResult<()> { ) -> UResult<()> {
let mut copy: Option<PathBuf> = None; let mut copy: Option<PathBuf> = None;

View file

@ -131,7 +131,10 @@ pub enum SortError {
}, },
#[error("open failed: {}: {}", .path.maybe_quote(), strip_errno(.error))] #[error("open failed: {}: {}", .path.maybe_quote(), strip_errno(.error))]
OpenFailed { path: String, error: std::io::Error }, OpenFailed {
path: PathBuf,
error: std::io::Error,
},
#[error("failed to parse key {}: {}", .key.quote(), .msg)] #[error("failed to parse key {}: {}", .key.quote(), .msg)]
ParseKeyError { key: String, msg: String }, ParseKeyError { key: String, msg: String },
@ -154,11 +157,23 @@ pub enum SortError {
#[error("cannot create temporary file in '{}':", .path.display())] #[error("cannot create temporary file in '{}':", .path.display())]
TmpFileCreationFailed { path: PathBuf }, TmpFileCreationFailed { path: PathBuf },
#[error("extra operand '{}'\nfile operands cannot be combined with --files0-from\nTry '{} --help' for more information.", .file.display(), uucore::execution_phrase())]
FileOperandsCombined { file: PathBuf },
#[error("{error}")] #[error("{error}")]
Uft8Error { error: Utf8Error }, Uft8Error { error: Utf8Error },
#[error("multiple output files specified")] #[error("multiple output files specified")]
MultipleOutputFiles, MultipleOutputFiles,
#[error("when reading file names from stdin, no file name of '-' allowed")]
MinusInStdIn,
#[error("no input from '{}'", .file.display())]
EmptyInputFile { file: PathBuf },
#[error("{}:{}: invalid zero-length file name", .file.display(), .line_num)]
ZeroLengthFileName { file: PathBuf, line_num: usize },
} }
impl UError for SortError { impl UError for SortError {
@ -204,24 +219,25 @@ impl SortMode {
} }
pub struct Output { pub struct Output {
file: Option<(String, File)>, file: Option<(OsString, File)>,
} }
impl Output { impl Output {
fn new(name: Option<&str>) -> UResult<Self> { fn new(name: Option<&OsStr>) -> UResult<Self> {
let file = if let Some(name) = name { let file = if let Some(name) = name {
let path = Path::new(name);
// This is different from `File::create()` because we don't truncate the output yet. // This is different from `File::create()` because we don't truncate the output yet.
// This allows using the output file as an input file. // This allows using the output file as an input file.
#[allow(clippy::suspicious_open_options)] #[allow(clippy::suspicious_open_options)]
let file = OpenOptions::new() let file = OpenOptions::new()
.write(true) .write(true)
.create(true) .create(true)
.open(name) .open(path)
.map_err(|e| SortError::OpenFailed { .map_err(|e| SortError::OpenFailed {
path: name.to_owned(), path: path.to_owned(),
error: e, error: e,
})?; })?;
Some((name.to_owned(), file)) Some((name.to_os_string(), file))
} else { } else {
None None
}; };
@ -239,9 +255,9 @@ impl Output {
}) })
} }
fn as_output_name(&self) -> Option<&str> { fn as_output_name(&self) -> Option<&OsStr> {
match &self.file { match &self.file {
Some((name, _file)) => Some(name), Some((name, _file)) => Some(name.as_os_str()),
None => None, None => None,
} }
} }
@ -1016,6 +1032,8 @@ fn get_rlimit() -> UResult<usize> {
} }
} }
const STDIN_FILE: &str = "-";
#[uucore::main] #[uucore::main]
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
@ -1039,7 +1057,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// Prevent -o/--output to be specified multiple times // Prevent -o/--output to be specified multiple times
if matches if matches
.get_occurrences::<String>(options::OUTPUT) .get_occurrences::<OsString>(options::OUTPUT)
.is_some_and(|out| out.len() > 1) .is_some_and(|out| out.len() > 1)
{ {
return Err(SortError::MultipleOutputFiles.into()); return Err(SortError::MultipleOutputFiles.into());
@ -1049,21 +1067,45 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// check whether user specified a zero terminated list of files for input, otherwise read files from args // check whether user specified a zero terminated list of files for input, otherwise read files from args
let mut files: Vec<OsString> = if matches.contains_id(options::FILES0_FROM) { let mut files: Vec<OsString> = if matches.contains_id(options::FILES0_FROM) {
let files0_from: Vec<OsString> = matches let files0_from: PathBuf = matches
.get_many::<OsString>(options::FILES0_FROM) .get_one::<OsString>(options::FILES0_FROM)
.map(|v| v.map(ToOwned::to_owned).collect()) .map(|v| v.into())
.unwrap_or_default(); .unwrap_or_default();
// Cannot combine FILES with FILES0_FROM
if let Some(s) = matches.get_one::<OsString>(options::FILES) {
return Err(SortError::FileOperandsCombined { file: s.into() }.into());
}
let mut files = Vec::new(); let mut files = Vec::new();
for path in &files0_from {
let reader = open(path)?; // sort errors with "cannot open: [...]" instead of "cannot read: [...]" here
let buf_reader = BufReader::new(reader); let reader = open_with_open_failed_error(&files0_from)?;
for line in buf_reader.split(b'\0').flatten() { let buf_reader = BufReader::new(reader);
files.push(OsString::from( for (line_num, line) in buf_reader.split(b'\0').flatten().enumerate() {
std::str::from_utf8(&line) let f = std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input."), .expect("Could not parse string from zero terminated input.");
)); match f {
STDIN_FILE => {
return Err(SortError::MinusInStdIn.into());
}
"" => {
return Err(SortError::ZeroLengthFileName {
file: files0_from,
line_num: line_num + 1,
}
.into());
}
_ => {}
} }
files.push(OsString::from(
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input."),
));
}
if files.is_empty() {
return Err(SortError::EmptyInputFile { file: files0_from }.into());
} }
files files
} else { } else {
@ -1212,7 +1254,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
if files.is_empty() { if files.is_empty() {
/* if no file, default to stdin */ /* if no file, default to stdin */
files.push("-".to_string().into()); files.push(OsString::from(STDIN_FILE));
} else if settings.check && files.len() != 1 { } else if settings.check && files.len() != 1 {
return Err(UUsageError::new( return Err(UUsageError::new(
2, 2,
@ -1282,8 +1324,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let output = Output::new( let output = Output::new(
matches matches
.get_one::<String>(options::OUTPUT) .get_one::<OsString>(options::OUTPUT)
.map(|s| s.as_str()), .map(|s| s.as_os_str()),
)?; )?;
settings.init_precomputed(); settings.init_precomputed();
@ -1437,6 +1479,7 @@ pub fn uu_app() -> Command {
.short('o') .short('o')
.long(options::OUTPUT) .long(options::OUTPUT)
.help("write output to FILENAME instead of stdout") .help("write output to FILENAME instead of stdout")
.value_parser(ValueParser::os_string())
.value_name("FILENAME") .value_name("FILENAME")
.value_hint(clap::ValueHint::FilePath) .value_hint(clap::ValueHint::FilePath)
// To detect multiple occurrences and raise an error // To detect multiple occurrences and raise an error
@ -1522,9 +1565,8 @@ pub fn uu_app() -> Command {
.arg( .arg(
Arg::new(options::FILES0_FROM) Arg::new(options::FILES0_FROM)
.long(options::FILES0_FROM) .long(options::FILES0_FROM)
.help("read input from the files specified by NUL-terminated NUL_FILES") .help("read input from the files specified by NUL-terminated NUL_FILE")
.value_name("NUL_FILES") .value_name("NUL_FILE")
.action(ArgAction::Append)
.value_parser(ValueParser::os_string()) .value_parser(ValueParser::os_string())
.value_hint(clap::ValueHint::FilePath), .value_hint(clap::ValueHint::FilePath),
) )
@ -1865,7 +1907,7 @@ fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
) -> UResult<()> { ) -> UResult<()> {
let output_name = output let output_name = output
.as_output_name() .as_output_name()
.unwrap_or("standard output") .unwrap_or(OsStr::new("standard output"))
.to_owned(); .to_owned();
let ctx = || format!("write failed: {}", output_name.maybe_quote()); let ctx = || format!("write failed: {}", output_name.maybe_quote());
@ -1879,13 +1921,12 @@ fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> { fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
let path = path.as_ref(); let path = path.as_ref();
if path == "-" { if path == STDIN_FILE {
let stdin = stdin(); let stdin = stdin();
return Ok(Box::new(stdin) as Box<dyn Read + Send>); return Ok(Box::new(stdin) as Box<dyn Read + Send>);
} }
let path = Path::new(path); let path = Path::new(path);
match File::open(path) { match File::open(path) {
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>), Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
Err(error) => Err(SortError::ReadFailed { Err(error) => Err(SortError::ReadFailed {
@ -1896,6 +1937,25 @@ fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
} }
} }
fn open_with_open_failed_error(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
// On error, returns an OpenFailed error instead of a ReadFailed error
let path = path.as_ref();
if path == STDIN_FILE {
let stdin = stdin();
return Ok(Box::new(stdin) as Box<dyn Read + Send>);
}
let path = Path::new(path);
match File::open(path) {
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
Err(error) => Err(SortError::OpenFailed {
path: path.to_owned(),
error,
}
.into()),
}
}
fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String { fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String {
// NOTE: // NOTE:
// GNU's sort echos affected flag, -S or --buffer-size, depending on user's selection // GNU's sort echos affected flag, -S or --buffer-size, depending on user's selection

View file

@ -1354,3 +1354,152 @@ fn test_multiple_output_files() {
.fails_with_code(2) .fails_with_code(2)
.stderr_is("sort: multiple output files specified\n"); .stderr_is("sort: multiple output files specified\n");
} }
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "f-extra-arg"
fn test_files0_from_extra_arg() {
new_ucmd!()
.args(&["--files0-from", "-", "foo"])
.fails_with_code(2)
.stderr_contains(
"sort: extra operand 'foo'\nfile operands cannot be combined with --files0-from\n",
)
.no_stdout();
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "missing"
fn test_files0_from_missing() {
new_ucmd!()
.args(&["--files0-from", "missing_file"])
.fails_with_code(2)
.stderr_only(
#[cfg(not(windows))]
"sort: open failed: missing_file: No such file or directory\n",
#[cfg(windows)]
"sort: open failed: missing_file: The system cannot find the file specified.\n",
);
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "minus-in-stdin"
fn test_files0_from_minus_in_stdin() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("-")
.fails_with_code(2)
.stderr_only("sort: when reading file names from stdin, no file name of '-' allowed\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "empty"
fn test_files0_from_empty() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
scene
.ucmd()
.args(&["--files0-from", "file"])
.fails_with_code(2)
.stderr_only("sort: no input from 'file'\n");
}
#[cfg(target_os = "linux")]
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "empty-non-regular"
fn test_files0_from_empty_non_regular() {
new_ucmd!()
.args(&["--files0-from", "/dev/null"])
.fails_with_code(2)
.stderr_only("sort: no input from '/dev/null'\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "nul-1"
fn test_files0_from_nul() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("\0")
.fails_with_code(2)
.stderr_only("sort: -:1: invalid zero-length file name\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "nul-2"
fn test_files0_from_nul2() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("\0\0")
.fails_with_code(2)
.stderr_only("sort: -:1: invalid zero-length file name\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "1"
fn test_files0_from_1() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file")
.succeeds()
.stdout_only("a\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "1a"
fn test_files0_from_1a() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file\0")
.succeeds()
.stdout_only("a\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "2"
fn test_files0_from_2() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file\0file")
.succeeds()
.stdout_only("a\na\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "2a"
fn test_files0_from_2a() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file\0file\0")
.succeeds()
.stdout_only("a\na\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "zero-len"
fn test_files0_from_zero_length() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("g\0\0b\0\0")
.fails_with_code(2)
.stderr_only("sort: -:2: invalid zero-length file name\n");
}