1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

sort: Implement the last changes to make sort-files0-from pass (#8011)

This commit is contained in:
Charlie-Zheng 2025-06-02 03:14:49 -06:00 committed by GitHub
parent dfc4072181
commit b2893db5a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 240 additions and 31 deletions

View file

@ -13,7 +13,7 @@
use std::{
cmp::Ordering,
ffi::OsString,
ffi::{OsStr, OsString},
fs::{self, File},
io::{BufWriter, Read, Write},
iter,
@ -38,7 +38,7 @@ use crate::{
/// and replace its occurrences in the inputs with that copy.
fn replace_output_file_in_input_files(
files: &mut [OsString],
output: Option<&str>,
output: Option<&OsStr>,
tmp_dir: &mut TmpDirWrapper,
) -> UResult<()> {
let mut copy: Option<PathBuf> = None;

View file

@ -131,7 +131,10 @@ pub enum SortError {
},
#[error("open failed: {}: {}", .path.maybe_quote(), strip_errno(.error))]
OpenFailed { path: String, error: std::io::Error },
OpenFailed {
path: PathBuf,
error: std::io::Error,
},
#[error("failed to parse key {}: {}", .key.quote(), .msg)]
ParseKeyError { key: String, msg: String },
@ -154,11 +157,23 @@ pub enum SortError {
#[error("cannot create temporary file in '{}':", .path.display())]
TmpFileCreationFailed { path: PathBuf },
#[error("extra operand '{}'\nfile operands cannot be combined with --files0-from\nTry '{} --help' for more information.", .file.display(), uucore::execution_phrase())]
FileOperandsCombined { file: PathBuf },
#[error("{error}")]
Uft8Error { error: Utf8Error },
#[error("multiple output files specified")]
MultipleOutputFiles,
#[error("when reading file names from stdin, no file name of '-' allowed")]
MinusInStdIn,
#[error("no input from '{}'", .file.display())]
EmptyInputFile { file: PathBuf },
#[error("{}:{}: invalid zero-length file name", .file.display(), .line_num)]
ZeroLengthFileName { file: PathBuf, line_num: usize },
}
impl UError for SortError {
@ -204,24 +219,25 @@ impl SortMode {
}
pub struct Output {
file: Option<(String, File)>,
file: Option<(OsString, File)>,
}
impl Output {
fn new(name: Option<&str>) -> UResult<Self> {
fn new(name: Option<&OsStr>) -> UResult<Self> {
let file = if let Some(name) = name {
let path = Path::new(name);
// This is different from `File::create()` because we don't truncate the output yet.
// This allows using the output file as an input file.
#[allow(clippy::suspicious_open_options)]
let file = OpenOptions::new()
.write(true)
.create(true)
.open(name)
.open(path)
.map_err(|e| SortError::OpenFailed {
path: name.to_owned(),
path: path.to_owned(),
error: e,
})?;
Some((name.to_owned(), file))
Some((name.to_os_string(), file))
} else {
None
};
@ -239,9 +255,9 @@ impl Output {
})
}
fn as_output_name(&self) -> Option<&str> {
fn as_output_name(&self) -> Option<&OsStr> {
match &self.file {
Some((name, _file)) => Some(name),
Some((name, _file)) => Some(name.as_os_str()),
None => None,
}
}
@ -1016,6 +1032,8 @@ fn get_rlimit() -> UResult<usize> {
}
}
const STDIN_FILE: &str = "-";
#[uucore::main]
#[allow(clippy::cognitive_complexity)]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
@ -1039,7 +1057,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// Prevent -o/--output to be specified multiple times
if matches
.get_occurrences::<String>(options::OUTPUT)
.get_occurrences::<OsString>(options::OUTPUT)
.is_some_and(|out| out.len() > 1)
{
return Err(SortError::MultipleOutputFiles.into());
@ -1049,21 +1067,45 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// check whether user specified a zero terminated list of files for input, otherwise read files from args
let mut files: Vec<OsString> = if matches.contains_id(options::FILES0_FROM) {
let files0_from: Vec<OsString> = matches
.get_many::<OsString>(options::FILES0_FROM)
.map(|v| v.map(ToOwned::to_owned).collect())
let files0_from: PathBuf = matches
.get_one::<OsString>(options::FILES0_FROM)
.map(|v| v.into())
.unwrap_or_default();
// Cannot combine FILES with FILES0_FROM
if let Some(s) = matches.get_one::<OsString>(options::FILES) {
return Err(SortError::FileOperandsCombined { file: s.into() }.into());
}
let mut files = Vec::new();
for path in &files0_from {
let reader = open(path)?;
let buf_reader = BufReader::new(reader);
for line in buf_reader.split(b'\0').flatten() {
files.push(OsString::from(
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input."),
));
// sort errors with "cannot open: [...]" instead of "cannot read: [...]" here
let reader = open_with_open_failed_error(&files0_from)?;
let buf_reader = BufReader::new(reader);
for (line_num, line) in buf_reader.split(b'\0').flatten().enumerate() {
let f = std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input.");
match f {
STDIN_FILE => {
return Err(SortError::MinusInStdIn.into());
}
"" => {
return Err(SortError::ZeroLengthFileName {
file: files0_from,
line_num: line_num + 1,
}
.into());
}
_ => {}
}
files.push(OsString::from(
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input."),
));
}
if files.is_empty() {
return Err(SortError::EmptyInputFile { file: files0_from }.into());
}
files
} else {
@ -1212,7 +1254,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
if files.is_empty() {
/* if no file, default to stdin */
files.push("-".to_string().into());
files.push(OsString::from(STDIN_FILE));
} else if settings.check && files.len() != 1 {
return Err(UUsageError::new(
2,
@ -1282,8 +1324,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let output = Output::new(
matches
.get_one::<String>(options::OUTPUT)
.map(|s| s.as_str()),
.get_one::<OsString>(options::OUTPUT)
.map(|s| s.as_os_str()),
)?;
settings.init_precomputed();
@ -1437,6 +1479,7 @@ pub fn uu_app() -> Command {
.short('o')
.long(options::OUTPUT)
.help("write output to FILENAME instead of stdout")
.value_parser(ValueParser::os_string())
.value_name("FILENAME")
.value_hint(clap::ValueHint::FilePath)
// To detect multiple occurrences and raise an error
@ -1522,9 +1565,8 @@ pub fn uu_app() -> Command {
.arg(
Arg::new(options::FILES0_FROM)
.long(options::FILES0_FROM)
.help("read input from the files specified by NUL-terminated NUL_FILES")
.value_name("NUL_FILES")
.action(ArgAction::Append)
.help("read input from the files specified by NUL-terminated NUL_FILE")
.value_name("NUL_FILE")
.value_parser(ValueParser::os_string())
.value_hint(clap::ValueHint::FilePath),
)
@ -1865,7 +1907,7 @@ fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
) -> UResult<()> {
let output_name = output
.as_output_name()
.unwrap_or("standard output")
.unwrap_or(OsStr::new("standard output"))
.to_owned();
let ctx = || format!("write failed: {}", output_name.maybe_quote());
@ -1879,13 +1921,12 @@ fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
let path = path.as_ref();
if path == "-" {
if path == STDIN_FILE {
let stdin = stdin();
return Ok(Box::new(stdin) as Box<dyn Read + Send>);
}
let path = Path::new(path);
match File::open(path) {
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
Err(error) => Err(SortError::ReadFailed {
@ -1896,6 +1937,25 @@ fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
}
}
fn open_with_open_failed_error(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
// On error, returns an OpenFailed error instead of a ReadFailed error
let path = path.as_ref();
if path == STDIN_FILE {
let stdin = stdin();
return Ok(Box::new(stdin) as Box<dyn Read + Send>);
}
let path = Path::new(path);
match File::open(path) {
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
Err(error) => Err(SortError::OpenFailed {
path: path.to_owned(),
error,
}
.into()),
}
}
fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String {
// NOTE:
// GNU's sort echos affected flag, -S or --buffer-size, depending on user's selection

View file

@ -1354,3 +1354,152 @@ fn test_multiple_output_files() {
.fails_with_code(2)
.stderr_is("sort: multiple output files specified\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "f-extra-arg"
fn test_files0_from_extra_arg() {
new_ucmd!()
.args(&["--files0-from", "-", "foo"])
.fails_with_code(2)
.stderr_contains(
"sort: extra operand 'foo'\nfile operands cannot be combined with --files0-from\n",
)
.no_stdout();
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "missing"
fn test_files0_from_missing() {
new_ucmd!()
.args(&["--files0-from", "missing_file"])
.fails_with_code(2)
.stderr_only(
#[cfg(not(windows))]
"sort: open failed: missing_file: No such file or directory\n",
#[cfg(windows)]
"sort: open failed: missing_file: The system cannot find the file specified.\n",
);
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "minus-in-stdin"
fn test_files0_from_minus_in_stdin() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("-")
.fails_with_code(2)
.stderr_only("sort: when reading file names from stdin, no file name of '-' allowed\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "empty"
fn test_files0_from_empty() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
scene
.ucmd()
.args(&["--files0-from", "file"])
.fails_with_code(2)
.stderr_only("sort: no input from 'file'\n");
}
#[cfg(target_os = "linux")]
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "empty-non-regular"
fn test_files0_from_empty_non_regular() {
new_ucmd!()
.args(&["--files0-from", "/dev/null"])
.fails_with_code(2)
.stderr_only("sort: no input from '/dev/null'\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "nul-1"
fn test_files0_from_nul() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("\0")
.fails_with_code(2)
.stderr_only("sort: -:1: invalid zero-length file name\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "nul-2"
fn test_files0_from_nul2() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("\0\0")
.fails_with_code(2)
.stderr_only("sort: -:1: invalid zero-length file name\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "1"
fn test_files0_from_1() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file")
.succeeds()
.stdout_only("a\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "1a"
fn test_files0_from_1a() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file\0")
.succeeds()
.stdout_only("a\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "2"
fn test_files0_from_2() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file\0file")
.succeeds()
.stdout_only("a\na\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "2a"
fn test_files0_from_2a() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
at.append("file", "a");
scene
.ucmd()
.args(&["--files0-from", "-"])
.pipe_in("file\0file\0")
.succeeds()
.stdout_only("a\na\n");
}
#[test]
// Test for GNU tests/sort/sort-files0-from.pl "zero-len"
fn test_files0_from_zero_length() {
new_ucmd!()
.args(&["--files0-from", "-"])
.pipe_in("g\0\0b\0\0")
.fails_with_code(2)
.stderr_only("sort: -:2: invalid zero-length file name\n");
}