
sort: use UResult

Michael Debertol 2021-08-01 16:51:56 +02:00
parent e8eb15f05e
commit a4709c805c
5 changed files with 370 additions and 227 deletions

--- a/src/uu/sort/src/check.rs
+++ b/src/uu/sort/src/check.rs

@@ -9,7 +9,7 @@
 use crate::{
     chunks::{self, Chunk, RecycledChunk},
-    compare_by, open, GlobalSettings,
+    compare_by, open, GlobalSettings, SortError,
 };
 use itertools::Itertools;
 use std::{
@@ -20,13 +20,14 @@ use std::{
     sync::mpsc::{sync_channel, Receiver, SyncSender},
     thread,
 };
+use uucore::error::UResult;

 /// Check if the file at `path` is ordered.
 ///
 /// # Returns
 ///
 /// The code we should exit with.
-pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
+pub fn check(path: &OsStr, settings: &GlobalSettings) -> UResult<()> {
     let max_allowed_cmp = if settings.unique {
         // If `unique` is enabled, the previous line must compare _less_ to the next one.
         Ordering::Less
@@ -34,7 +35,7 @@ pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
         // Otherwise, the line previous line must compare _less or equal_ to the next one.
         Ordering::Equal
     };
-    let file = open(path);
+    let file = open(path)?;
     let (recycled_sender, recycled_receiver) = sync_channel(2);
     let (loaded_sender, loaded_receiver) = sync_channel(2);
     thread::spawn({
@@ -69,15 +70,13 @@ pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
                 chunk.line_data(),
             ) > max_allowed_cmp
             {
-                if !settings.check_silent {
-                    eprintln!(
-                        "sort: {}:{}: disorder: {}",
-                        path.to_string_lossy(),
-                        line_idx,
-                        new_first.line
-                    );
-                }
-                return 1;
+                return Err(SortError::Disorder {
+                    file: path.to_owned(),
+                    line_number: line_idx,
+                    line: new_first.line.to_owned(),
+                    silent: settings.check_silent,
+                }
+                .into());
             }
             let _ = recycled_sender.send(prev_chunk.recycle());
         }
@@ -85,21 +84,19 @@ pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
         for (a, b) in chunk.lines().iter().tuple_windows() {
             line_idx += 1;
             if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) > max_allowed_cmp {
-                if !settings.check_silent {
-                    eprintln!(
-                        "sort: {}:{}: disorder: {}",
-                        path.to_string_lossy(),
-                        line_idx,
-                        b.line
-                    );
-                }
-                return 1;
+                return Err(SortError::Disorder {
+                    file: path.to_owned(),
+                    line_number: line_idx,
+                    line: b.line.to_owned(),
+                    silent: settings.check_silent,
+                }
+                .into());
             }
         }
         prev_chunk = Some(chunk);
     }
-    0
+    Ok(())
 }

 /// The function running on the reader thread.
@@ -108,7 +105,7 @@ fn reader(
     receiver: Receiver<RecycledChunk>,
     sender: SyncSender<Chunk>,
     settings: &GlobalSettings,
-) {
+) -> UResult<()> {
     let mut carry_over = vec![];
     for recycled_chunk in receiver.iter() {
         let should_continue = chunks::read(
@@ -124,9 +121,10 @@ fn reader(
                 b'\n'
             },
             settings,
-        );
+        )?;
         if !should_continue {
             break;
         }
     }
+    Ok(())
 }
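
The shape of this change — build a `SortError` value, convert it with `.into()`, and let `?` carry it to the top — replaces the old `eprintln!` + `return 1` plumbing. A minimal, self-contained sketch of the same pattern (simplified stand-ins for the real `SortError` and `UResult`, not uucore's actual API):

use std::process::exit;

// Simplified stand-in for the real `SortError`.
#[derive(Debug)]
enum SortError {
    Disorder { file: String, line_number: usize, silent: bool },
}

impl SortError {
    // Mirrors `UCustomError::code` in this commit: disorder exits with 1.
    fn code(&self) -> i32 {
        match self {
            SortError::Disorder { .. } => 1,
        }
    }
}

// Stand-in for the real `check`: fails when a line sorts before its predecessor.
fn check(file: &str, lines: &[&str]) -> Result<(), SortError> {
    for (i, pair) in lines.windows(2).enumerate() {
        if pair[0] > pair[1] {
            return Err(SortError::Disorder {
                file: file.to_owned(),
                line_number: i + 2, // 1-based index of the out-of-order line
                silent: false,
            });
        }
    }
    Ok(())
}

fn main() {
    if let Err(e) = check("input.txt", &["a", "c", "b"]) {
        let SortError::Disorder { file, line_number, silent } = &e;
        if !silent {
            eprintln!("sort: {}:{}: disorder", file, line_number);
        }
        exit(e.code());
    }
}

Carrying the `silent` flag inside the error defers the print-or-not decision to the one place that formats errors, which is exactly what the `Display` impl for `SortError` in sort.rs does below.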

--- a/src/uu/sort/src/chunks.rs
+++ b/src/uu/sort/src/chunks.rs

@@ -14,8 +14,9 @@ use std::{

 use memchr::memchr_iter;
 use ouroboros::self_referencing;
+use uucore::error::{UResult, USimpleError};

-use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line};
+use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line, SortError};

 /// The chunk that is passed around between threads.
 /// `lines` consist of slices into `buffer`.
@@ -137,10 +138,10 @@ pub fn read<T: Read>(
     max_buffer_size: Option<usize>,
     carry_over: &mut Vec<u8>,
     file: &mut T,
-    next_files: &mut impl Iterator<Item = T>,
+    next_files: &mut impl Iterator<Item = UResult<T>>,
     separator: u8,
     settings: &GlobalSettings,
-) -> bool {
+) -> UResult<bool> {
     let RecycledChunk {
         lines,
         selections,
@@ -159,12 +160,12 @@
         max_buffer_size,
         carry_over.len(),
         separator,
-    );
+    )?;
     carry_over.clear();
     carry_over.extend_from_slice(&buffer[read..]);
     if read != 0 {
-        let payload = Chunk::new(buffer, |buffer| {
+        let payload: UResult<Chunk> = Chunk::try_new(buffer, |buffer| {
             let selections = unsafe {
                 // SAFETY: It is safe to transmute to an empty vector of selections with shorter lifetime.
                 // It was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
@@ -175,18 +176,19 @@
                 // because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
                 std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
             };
-            let read = crash_if_err!(2, std::str::from_utf8(&buffer[..read]));
+            let read = std::str::from_utf8(&buffer[..read])
+                .map_err(|error| SortError::Uft8Error { error })?;
             let mut line_data = LineData {
                 selections,
                 num_infos,
                 parsed_floats,
             };
             parse_lines(read, &mut lines, &mut line_data, separator, settings);
-            ChunkContents { lines, line_data }
+            Ok(ChunkContents { lines, line_data })
         });
-        sender.send(payload).unwrap();
+        sender.send(payload?).unwrap();
     }
-    should_continue
+    Ok(should_continue)
 }

 /// Split `read` into `Line`s, and add them to `lines`.
@@ -242,12 +244,12 @@ fn parse_lines<'a>(
 /// * Whether this function should be called again.
 fn read_to_buffer<T: Read>(
     file: &mut T,
-    next_files: &mut impl Iterator<Item = T>,
+    next_files: &mut impl Iterator<Item = UResult<T>>,
     buffer: &mut Vec<u8>,
     max_buffer_size: Option<usize>,
     start_offset: usize,
     separator: u8,
-) -> (usize, bool) {
+) -> UResult<(usize, bool)> {
     let mut read_target = &mut buffer[start_offset..];
     let mut last_file_target_size = read_target.len();
     loop {
@@ -274,7 +276,7 @@
                         // We read enough lines.
                         let end = last_line_end.unwrap();
                         // We want to include the separator here, because it shouldn't be carried over.
-                        return (end + 1, true);
+                        return Ok((end + 1, true));
                     } else {
                         // We need to read more lines
                         let len = buffer.len();
@@ -299,11 +301,11 @@
                    if let Some(next_file) = next_files.next() {
                        // There is another file.
                        last_file_target_size = leftover_len;
-                        *file = next_file;
+                        *file = next_file?;
                    } else {
                        // This was the last file.
                        let read_len = buffer.len() - leftover_len;
-                        return (read_len, false);
+                        return Ok((read_len, false));
                    }
                 }
             }
@@ -313,7 +315,7 @@
             Err(e) if e.kind() == ErrorKind::Interrupted => {
                 // retry
             }
-            Err(e) => crash!(2, "{}", e),
+            Err(e) => return Err(USimpleError::new(2, e.to_string())),
         }
     }
 }
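
Two things are worth noting in `read_to_buffer`: a genuine read failure now becomes a `USimpleError` with exit code 2, while `ErrorKind::Interrupted` is still swallowed and retried. The retry arm is the standard EINTR loop; a self-contained sketch of that pattern:

use std::io::{ErrorKind, Read, Result};

/// Keep retrying a read that was interrupted by a signal (EINTR),
/// the same pattern as the `ErrorKind::Interrupted` arm above.
fn read_retrying<R: Read>(source: &mut R, buf: &mut [u8]) -> Result<usize> {
    loop {
        match source.read(buf) {
            Ok(n) => return Ok(n),
            Err(e) if e.kind() == ErrorKind::Interrupted => continue, // retry
            Err(e) => return Err(e), // real error: propagate to the caller
        }
    }
}

fn main() -> Result<()> {
    let mut input: &[u8] = b"hello";
    let mut buf = [0u8; 8];
    let n = read_retrying(&mut input, &mut buf)?;
    assert_eq!(&buf[..n], b"hello");
    Ok(())
}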

--- a/src/uu/sort/src/ext_sort.rs
+++ b/src/uu/sort/src/ext_sort.rs

@@ -22,6 +22,7 @@ use std::{
 };

 use itertools::Itertools;
+use uucore::error::UResult;

 use crate::chunks::RecycledChunk;
 use crate::merge::ClosedTmpFile;
@@ -29,6 +30,7 @@ use crate::merge::WriteableCompressedTmpFile;
 use crate::merge::WriteablePlainTmpFile;
 use crate::merge::WriteableTmpFile;
 use crate::Output;
+use crate::SortError;
 use crate::{
     chunks::{self, Chunk},
     compare_by, merge, sort_by, GlobalSettings,
@@ -40,10 +42,10 @@ const START_BUFFER_SIZE: usize = 8_000;

 /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
 pub fn ext_sort(
-    files: &mut impl Iterator<Item = Box<dyn Read + Send>>,
+    files: &mut impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
     settings: &GlobalSettings,
     output: Output,
-) {
+) -> UResult<()> {
     let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1);
     let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1);
     thread::spawn({
@@ -57,7 +59,7 @@ pub fn ext_sort(
             sorted_receiver,
             recycled_sender,
             output,
-        );
+        )
     } else {
         reader_writer::<_, WriteablePlainTmpFile>(
             files,
@@ -65,17 +67,20 @@
             sorted_receiver,
             recycled_sender,
             output,
-        );
+        )
     }
 }

-fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile + 'static>(
+fn reader_writer<
+    F: Iterator<Item = UResult<Box<dyn Read + Send>>>,
+    Tmp: WriteableTmpFile + 'static,
+>(
     files: F,
     settings: &GlobalSettings,
     receiver: Receiver<Chunk>,
     sender: SyncSender<Chunk>,
     output: Output,
-) {
+) -> UResult<()> {
     let separator = if settings.zero_terminated {
         b'\0'
     } else {
@@ -93,16 +98,16 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile + 'static>(
         settings,
         receiver,
         sender,
-    );
+    )?;
     match read_result {
         ReadResult::WroteChunksToFile { tmp_files, tmp_dir } => {
             let tmp_dir_size = tmp_files.len();
-            let mut merger = merge::merge_with_file_limit::<_, _, Tmp>(
+            let merger = merge::merge_with_file_limit::<_, _, Tmp>(
                 tmp_files.into_iter().map(|c| c.reopen()),
                 settings,
                 Some((tmp_dir, tmp_dir_size)),
-            );
-            merger.write_all(settings, output);
+            )?;
+            merger.write_all(settings, output)?;
         }
         ReadResult::SortedSingleChunk(chunk) => {
             if settings.unique {
@@ -145,6 +150,7 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile + 'static>(
             // don't output anything
         }
     }
+    Ok(())
 }

 /// The function that is executed on the sorter thread.
@@ -153,7 +159,11 @@ fn sorter(receiver: Receiver<Chunk>, sender: SyncSender<Chunk>, settings: GlobalSettings) {
         payload.with_contents_mut(|contents| {
             sort_by(&mut contents.lines, &settings, &contents.line_data)
         });
-        sender.send(payload).unwrap();
+        if sender.send(payload).is_err() {
+            // The receiver has gone away, likely because the other thread hit an error.
+            // We stop silently because the actual error is printed by the other thread.
+            return;
+        }
     }
 }

@@ -173,15 +183,15 @@ enum ReadResult<I: WriteableTmpFile> {
 }

 /// The function that is executed on the reader/writer thread.
 fn read_write_loop<I: WriteableTmpFile>(
-    mut files: impl Iterator<Item = Box<dyn Read + Send>>,
+    mut files: impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
     tmp_dir_parent: &Path,
     separator: u8,
     buffer_size: usize,
     settings: &GlobalSettings,
     receiver: Receiver<Chunk>,
     sender: SyncSender<Chunk>,
-) -> ReadResult<I> {
-    let mut file = files.next().unwrap();
+) -> UResult<ReadResult<I>> {
+    let mut file = files.next().unwrap()?;

     let mut carry_over = vec![];
     // kick things off with two reads
@@ -199,14 +209,14 @@ fn read_write_loop<I: WriteableTmpFile>(
             &mut files,
             separator,
             settings,
-        );
+        )?;
         if !should_continue {
             drop(sender);
             // We have already read the whole input. Since we are in our first two reads,
             // this means that we can fit the whole input into memory. Bypass writing below and
             // handle this case in a more straightforward way.
-            return if let Ok(first_chunk) = receiver.recv() {
+            return Ok(if let Ok(first_chunk) = receiver.recv() {
                 if let Ok(second_chunk) = receiver.recv() {
                     ReadResult::SortedTwoChunks([first_chunk, second_chunk])
                 } else {
@@ -214,16 +224,14 @@
                 }
             } else {
                 ReadResult::EmptyInput
-            };
+            });
         }
     }

-    let tmp_dir = crash_if_err!(
-        2,
-        tempfile::Builder::new()
-            .prefix("uutils_sort")
-            .tempdir_in(tmp_dir_parent)
-    );
+    let tmp_dir = tempfile::Builder::new()
+        .prefix("uutils_sort")
+        .tempdir_in(tmp_dir_parent)
+        .map_err(|_| SortError::TmpDirCreationFailed)?;

     let mut sender_option = Some(sender);
     let mut file_number = 0;
@@ -232,7 +240,7 @@
         let mut chunk = match receiver.recv() {
             Ok(it) => it,
             _ => {
-                return ReadResult::WroteChunksToFile { tmp_files, tmp_dir };
+                return Ok(ReadResult::WroteChunksToFile { tmp_files, tmp_dir });
             }
         };

@@ -241,7 +249,7 @@
             tmp_dir.path().join(file_number.to_string()),
             settings.compress_prog.as_deref(),
             separator,
-        );
+        )?;
         tmp_files.push(tmp_file);

         file_number += 1;
@@ -258,7 +266,7 @@
                 &mut files,
                 separator,
                 settings,
-            );
+            )?;
             if !should_continue {
                 sender_option = None;
             }
@@ -273,8 +281,8 @@ fn write<I: WriteableTmpFile>(
     file: PathBuf,
     compress_prog: Option<&str>,
     separator: u8,
-) -> I::Closed {
-    let mut tmp_file = I::create(file, compress_prog);
+) -> UResult<I::Closed> {
+    let mut tmp_file = I::create(file, compress_prog)?;
     write_lines(chunk.lines(), tmp_file.as_write(), separator);
     tmp_file.finished_writing()
 }
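
The `sorter` change is about shutdown rather than error construction: once the reader/writer thread fails and is torn down, its `Receiver` is dropped, `send` starts returning `Err`, and the sorter thread exits quietly instead of panicking on `unwrap`. A runnable sketch of that hang-up protocol (the real code sends `Chunk`s, not integers):

use std::sync::mpsc::sync_channel;
use std::thread;

fn main() {
    let (sender, receiver) = sync_channel::<i32>(1);

    let worker = thread::spawn(move || {
        for i in 0.. {
            // `send` fails once the receiver is dropped; stop quietly,
            // since the actual error is reported on the receiving side.
            if sender.send(i).is_err() {
                return;
            }
        }
    });

    // Simulate the reader/writer hitting an error: drop the receiver early.
    let first = receiver.recv().unwrap();
    assert_eq!(first, 0);
    drop(receiver);

    worker.join().unwrap(); // the worker exits instead of panicking
}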

--- a/src/uu/sort/src/merge.rs
+++ b/src/uu/sort/src/merge.rs

@@ -17,16 +17,17 @@ use std::{
     process::{Child, ChildStdin, ChildStdout, Command, Stdio},
     rc::Rc,
     sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
-    thread,
+    thread::{self, JoinHandle},
 };

 use compare::Compare;
 use itertools::Itertools;
 use tempfile::TempDir;
+use uucore::error::UResult;

 use crate::{
     chunks::{self, Chunk, RecycledChunk},
-    compare_by, open, GlobalSettings, Output,
+    compare_by, open, GlobalSettings, Output, SortError,
 };

 /// If the output file occurs in the input files as well, copy the contents of the output file
@@ -35,7 +36,7 @@ fn replace_output_file_in_input_files(
     files: &mut [OsString],
     settings: &GlobalSettings,
     output: Option<&str>,
-) -> Option<(TempDir, usize)> {
+) -> UResult<Option<(TempDir, usize)>> {
     let mut copy: Option<(TempDir, PathBuf)> = None;
     if let Some(Ok(output_path)) = output.map(|path| Path::new(path).canonicalize()) {
         for file in files {
@@ -47,9 +48,10 @@
                     let tmp_dir = tempfile::Builder::new()
                         .prefix("uutils_sort")
                         .tempdir_in(&settings.tmp_dir)
-                        .unwrap();
+                        .map_err(|_| SortError::TmpDirCreationFailed)?;
                     let copy_path = tmp_dir.path().join("0");
-                    std::fs::copy(file_path, &copy_path).unwrap();
+                    std::fs::copy(file_path, &copy_path)
+                        .map_err(|error| SortError::OpenTmpFileFailed { error })?;
                     *file = copy_path.clone().into_os_string();
                     copy = Some((tmp_dir, copy_path))
                 }
@@ -58,7 +60,7 @@
         }
     }
     // if we created a TempDir its size must be one.
-    copy.map(|(dir, _copy)| (dir, 1))
+    Ok(copy.map(|(dir, _copy)| (dir, 1)))
 }

 /// Merge pre-sorted `Box<dyn Read>`s.
@@ -69,13 +71,13 @@ pub fn merge<'a>(
     files: &mut [OsString],
     settings: &'a GlobalSettings,
     output: Option<&str>,
-) -> FileMerger<'a> {
-    let tmp_dir = replace_output_file_in_input_files(files, settings, output);
+) -> UResult<FileMerger<'a>> {
+    let tmp_dir = replace_output_file_in_input_files(files, settings, output)?;
     if settings.compress_prog.is_none() {
         merge_with_file_limit::<_, _, WriteablePlainTmpFile>(
             files
                 .iter()
-                .map(|file| PlainMergeInput { inner: open(file) }),
+                .map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
             settings,
             tmp_dir,
         )
@@ -83,7 +85,7 @@
         merge_with_file_limit::<_, _, WriteableCompressedTmpFile>(
             files
                 .iter()
-                .map(|file| PlainMergeInput { inner: open(file) }),
+                .map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
             settings,
             tmp_dir,
         )
@@ -93,24 +95,25 @@
 // Merge already sorted `MergeInput`s.
 pub fn merge_with_file_limit<
     M: MergeInput + 'static,
-    F: ExactSizeIterator<Item = M>,
+    F: ExactSizeIterator<Item = UResult<M>>,
     Tmp: WriteableTmpFile + 'static,
 >(
     files: F,
     settings: &GlobalSettings,
     tmp_dir: Option<(TempDir, usize)>,
-) -> FileMerger {
+) -> UResult<FileMerger> {
     if files.len() > settings.merge_batch_size {
         // If we did not get a tmp_dir, create one.
-        let (tmp_dir, mut tmp_dir_size) = tmp_dir.unwrap_or_else(|| {
-            (
+        let (tmp_dir, mut tmp_dir_size) = match tmp_dir {
+            Some(x) => x,
+            None => (
                 tempfile::Builder::new()
                     .prefix("uutils_sort")
                     .tempdir_in(&settings.tmp_dir)
-                    .unwrap(),
+                    .map_err(|_| SortError::TmpDirCreationFailed)?,
                 0,
-            )
-        });
+            ),
+        };
         let mut remaining_files = files.len();
         let batches = files.chunks(settings.merge_batch_size);
         let mut batches = batches.into_iter();
@@ -118,14 +121,14 @@ pub fn merge_with_file_limit<
         while remaining_files != 0 {
             // Work around the fact that `Chunks` is not an `ExactSizeIterator`.
             remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
-            let mut merger = merge_without_limit(batches.next().unwrap(), settings);
+            let merger = merge_without_limit(batches.next().unwrap(), settings)?;
             let mut tmp_file = Tmp::create(
                 tmp_dir.path().join(tmp_dir_size.to_string()),
                 settings.compress_prog.as_deref(),
-            );
+            )?;
             tmp_dir_size += 1;
-            merger.write_all_to(settings, tmp_file.as_write());
-            temporary_files.push(tmp_file.finished_writing());
+            merger.write_all_to(settings, tmp_file.as_write())?;
+            temporary_files.push(tmp_file.finished_writing()?);
         }
         assert!(batches.next().is_none());
         merge_with_file_limit::<_, _, Tmp>(
@@ -133,7 +136,7 @@
                 .into_iter()
                 .map(Box::new(|c: Tmp::Closed| c.reopen())
                     as Box<
-                        dyn FnMut(Tmp::Closed) -> <Tmp::Closed as ClosedTmpFile>::Reopened,
+                        dyn FnMut(Tmp::Closed) -> UResult<<Tmp::Closed as ClosedTmpFile>::Reopened>,
                     >),
             settings,
             Some((tmp_dir, tmp_dir_size)),
@@ -147,10 +150,10 @@
 ///
 /// It is the responsibility of the caller to ensure that `files` yields only
 /// as many files as we are allowed to open concurrently.
-fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
+fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = UResult<M>>>(
     files: F,
     settings: &GlobalSettings,
-) -> FileMerger {
+) -> UResult<FileMerger> {
     let (request_sender, request_receiver) = channel();
     let mut reader_files = Vec::with_capacity(files.size_hint().0);
     let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
@@ -158,7 +161,7 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
         let (sender, receiver) = sync_channel(2);
         loaded_receivers.push(receiver);
         reader_files.push(Some(ReaderFile {
-            file,
+            file: file?,
             sender,
             carry_over: vec![],
         }));
@@ -175,7 +178,7 @@
             .unwrap();
     }

-    thread::spawn({
+    let reader_join_handle = thread::spawn({
         let settings = settings.clone();
         move || {
             reader(
@@ -204,14 +207,15 @@
         }
     }

-    FileMerger {
+    Ok(FileMerger {
         heap: binary_heap_plus::BinaryHeap::from_vec_cmp(
             mergeable_files,
             FileComparator { settings },
         ),
         request_sender,
         prev: None,
-    }
+        reader_join_handle,
+    })
 }

 /// The struct on the reader thread representing an input file
 struct ReaderFile<M: MergeInput> {
@@ -226,7 +230,7 @@ fn reader(
     files: &mut [Option<ReaderFile<impl MergeInput>>],
     settings: &GlobalSettings,
     separator: u8,
-) {
+) -> UResult<()> {
     for (file_idx, recycled_chunk) in recycled_receiver.iter() {
         if let Some(ReaderFile {
             file,
@@ -243,15 +247,16 @@
                 &mut iter::empty(),
                 separator,
                 settings,
-            );
+            )?;
             if !should_continue {
                 // Remove the file from the list by replacing it with `None`.
                 let ReaderFile { file, .. } = files[file_idx].take().unwrap();
                 // Depending on the kind of the `MergeInput`, this may delete the file:
-                file.finished_reading();
+                file.finished_reading()?;
             }
         }
     }
+    Ok(())
 }

 /// The struct on the main thread representing an input file
 pub struct MergeableFile {
@@ -275,17 +280,20 @@ pub struct FileMerger<'a> {
     heap: binary_heap_plus::BinaryHeap<MergeableFile, FileComparator<'a>>,
     request_sender: Sender<(usize, RecycledChunk)>,
     prev: Option<PreviousLine>,
+    reader_join_handle: JoinHandle<UResult<()>>,
 }

 impl<'a> FileMerger<'a> {
     /// Write the merged contents to the output file.
-    pub fn write_all(&mut self, settings: &GlobalSettings, output: Output) {
+    pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> {
         let mut out = output.into_write();
-        self.write_all_to(settings, &mut out);
+        self.write_all_to(settings, &mut out)
     }

-    pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
+    pub fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> {
         while self.write_next(settings, out) {}
+        drop(self.request_sender);
+        self.reader_join_handle.join().unwrap()
     }

     fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {
@@ -369,36 +377,41 @@ impl<'a> Compare<MergeableFile> for FileComparator<'a> {
 }

 // Wait for the child to exit and check its exit code.
-fn assert_child_success(mut child: Child, program: &str) {
+fn check_child_success(mut child: Child, program: &str) -> UResult<()> {
     if !matches!(
         child.wait().map(|e| e.code()),
         Ok(Some(0)) | Ok(None) | Err(_)
     ) {
-        crash!(2, "'{}' terminated abnormally", program)
+        Err(SortError::CompressProgTerminatedAbnormally {
+            prog: program.to_owned(),
+        }
+        .into())
+    } else {
+        Ok(())
     }
 }

 /// A temporary file that can be written to.
-pub trait WriteableTmpFile {
+pub trait WriteableTmpFile: Sized {
     type Closed: ClosedTmpFile;
     type InnerWrite: Write;
-    fn create(path: PathBuf, compress_prog: Option<&str>) -> Self;
+    fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult<Self>;
     /// Closes the temporary file.
-    fn finished_writing(self) -> Self::Closed;
+    fn finished_writing(self) -> UResult<Self::Closed>;
     fn as_write(&mut self) -> &mut Self::InnerWrite;
 }

 /// A temporary file that is (temporarily) closed, but can be reopened.
 pub trait ClosedTmpFile {
     type Reopened: MergeInput;
     /// Reopens the temporary file.
-    fn reopen(self) -> Self::Reopened;
+    fn reopen(self) -> UResult<Self::Reopened>;
 }

 /// A pre-sorted input for merging.
 pub trait MergeInput: Send {
     type InnerRead: Read;
     /// Cleans this `MergeInput` up.
     /// Implementations may delete the backing file.
-    fn finished_reading(self);
+    fn finished_reading(self) -> UResult<()>;
     fn as_read(&mut self) -> &mut Self::InnerRead;
 }

@@ -417,15 +430,17 @@ impl WriteableTmpFile for WriteablePlainTmpFile {
     type Closed = ClosedPlainTmpFile;
     type InnerWrite = BufWriter<File>;

-    fn create(path: PathBuf, _: Option<&str>) -> Self {
-        WriteablePlainTmpFile {
-            file: BufWriter::new(File::create(&path).unwrap()),
+    fn create(path: PathBuf, _: Option<&str>) -> UResult<Self> {
+        Ok(WriteablePlainTmpFile {
+            file: BufWriter::new(
+                File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?,
+            ),
             path,
-        }
+        })
     }

-    fn finished_writing(self) -> Self::Closed {
-        ClosedPlainTmpFile { path: self.path }
+    fn finished_writing(self) -> UResult<Self::Closed> {
+        Ok(ClosedPlainTmpFile { path: self.path })
     }

     fn as_write(&mut self) -> &mut Self::InnerWrite {
@@ -434,18 +449,22 @@ impl WriteableTmpFile for WriteablePlainTmpFile {
 }
 impl ClosedTmpFile for ClosedPlainTmpFile {
     type Reopened = PlainTmpMergeInput;
-    fn reopen(self) -> Self::Reopened {
-        PlainTmpMergeInput {
-            file: File::open(&self.path).unwrap(),
+    fn reopen(self) -> UResult<Self::Reopened> {
+        Ok(PlainTmpMergeInput {
+            file: File::open(&self.path).map_err(|error| SortError::OpenTmpFileFailed { error })?,
             path: self.path,
-        }
+        })
     }
 }
 impl MergeInput for PlainTmpMergeInput {
     type InnerRead = File;

-    fn finished_reading(self) {
-        fs::remove_file(self.path).ok();
+    fn finished_reading(self) -> UResult<()> {
+        // we ignore failures to delete the temporary file,
+        // because there is a race at the end of the execution and the whole
+        // temporary directory might already be gone.
+        let _ = fs::remove_file(self.path);
+        Ok(())
     }

     fn as_read(&mut self) -> &mut Self::InnerRead {
@@ -473,35 +492,33 @@ impl WriteableTmpFile for WriteableCompressedTmpFile {
     type Closed = ClosedCompressedTmpFile;
     type InnerWrite = BufWriter<ChildStdin>;

-    fn create(path: PathBuf, compress_prog: Option<&str>) -> Self {
+    fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult<Self> {
         let compress_prog = compress_prog.unwrap();
         let mut command = Command::new(compress_prog);
-        command
-            .stdin(Stdio::piped())
-            .stdout(File::create(&path).unwrap());
-        let mut child = crash_if_err!(
-            2,
-            command.spawn().map_err(|err| format!(
-                "couldn't execute compress program: errno {}",
-                err.raw_os_error().unwrap()
-            ))
-        );
+        let tmp_file =
+            File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?;
+        command.stdin(Stdio::piped()).stdout(tmp_file);
+        let mut child = command
+            .spawn()
+            .map_err(|err| SortError::CompressProgExecutionFailed {
+                code: err.raw_os_error().unwrap(),
+            })?;
         let child_stdin = child.stdin.take().unwrap();
-        WriteableCompressedTmpFile {
+        Ok(WriteableCompressedTmpFile {
             path,
             compress_prog: compress_prog.to_owned(),
             child,
             child_stdin: BufWriter::new(child_stdin),
-        }
+        })
     }

-    fn finished_writing(self) -> Self::Closed {
+    fn finished_writing(self) -> UResult<Self::Closed> {
         drop(self.child_stdin);
-        assert_child_success(self.child, &self.compress_prog);
-        ClosedCompressedTmpFile {
+        check_child_success(self.child, &self.compress_prog)?;
+        Ok(ClosedCompressedTmpFile {
             path: self.path,
             compress_prog: self.compress_prog,
-        }
+        })
     }

     fn as_write(&mut self) -> &mut Self::InnerWrite {
@@ -511,33 +528,32 @@ impl WriteableTmpFile for WriteableCompressedTmpFile {
 impl ClosedTmpFile for ClosedCompressedTmpFile {
     type Reopened = CompressedTmpMergeInput;

-    fn reopen(self) -> Self::Reopened {
+    fn reopen(self) -> UResult<Self::Reopened> {
         let mut command = Command::new(&self.compress_prog);
         let file = File::open(&self.path).unwrap();
         command.stdin(file).stdout(Stdio::piped()).arg("-d");
-        let mut child = crash_if_err!(
-            2,
-            command.spawn().map_err(|err| format!(
-                "couldn't execute compress program: errno {}",
-                err.raw_os_error().unwrap()
-            ))
-        );
+        let mut child = command
+            .spawn()
+            .map_err(|err| SortError::CompressProgExecutionFailed {
+                code: err.raw_os_error().unwrap(),
+            })?;
         let child_stdout = child.stdout.take().unwrap();
-        CompressedTmpMergeInput {
+        Ok(CompressedTmpMergeInput {
             path: self.path,
             compress_prog: self.compress_prog,
             child,
             child_stdout,
-        }
+        })
     }
 }
 impl MergeInput for CompressedTmpMergeInput {
     type InnerRead = ChildStdout;

-    fn finished_reading(self) {
+    fn finished_reading(self) -> UResult<()> {
         drop(self.child_stdout);
-        assert_child_success(self.child, &self.compress_prog);
-        fs::remove_file(self.path).ok();
+        check_child_success(self.child, &self.compress_prog)?;
+        let _ = fs::remove_file(self.path);
+        Ok(())
     }

     fn as_read(&mut self) -> &mut Self::InnerRead {
@@ -550,7 +566,9 @@ pub struct PlainMergeInput<R: Read + Send> {
 }
 impl<R: Read + Send> MergeInput for PlainMergeInput<R> {
     type InnerRead = R;
-    fn finished_reading(self) {}
+    fn finished_reading(self) -> UResult<()> {
+        Ok(())
+    }
     fn as_read(&mut self) -> &mut Self::InnerRead {
         &mut self.inner
     }
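
Storing `reader_join_handle` in `FileMerger` is what lets reader-thread failures surface at all: `write_all_to` first drops `request_sender` so the reader's receive loop terminates, then joins the thread and returns its `UResult`. A distilled sketch of that ownership dance (the types are hypothetical stand-ins, not the real `FileMerger`):

use std::sync::mpsc::{channel, Sender};
use std::thread::{self, JoinHandle};

// Hypothetical stand-in for uucore's `UResult`.
type UResult<T> = Result<T, String>;

struct Merger {
    request_sender: Sender<u32>,
    reader_join_handle: JoinHandle<UResult<()>>,
}

impl Merger {
    // Mirrors `FileMerger::write_all_to`: hang up on the reader thread,
    // then join it so its `UResult` is not silently lost.
    fn write_all(self) -> UResult<()> {
        drop(self.request_sender); // ends the reader's receive loop
        self.reader_join_handle.join().unwrap() // surface the reader's result
    }
}

fn main() {
    let (request_sender, request_receiver) = channel();
    let reader_join_handle = thread::spawn(move || -> UResult<()> {
        for request in request_receiver.iter() {
            println!("serving request {}", request);
        }
        Ok(()) // or Err(...) if reading had failed
    });

    let merger = Merger { request_sender, reader_join_handle };
    merger.request_sender.send(1).unwrap();
    merger.write_all().unwrap();
}

Note that this is also why `write_all` and `write_all_to` now take `self` by value: joining consumes the handle, so the merger can only be written out once.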

--- a/src/uu/sort/src/sort.rs
+++ b/src/uu/sort/src/sort.rs

@@ -33,14 +33,18 @@ use rand::{thread_rng, Rng};
 use rayon::prelude::*;
 use std::cmp::Ordering;
 use std::env;
+use std::error::Error;
 use std::ffi::{OsStr, OsString};
+use std::fmt::Display;
 use std::fs::{File, OpenOptions};
 use std::hash::{Hash, Hasher};
 use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
 use std::ops::Range;
 use std::path::Path;
 use std::path::PathBuf;
+use std::str::Utf8Error;
 use unicode_width::UnicodeWidthStr;
+use uucore::error::{set_exit_code, UCustomError, UResult, USimpleError, UUsageError};
 use uucore::parse_size::{parse_size, ParseSizeError};
 use uucore::version_cmp::version_cmp;
 use uucore::InvalidEncodingHandling;
@@ -120,6 +124,111 @@ const POSITIVE: char = '+';
 // available memory into consideration, instead of relying on this constant only.
 const DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB

+#[derive(Debug)]
+enum SortError {
+    Disorder {
+        file: OsString,
+        line_number: usize,
+        line: String,
+        silent: bool,
+    },
+    OpenFailed {
+        path: String,
+        error: std::io::Error,
+    },
+    ReadFailed {
+        path: String,
+        error: std::io::Error,
+    },
+    ParseKeyError {
+        key: String,
+        msg: String,
+    },
+    OpenTmpFileFailed {
+        error: std::io::Error,
+    },
+    CompressProgExecutionFailed {
+        code: i32,
+    },
+    CompressProgTerminatedAbnormally {
+        prog: String,
+    },
+    TmpDirCreationFailed,
+    Uft8Error {
+        error: Utf8Error,
+    },
+}
+
+impl Error for SortError {}
+
+impl UCustomError for SortError {
+    fn code(&self) -> i32 {
+        match self {
+            SortError::Disorder { .. } => 1,
+            _ => 2,
+        }
+    }
+
+    fn usage(&self) -> bool {
+        false
+    }
+}
+
+impl Display for SortError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SortError::Disorder {
+                file,
+                line_number,
+                line,
+                silent,
+            } => {
+                if !silent {
+                    write!(
+                        f,
+                        "{}:{}: disorder: {}",
+                        file.to_string_lossy(),
+                        line_number,
+                        line
+                    )
+                } else {
+                    Ok(())
+                }
+            }
+            SortError::OpenFailed { path, error } => write!(
+                f,
+                "open failed: {}: {}",
+                path,
+                strip_errno(&error.to_string())
+            ),
+            SortError::ParseKeyError { key, msg } => {
+                write!(f, "failed to parse key `{}`: {}", key, msg)
+            }
+            SortError::ReadFailed { path, error } => write!(
+                f,
+                "cannot read: {}: {}",
+                path,
+                strip_errno(&error.to_string())
+            ),
+            SortError::OpenTmpFileFailed { error } => {
+                write!(
+                    f,
+                    "failed to open temporary file: {}",
+                    strip_errno(&error.to_string())
+                )
+            }
+            SortError::CompressProgExecutionFailed { code } => {
+                write!(f, "couldn't execute compress program: errno {}", code)
+            }
+            SortError::CompressProgTerminatedAbnormally { prog } => {
+                write!(f, "'{}' terminated abnormally", prog)
+            }
+            SortError::TmpDirCreationFailed => write!(f, "could not create temporary directory"),
+            SortError::Uft8Error { error } => write!(f, "{}", error),
+        }
+    }
+}
+
 #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)]
 enum SortMode {
     Numeric,
@@ -150,23 +259,23 @@ pub struct Output {
 }

 impl Output {
-    fn new(name: Option<&str>) -> Self {
-        Self {
-            file: name.map(|name| {
-                // This is different from `File::create()` because we don't truncate the output yet.
-                // This allows using the output file as an input file.
-                (
-                    name.to_owned(),
-                    OpenOptions::new()
-                        .write(true)
-                        .create(true)
-                        .open(name)
-                        .unwrap_or_else(|e| {
-                            crash!(2, "open failed: {}: {}", name, strip_errno(&e.to_string()))
-                        }),
-                )
-            }),
-        }
+    fn new(name: Option<&str>) -> UResult<Self> {
+        let file = if let Some(name) = name {
+            // This is different from `File::create()` because we don't truncate the output yet.
+            // This allows using the output file as an input file.
+            let file = OpenOptions::new()
+                .write(true)
+                .create(true)
+                .open(name)
+                .map_err(|e| SortError::OpenFailed {
+                    path: name.to_owned(),
+                    error: e,
+                })?;
+            Some((name.to_owned(), file))
+        } else {
+            None
+        };
+        Ok(Self { file })
     }

     fn into_write(self) -> BufWriter<Box<dyn Write>> {
@@ -724,33 +833,37 @@ impl FieldSelector {
         }
     }

-    fn parse(key: &str, global_settings: &GlobalSettings) -> Self {
+    fn parse(key: &str, global_settings: &GlobalSettings) -> UResult<Self> {
         let mut from_to = key.split(',');
         let (from, from_options) = Self::split_key_options(from_to.next().unwrap());
         let to = from_to.next().map(|to| Self::split_key_options(to));
         let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, "")));
-        crash_if_err!(
-            2,
-            if options_are_empty {
-                // Inherit the global settings if there are no options attached to this key.
-                (|| {
-                    // This would be ideal for a try block, I think. In the meantime this closure allows
-                    // to use the `?` operator here.
-                    Self::new(
-                        KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
-                        to.map(|(to, _)| {
-                            KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
-                        })
-                        .transpose()?,
-                        KeySettings::from(global_settings),
-                    )
-                })()
-            } else {
-                // Do not inherit from `global_settings`, as there are options attached to this key.
-                Self::parse_with_options((from, from_options), to)
-            }
-            .map_err(|e| format!("failed to parse key `{}`: {}", key, e))
-        )
+
+        if options_are_empty {
+            // Inherit the global settings if there are no options attached to this key.
+            (|| {
+                // This would be ideal for a try block, I think. In the meantime this closure allows
+                // to use the `?` operator here.
+                Self::new(
+                    KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
+                    to.map(|(to, _)| {
+                        KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
+                    })
+                    .transpose()?,
+                    KeySettings::from(global_settings),
+                )
+            })()
+        } else {
+            // Do not inherit from `global_settings`, as there are options attached to this key.
+            Self::parse_with_options((from, from_options), to)
+        }
+        .map_err(|msg| {
+            SortError::ParseKeyError {
+                key: key.to_owned(),
+                msg,
+            }
+            .into()
+        })
     }

     fn parse_with_options(
@@ -962,7 +1075,8 @@ fn make_sort_mode_arg<'a, 'b>(mode: &'a str, short: &'b str, help: &'b str) -> Arg<'a, 'b> {
     arg
 }

-pub fn uumain(args: impl uucore::Args) -> i32 {
+#[uucore_procs::gen_uumain]
+pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     let args = args
         .collect_str(InvalidEncodingHandling::Ignore)
         .accept_any();
@@ -979,11 +1093,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
             // (clap returns 1).
             if e.use_stderr() {
                 eprintln!("{}", e.message);
-                return 2;
+                set_exit_code(2);
             } else {
                 println!("{}", e.message);
-                return 0;
             }
+            return Ok(());
         }
     };

@@ -998,7 +1112,7 @@
     let mut files = Vec::new();
     for path in &files0_from {
-        let reader = open(&path);
+        let reader = open(&path)?;
         let buf_reader = BufReader::new(reader);
         for line in buf_reader.split(b'\0').flatten() {
             files.push(OsString::from(
@@ -1055,12 +1169,14 @@
         env::set_var("RAYON_NUM_THREADS", &settings.threads);
     }

-    settings.buffer_size = matches
-        .value_of(options::BUF_SIZE)
-        .map_or(DEFAULT_BUF_SIZE, |s| {
-            GlobalSettings::parse_byte_count(s)
-                .unwrap_or_else(|e| crash!(2, "{}", format_error_message(e, s, options::BUF_SIZE)))
-        });
+    settings.buffer_size =
+        matches
+            .value_of(options::BUF_SIZE)
+            .map_or(Ok(DEFAULT_BUF_SIZE), |s| {
+                GlobalSettings::parse_byte_count(s).map_err(|e| {
+                    USimpleError::new(2, format_error_message(e, s, options::BUF_SIZE))
+                })
+            })?;

     settings.tmp_dir = matches
         .value_of(options::TMP_DIR)
@@ -1070,9 +1186,9 @@
     settings.compress_prog = matches.value_of(options::COMPRESS_PROG).map(String::from);

     if let Some(n_merge) = matches.value_of(options::BATCH_SIZE) {
-        settings.merge_batch_size = n_merge
-            .parse()
-            .unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
+        settings.merge_batch_size = n_merge.parse().map_err(|_| {
+            UUsageError::new(2, format!("invalid --batch-size argument '{}'", n_merge))
+        })?;
     }

     settings.zero_terminated = matches.is_present(options::ZERO_TERMINATED);
@@ -1101,11 +1217,13 @@
         /* if no file, default to stdin */
         files.push("-".to_string().into());
     } else if settings.check && files.len() != 1 {
-        crash!(
-            2,
-            "extra operand `{}' not allowed with -c",
-            files[1].to_string_lossy()
-        )
+        return Err(UUsageError::new(
+            2,
+            format!(
+                "extra operand `{}' not allowed with -c",
+                files[1].to_string_lossy()
+            ),
+        ));
     }

     if let Some(arg) = matches.args.get(options::SEPARATOR) {
@@ -1115,14 +1233,17 @@
             separator = "\0";
         }
         if separator.len() != 1 {
-            crash!(2, "separator must be exactly one character long");
+            return Err(UUsageError::new(
+                2,
+                "separator must be exactly one character long".into(),
+            ));
         }
         settings.separator = Some(separator.chars().next().unwrap())
     }

     if let Some(values) = matches.values_of(options::KEY) {
         for value in values {
-            let selector = FieldSelector::parse(value, &settings);
+            let selector = FieldSelector::parse(value, &settings)?;
             if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
                 settings.salt = Some(get_rand_string());
             }
@@ -1152,10 +1273,10 @@
     // and to reopen them at a later point. This is different from how the output file is handled,
     // probably to prevent running out of file descriptors.
     for file in &files {
-        open(file);
+        open(file)?;
     }

-    let output = Output::new(matches.value_of(options::OUTPUT));
+    let output = Output::new(matches.value_of(options::OUTPUT))?;

     settings.init_precomputed();

@@ -1382,21 +1503,20 @@
     )
 }

-fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> i32 {
+fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> UResult<()> {
     if settings.merge {
-        let mut file_merger = merge::merge(files, settings, output.as_output_name());
-        file_merger.write_all(settings, output);
+        let file_merger = merge::merge(files, settings, output.as_output_name())?;
+        file_merger.write_all(settings, output)
     } else if settings.check {
         if files.len() > 1 {
-            crash!(2, "only one file allowed with -c");
+            Err(UUsageError::new(2, "only one file allowed with -c".into()))
+        } else {
+            check::check(files.first().unwrap(), settings)
         }
-        return check::check(files.first().unwrap(), settings);
     } else {
         let mut lines = files.iter().map(open);
-
-        ext_sort(&mut lines, settings, output);
+        ext_sort(&mut lines, settings, output)
     }
-    0
 }

 fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_data: &LineData<'a>) {
@@ -1692,25 +1812,22 @@ fn strip_errno(err: &str) -> &str {
     &err[..err.find(" (os error ").unwrap_or(err.len())]
 }

-fn open(path: impl AsRef<OsStr>) -> Box<dyn Read + Send> {
+fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
     let path = path.as_ref();
     if path == "-" {
         let stdin = stdin();
-        return Box::new(stdin) as Box<dyn Read + Send>;
+        return Ok(Box::new(stdin) as Box<dyn Read + Send>);
     }

     let path = Path::new(path);

     match File::open(path) {
-        Ok(f) => Box::new(f) as Box<dyn Read + Send>,
-        Err(e) => {
-            crash!(
-                2,
-                "cannot read: {0}: {1}",
-                path.to_string_lossy(),
-                strip_errno(&e.to_string())
-            );
-        }
+        Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
+        Err(error) => Err(SortError::ReadFailed {
+            path: path.to_string_lossy().to_string(),
+            error,
+        }
+        .into()),
     }
 }
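
Taken together, the commit funnels every failure into one enum whose `Display` impl renders the message and whose `code` method picks the process exit status; `#[uucore_procs::gen_uumain]` wraps `uumain` so the returned `UResult` is turned into that exit code. The standalone program below compresses this architecture into a sketch; `CustomError` here is a hypothetical stand-in for uucore's `UCustomError` trait, whose real definition lives in `uucore::error` and differs in detail:

use std::error::Error;
use std::fmt::{self, Display};
use std::process::exit;

// Hypothetical stand-ins for uucore's `UCustomError`/`UResult`.
trait CustomError: Error {
    fn code(&self) -> i32 {
        1
    }
}
type UResult<T> = Result<T, Box<dyn CustomError>>;

#[derive(Debug)]
enum SortError {
    Disorder { file: String, line_number: usize, line: String },
    TmpDirCreationFailed,
}

impl Display for SortError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SortError::Disorder { file, line_number, line } => {
                write!(f, "{}:{}: disorder: {}", file, line_number, line)
            }
            SortError::TmpDirCreationFailed => write!(f, "could not create temporary directory"),
        }
    }
}

impl Error for SortError {}

impl CustomError for SortError {
    fn code(&self) -> i32 {
        match self {
            SortError::Disorder { .. } => 1, // `sort -c` exits 1 on disorder
            _ => 2,                          // all other failures exit 2
        }
    }
}

// Stand-in for the real `uumain`: always fails, to exercise the error path.
fn uumain() -> UResult<()> {
    Err(Box::new(SortError::Disorder {
        file: "input.txt".into(),
        line_number: 3,
        line: "zebra".into(),
    }))
}

// What the `gen_uumain` wrapper does, in spirit: print, then map to an exit code.
fn main() {
    if let Err(e) = uumain() {
        eprintln!("sort: {}", e);
        exit(e.code());
    }
}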