mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 03:57:44 +00:00
sort: use UResult
This commit is contained in:
parent
e8eb15f05e
commit
a4709c805c
5 changed files with 370 additions and 227 deletions
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
chunks::{self, Chunk, RecycledChunk},
|
chunks::{self, Chunk, RecycledChunk},
|
||||||
compare_by, open, GlobalSettings,
|
compare_by, open, GlobalSettings, SortError,
|
||||||
};
|
};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use std::{
|
use std::{
|
||||||
|
@ -20,13 +20,14 @@ use std::{
|
||||||
sync::mpsc::{sync_channel, Receiver, SyncSender},
|
sync::mpsc::{sync_channel, Receiver, SyncSender},
|
||||||
thread,
|
thread,
|
||||||
};
|
};
|
||||||
|
use uucore::error::UResult;
|
||||||
|
|
||||||
/// Check if the file at `path` is ordered.
|
/// Check if the file at `path` is ordered.
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// The code we should exit with.
|
/// `Ok(())` if the file is ordered; otherwise an error such as `SortError::Disorder`.
|
||||||
pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
|
pub fn check(path: &OsStr, settings: &GlobalSettings) -> UResult<()> {
|
||||||
let max_allowed_cmp = if settings.unique {
|
let max_allowed_cmp = if settings.unique {
|
||||||
// If `unique` is enabled, the previous line must compare _less_ to the next one.
|
// If `unique` is enabled, the previous line must compare _less_ to the next one.
|
||||||
Ordering::Less
|
Ordering::Less
|
||||||
|
@ -34,7 +35,7 @@ pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
|
||||||
// Otherwise, the previous line must compare _less or equal_ to the next one.
|
// Otherwise, the previous line must compare _less or equal_ to the next one.
|
||||||
Ordering::Equal
|
Ordering::Equal
|
||||||
};
|
};
|
||||||
let file = open(path);
|
let file = open(path)?;
|
||||||
let (recycled_sender, recycled_receiver) = sync_channel(2);
|
let (recycled_sender, recycled_receiver) = sync_channel(2);
|
||||||
let (loaded_sender, loaded_receiver) = sync_channel(2);
|
let (loaded_sender, loaded_receiver) = sync_channel(2);
|
||||||
thread::spawn({
|
thread::spawn({
|
||||||
|
@ -69,15 +70,13 @@ pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
|
||||||
chunk.line_data(),
|
chunk.line_data(),
|
||||||
) > max_allowed_cmp
|
) > max_allowed_cmp
|
||||||
{
|
{
|
||||||
if !settings.check_silent {
|
return Err(SortError::Disorder {
|
||||||
eprintln!(
|
file: path.to_owned(),
|
||||||
"sort: {}:{}: disorder: {}",
|
line_number: line_idx,
|
||||||
path.to_string_lossy(),
|
line: new_first.line.to_owned(),
|
||||||
line_idx,
|
silent: settings.check_silent,
|
||||||
new_first.line
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
return 1;
|
.into());
|
||||||
}
|
}
|
||||||
let _ = recycled_sender.send(prev_chunk.recycle());
|
let _ = recycled_sender.send(prev_chunk.recycle());
|
||||||
}
|
}
|
||||||
|
@ -85,21 +84,19 @@ pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 {
|
||||||
for (a, b) in chunk.lines().iter().tuple_windows() {
|
for (a, b) in chunk.lines().iter().tuple_windows() {
|
||||||
line_idx += 1;
|
line_idx += 1;
|
||||||
if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) > max_allowed_cmp {
|
if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) > max_allowed_cmp {
|
||||||
if !settings.check_silent {
|
return Err(SortError::Disorder {
|
||||||
eprintln!(
|
file: path.to_owned(),
|
||||||
"sort: {}:{}: disorder: {}",
|
line_number: line_idx,
|
||||||
path.to_string_lossy(),
|
line: b.line.to_owned(),
|
||||||
line_idx,
|
silent: settings.check_silent,
|
||||||
b.line
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
return 1;
|
.into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
prev_chunk = Some(chunk);
|
prev_chunk = Some(chunk);
|
||||||
}
|
}
|
||||||
0
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The function running on the reader thread.
|
/// The function running on the reader thread.
|
||||||
|
@ -108,7 +105,7 @@ fn reader(
|
||||||
receiver: Receiver<RecycledChunk>,
|
receiver: Receiver<RecycledChunk>,
|
||||||
sender: SyncSender<Chunk>,
|
sender: SyncSender<Chunk>,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
) {
|
) -> UResult<()> {
|
||||||
let mut carry_over = vec![];
|
let mut carry_over = vec![];
|
||||||
for recycled_chunk in receiver.iter() {
|
for recycled_chunk in receiver.iter() {
|
||||||
let should_continue = chunks::read(
|
let should_continue = chunks::read(
|
||||||
|
@ -124,9 +121,10 @@ fn reader(
|
||||||
b'\n'
|
b'\n'
|
||||||
},
|
},
|
||||||
settings,
|
settings,
|
||||||
);
|
)?;
|
||||||
if !should_continue {
|
if !should_continue {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,8 +14,9 @@ use std::{
|
||||||
|
|
||||||
use memchr::memchr_iter;
|
use memchr::memchr_iter;
|
||||||
use ouroboros::self_referencing;
|
use ouroboros::self_referencing;
|
||||||
|
use uucore::error::{UResult, USimpleError};
|
||||||
|
|
||||||
use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line};
|
use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line, SortError};
|
||||||
|
|
||||||
/// The chunk that is passed around between threads.
|
/// The chunk that is passed around between threads.
|
||||||
/// `lines` consist of slices into `buffer`.
|
/// `lines` consist of slices into `buffer`.
|
||||||
|
@ -137,10 +138,10 @@ pub fn read<T: Read>(
|
||||||
max_buffer_size: Option<usize>,
|
max_buffer_size: Option<usize>,
|
||||||
carry_over: &mut Vec<u8>,
|
carry_over: &mut Vec<u8>,
|
||||||
file: &mut T,
|
file: &mut T,
|
||||||
next_files: &mut impl Iterator<Item = T>,
|
next_files: &mut impl Iterator<Item = UResult<T>>,
|
||||||
separator: u8,
|
separator: u8,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
) -> bool {
|
) -> UResult<bool> {
|
||||||
let RecycledChunk {
|
let RecycledChunk {
|
||||||
lines,
|
lines,
|
||||||
selections,
|
selections,
|
||||||
|
@ -159,12 +160,12 @@ pub fn read<T: Read>(
|
||||||
max_buffer_size,
|
max_buffer_size,
|
||||||
carry_over.len(),
|
carry_over.len(),
|
||||||
separator,
|
separator,
|
||||||
);
|
)?;
|
||||||
carry_over.clear();
|
carry_over.clear();
|
||||||
carry_over.extend_from_slice(&buffer[read..]);
|
carry_over.extend_from_slice(&buffer[read..]);
|
||||||
|
|
||||||
if read != 0 {
|
if read != 0 {
|
||||||
let payload = Chunk::new(buffer, |buffer| {
|
let payload: UResult<Chunk> = Chunk::try_new(buffer, |buffer| {
|
||||||
let selections = unsafe {
|
let selections = unsafe {
|
||||||
// SAFETY: It is safe to transmute to an empty vector of selections with shorter lifetime.
|
// SAFETY: It is safe to transmute to an empty vector of selections with shorter lifetime.
|
||||||
// It was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
|
// It was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
|
||||||
|
@ -175,18 +176,19 @@ pub fn read<T: Read>(
|
||||||
// because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
|
// because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
|
||||||
std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
|
std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
|
||||||
};
|
};
|
||||||
let read = crash_if_err!(2, std::str::from_utf8(&buffer[..read]));
|
let read = std::str::from_utf8(&buffer[..read])
|
||||||
|
.map_err(|error| SortError::Uft8Error { error })?;
|
||||||
let mut line_data = LineData {
|
let mut line_data = LineData {
|
||||||
selections,
|
selections,
|
||||||
num_infos,
|
num_infos,
|
||||||
parsed_floats,
|
parsed_floats,
|
||||||
};
|
};
|
||||||
parse_lines(read, &mut lines, &mut line_data, separator, settings);
|
parse_lines(read, &mut lines, &mut line_data, separator, settings);
|
||||||
ChunkContents { lines, line_data }
|
Ok(ChunkContents { lines, line_data })
|
||||||
});
|
});
|
||||||
sender.send(payload).unwrap();
|
sender.send(payload?).unwrap();
|
||||||
}
|
}
|
||||||
should_continue
|
Ok(should_continue)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Split `read` into `Line`s, and add them to `lines`.
|
/// Split `read` into `Line`s, and add them to `lines`.
|
||||||
|
@ -242,12 +244,12 @@ fn parse_lines<'a>(
|
||||||
/// * Whether this function should be called again.
|
/// * Whether this function should be called again.
|
||||||
fn read_to_buffer<T: Read>(
|
fn read_to_buffer<T: Read>(
|
||||||
file: &mut T,
|
file: &mut T,
|
||||||
next_files: &mut impl Iterator<Item = T>,
|
next_files: &mut impl Iterator<Item = UResult<T>>,
|
||||||
buffer: &mut Vec<u8>,
|
buffer: &mut Vec<u8>,
|
||||||
max_buffer_size: Option<usize>,
|
max_buffer_size: Option<usize>,
|
||||||
start_offset: usize,
|
start_offset: usize,
|
||||||
separator: u8,
|
separator: u8,
|
||||||
) -> (usize, bool) {
|
) -> UResult<(usize, bool)> {
|
||||||
let mut read_target = &mut buffer[start_offset..];
|
let mut read_target = &mut buffer[start_offset..];
|
||||||
let mut last_file_target_size = read_target.len();
|
let mut last_file_target_size = read_target.len();
|
||||||
loop {
|
loop {
|
||||||
|
@ -274,7 +276,7 @@ fn read_to_buffer<T: Read>(
|
||||||
// We read enough lines.
|
// We read enough lines.
|
||||||
let end = last_line_end.unwrap();
|
let end = last_line_end.unwrap();
|
||||||
// We want to include the separator here, because it shouldn't be carried over.
|
// We want to include the separator here, because it shouldn't be carried over.
|
||||||
return (end + 1, true);
|
return Ok((end + 1, true));
|
||||||
} else {
|
} else {
|
||||||
// We need to read more lines
|
// We need to read more lines
|
||||||
let len = buffer.len();
|
let len = buffer.len();
|
||||||
|
@ -299,11 +301,11 @@ fn read_to_buffer<T: Read>(
|
||||||
if let Some(next_file) = next_files.next() {
|
if let Some(next_file) = next_files.next() {
|
||||||
// There is another file.
|
// There is another file.
|
||||||
last_file_target_size = leftover_len;
|
last_file_target_size = leftover_len;
|
||||||
*file = next_file;
|
*file = next_file?;
|
||||||
} else {
|
} else {
|
||||||
// This was the last file.
|
// This was the last file.
|
||||||
let read_len = buffer.len() - leftover_len;
|
let read_len = buffer.len() - leftover_len;
|
||||||
return (read_len, false);
|
return Ok((read_len, false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -313,7 +315,7 @@ fn read_to_buffer<T: Read>(
|
||||||
Err(e) if e.kind() == ErrorKind::Interrupted => {
|
Err(e) if e.kind() == ErrorKind::Interrupted => {
|
||||||
// retry
|
// retry
|
||||||
}
|
}
|
||||||
Err(e) => crash!(2, "{}", e),
|
Err(e) => return Err(USimpleError::new(2, e.to_string())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ use std::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
use uucore::error::UResult;
|
||||||
|
|
||||||
use crate::chunks::RecycledChunk;
|
use crate::chunks::RecycledChunk;
|
||||||
use crate::merge::ClosedTmpFile;
|
use crate::merge::ClosedTmpFile;
|
||||||
|
@ -29,6 +30,7 @@ use crate::merge::WriteableCompressedTmpFile;
|
||||||
use crate::merge::WriteablePlainTmpFile;
|
use crate::merge::WriteablePlainTmpFile;
|
||||||
use crate::merge::WriteableTmpFile;
|
use crate::merge::WriteableTmpFile;
|
||||||
use crate::Output;
|
use crate::Output;
|
||||||
|
use crate::SortError;
|
||||||
use crate::{
|
use crate::{
|
||||||
chunks::{self, Chunk},
|
chunks::{self, Chunk},
|
||||||
compare_by, merge, sort_by, GlobalSettings,
|
compare_by, merge, sort_by, GlobalSettings,
|
||||||
|
@ -40,10 +42,10 @@ const START_BUFFER_SIZE: usize = 8_000;
|
||||||
|
|
||||||
/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
|
/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
|
||||||
pub fn ext_sort(
|
pub fn ext_sort(
|
||||||
files: &mut impl Iterator<Item = Box<dyn Read + Send>>,
|
files: &mut impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
output: Output,
|
output: Output,
|
||||||
) {
|
) -> UResult<()> {
|
||||||
let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1);
|
let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1);
|
||||||
let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1);
|
let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1);
|
||||||
thread::spawn({
|
thread::spawn({
|
||||||
|
@ -57,7 +59,7 @@ pub fn ext_sort(
|
||||||
sorted_receiver,
|
sorted_receiver,
|
||||||
recycled_sender,
|
recycled_sender,
|
||||||
output,
|
output,
|
||||||
);
|
)
|
||||||
} else {
|
} else {
|
||||||
reader_writer::<_, WriteablePlainTmpFile>(
|
reader_writer::<_, WriteablePlainTmpFile>(
|
||||||
files,
|
files,
|
||||||
|
@ -65,17 +67,20 @@ pub fn ext_sort(
|
||||||
sorted_receiver,
|
sorted_receiver,
|
||||||
recycled_sender,
|
recycled_sender,
|
||||||
output,
|
output,
|
||||||
);
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile + 'static>(
|
fn reader_writer<
|
||||||
|
F: Iterator<Item = UResult<Box<dyn Read + Send>>>,
|
||||||
|
Tmp: WriteableTmpFile + 'static,
|
||||||
|
>(
|
||||||
files: F,
|
files: F,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
receiver: Receiver<Chunk>,
|
receiver: Receiver<Chunk>,
|
||||||
sender: SyncSender<Chunk>,
|
sender: SyncSender<Chunk>,
|
||||||
output: Output,
|
output: Output,
|
||||||
) {
|
) -> UResult<()> {
|
||||||
let separator = if settings.zero_terminated {
|
let separator = if settings.zero_terminated {
|
||||||
b'\0'
|
b'\0'
|
||||||
} else {
|
} else {
|
||||||
|
@ -93,16 +98,16 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
|
||||||
settings,
|
settings,
|
||||||
receiver,
|
receiver,
|
||||||
sender,
|
sender,
|
||||||
);
|
)?;
|
||||||
match read_result {
|
match read_result {
|
||||||
ReadResult::WroteChunksToFile { tmp_files, tmp_dir } => {
|
ReadResult::WroteChunksToFile { tmp_files, tmp_dir } => {
|
||||||
let tmp_dir_size = tmp_files.len();
|
let tmp_dir_size = tmp_files.len();
|
||||||
let mut merger = merge::merge_with_file_limit::<_, _, Tmp>(
|
let merger = merge::merge_with_file_limit::<_, _, Tmp>(
|
||||||
tmp_files.into_iter().map(|c| c.reopen()),
|
tmp_files.into_iter().map(|c| c.reopen()),
|
||||||
settings,
|
settings,
|
||||||
Some((tmp_dir, tmp_dir_size)),
|
Some((tmp_dir, tmp_dir_size)),
|
||||||
);
|
)?;
|
||||||
merger.write_all(settings, output);
|
merger.write_all(settings, output)?;
|
||||||
}
|
}
|
||||||
ReadResult::SortedSingleChunk(chunk) => {
|
ReadResult::SortedSingleChunk(chunk) => {
|
||||||
if settings.unique {
|
if settings.unique {
|
||||||
|
@ -145,6 +150,7 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
|
||||||
// don't output anything
|
// don't output anything
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The function that is executed on the sorter thread.
|
/// The function that is executed on the sorter thread.
|
||||||
|
@ -153,7 +159,11 @@ fn sorter(receiver: Receiver<Chunk>, sender: SyncSender<Chunk>, settings: Global
|
||||||
payload.with_contents_mut(|contents| {
|
payload.with_contents_mut(|contents| {
|
||||||
sort_by(&mut contents.lines, &settings, &contents.line_data)
|
sort_by(&mut contents.lines, &settings, &contents.line_data)
|
||||||
});
|
});
|
||||||
sender.send(payload).unwrap();
|
if sender.send(payload).is_err() {
|
||||||
|
// The receiver has gone away, likely because the other thread hit an error.
|
||||||
|
// We stop silently because the actual error is printed by the other thread.
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,15 +183,15 @@ enum ReadResult<I: WriteableTmpFile> {
|
||||||
}
|
}
|
||||||
/// The function that is executed on the reader/writer thread.
|
/// The function that is executed on the reader/writer thread.
|
||||||
fn read_write_loop<I: WriteableTmpFile>(
|
fn read_write_loop<I: WriteableTmpFile>(
|
||||||
mut files: impl Iterator<Item = Box<dyn Read + Send>>,
|
mut files: impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
|
||||||
tmp_dir_parent: &Path,
|
tmp_dir_parent: &Path,
|
||||||
separator: u8,
|
separator: u8,
|
||||||
buffer_size: usize,
|
buffer_size: usize,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
receiver: Receiver<Chunk>,
|
receiver: Receiver<Chunk>,
|
||||||
sender: SyncSender<Chunk>,
|
sender: SyncSender<Chunk>,
|
||||||
) -> ReadResult<I> {
|
) -> UResult<ReadResult<I>> {
|
||||||
let mut file = files.next().unwrap();
|
let mut file = files.next().unwrap()?;
|
||||||
|
|
||||||
let mut carry_over = vec![];
|
let mut carry_over = vec![];
|
||||||
// kick things off with two reads
|
// kick things off with two reads
|
||||||
|
@ -199,14 +209,14 @@ fn read_write_loop<I: WriteableTmpFile>(
|
||||||
&mut files,
|
&mut files,
|
||||||
separator,
|
separator,
|
||||||
settings,
|
settings,
|
||||||
);
|
)?;
|
||||||
|
|
||||||
if !should_continue {
|
if !should_continue {
|
||||||
drop(sender);
|
drop(sender);
|
||||||
// We have already read the whole input. Since we are in our first two reads,
|
// We have already read the whole input. Since we are in our first two reads,
|
||||||
// this means that we can fit the whole input into memory. Bypass writing below and
|
// this means that we can fit the whole input into memory. Bypass writing below and
|
||||||
// handle this case in a more straightforward way.
|
// handle this case in a more straightforward way.
|
||||||
return if let Ok(first_chunk) = receiver.recv() {
|
return Ok(if let Ok(first_chunk) = receiver.recv() {
|
||||||
if let Ok(second_chunk) = receiver.recv() {
|
if let Ok(second_chunk) = receiver.recv() {
|
||||||
ReadResult::SortedTwoChunks([first_chunk, second_chunk])
|
ReadResult::SortedTwoChunks([first_chunk, second_chunk])
|
||||||
} else {
|
} else {
|
||||||
|
@ -214,16 +224,14 @@ fn read_write_loop<I: WriteableTmpFile>(
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ReadResult::EmptyInput
|
ReadResult::EmptyInput
|
||||||
};
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let tmp_dir = crash_if_err!(
|
let tmp_dir = tempfile::Builder::new()
|
||||||
2,
|
.prefix("uutils_sort")
|
||||||
tempfile::Builder::new()
|
.tempdir_in(tmp_dir_parent)
|
||||||
.prefix("uutils_sort")
|
.map_err(|_| SortError::TmpDirCreationFailed)?;
|
||||||
.tempdir_in(tmp_dir_parent)
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut sender_option = Some(sender);
|
let mut sender_option = Some(sender);
|
||||||
let mut file_number = 0;
|
let mut file_number = 0;
|
||||||
|
@ -232,7 +240,7 @@ fn read_write_loop<I: WriteableTmpFile>(
|
||||||
let mut chunk = match receiver.recv() {
|
let mut chunk = match receiver.recv() {
|
||||||
Ok(it) => it,
|
Ok(it) => it,
|
||||||
_ => {
|
_ => {
|
||||||
return ReadResult::WroteChunksToFile { tmp_files, tmp_dir };
|
return Ok(ReadResult::WroteChunksToFile { tmp_files, tmp_dir });
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -241,7 +249,7 @@ fn read_write_loop<I: WriteableTmpFile>(
|
||||||
tmp_dir.path().join(file_number.to_string()),
|
tmp_dir.path().join(file_number.to_string()),
|
||||||
settings.compress_prog.as_deref(),
|
settings.compress_prog.as_deref(),
|
||||||
separator,
|
separator,
|
||||||
);
|
)?;
|
||||||
tmp_files.push(tmp_file);
|
tmp_files.push(tmp_file);
|
||||||
|
|
||||||
file_number += 1;
|
file_number += 1;
|
||||||
|
@ -258,7 +266,7 @@ fn read_write_loop<I: WriteableTmpFile>(
|
||||||
&mut files,
|
&mut files,
|
||||||
separator,
|
separator,
|
||||||
settings,
|
settings,
|
||||||
);
|
)?;
|
||||||
if !should_continue {
|
if !should_continue {
|
||||||
sender_option = None;
|
sender_option = None;
|
||||||
}
|
}
|
||||||
|
@ -273,8 +281,8 @@ fn write<I: WriteableTmpFile>(
|
||||||
file: PathBuf,
|
file: PathBuf,
|
||||||
compress_prog: Option<&str>,
|
compress_prog: Option<&str>,
|
||||||
separator: u8,
|
separator: u8,
|
||||||
) -> I::Closed {
|
) -> UResult<I::Closed> {
|
||||||
let mut tmp_file = I::create(file, compress_prog);
|
let mut tmp_file = I::create(file, compress_prog)?;
|
||||||
write_lines(chunk.lines(), tmp_file.as_write(), separator);
|
write_lines(chunk.lines(), tmp_file.as_write(), separator);
|
||||||
tmp_file.finished_writing()
|
tmp_file.finished_writing()
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,16 +17,17 @@ use std::{
|
||||||
process::{Child, ChildStdin, ChildStdout, Command, Stdio},
|
process::{Child, ChildStdin, ChildStdout, Command, Stdio},
|
||||||
rc::Rc,
|
rc::Rc,
|
||||||
sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
|
sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
|
||||||
thread,
|
thread::{self, JoinHandle},
|
||||||
};
|
};
|
||||||
|
|
||||||
use compare::Compare;
|
use compare::Compare;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
use uucore::error::UResult;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
chunks::{self, Chunk, RecycledChunk},
|
chunks::{self, Chunk, RecycledChunk},
|
||||||
compare_by, open, GlobalSettings, Output,
|
compare_by, open, GlobalSettings, Output, SortError,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// If the output file occurs in the input files as well, copy the contents of the output file
|
/// If the output file occurs in the input files as well, copy the contents of the output file
|
||||||
|
@ -35,7 +36,7 @@ fn replace_output_file_in_input_files(
|
||||||
files: &mut [OsString],
|
files: &mut [OsString],
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
output: Option<&str>,
|
output: Option<&str>,
|
||||||
) -> Option<(TempDir, usize)> {
|
) -> UResult<Option<(TempDir, usize)>> {
|
||||||
let mut copy: Option<(TempDir, PathBuf)> = None;
|
let mut copy: Option<(TempDir, PathBuf)> = None;
|
||||||
if let Some(Ok(output_path)) = output.map(|path| Path::new(path).canonicalize()) {
|
if let Some(Ok(output_path)) = output.map(|path| Path::new(path).canonicalize()) {
|
||||||
for file in files {
|
for file in files {
|
||||||
|
@ -47,9 +48,10 @@ fn replace_output_file_in_input_files(
|
||||||
let tmp_dir = tempfile::Builder::new()
|
let tmp_dir = tempfile::Builder::new()
|
||||||
.prefix("uutils_sort")
|
.prefix("uutils_sort")
|
||||||
.tempdir_in(&settings.tmp_dir)
|
.tempdir_in(&settings.tmp_dir)
|
||||||
.unwrap();
|
.map_err(|_| SortError::TmpDirCreationFailed)?;
|
||||||
let copy_path = tmp_dir.path().join("0");
|
let copy_path = tmp_dir.path().join("0");
|
||||||
std::fs::copy(file_path, ©_path).unwrap();
|
std::fs::copy(file_path, ©_path)
|
||||||
|
.map_err(|error| SortError::OpenTmpFileFailed { error })?;
|
||||||
*file = copy_path.clone().into_os_string();
|
*file = copy_path.clone().into_os_string();
|
||||||
copy = Some((tmp_dir, copy_path))
|
copy = Some((tmp_dir, copy_path))
|
||||||
}
|
}
|
||||||
|
@ -58,7 +60,7 @@ fn replace_output_file_in_input_files(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if we created a TempDir its size must be one.
|
// if we created a TempDir its size must be one.
|
||||||
copy.map(|(dir, _copy)| (dir, 1))
|
Ok(copy.map(|(dir, _copy)| (dir, 1)))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Merge pre-sorted `Box<dyn Read>`s.
|
/// Merge pre-sorted `Box<dyn Read>`s.
|
||||||
|
@ -69,13 +71,13 @@ pub fn merge<'a>(
|
||||||
files: &mut [OsString],
|
files: &mut [OsString],
|
||||||
settings: &'a GlobalSettings,
|
settings: &'a GlobalSettings,
|
||||||
output: Option<&str>,
|
output: Option<&str>,
|
||||||
) -> FileMerger<'a> {
|
) -> UResult<FileMerger<'a>> {
|
||||||
let tmp_dir = replace_output_file_in_input_files(files, settings, output);
|
let tmp_dir = replace_output_file_in_input_files(files, settings, output)?;
|
||||||
if settings.compress_prog.is_none() {
|
if settings.compress_prog.is_none() {
|
||||||
merge_with_file_limit::<_, _, WriteablePlainTmpFile>(
|
merge_with_file_limit::<_, _, WriteablePlainTmpFile>(
|
||||||
files
|
files
|
||||||
.iter()
|
.iter()
|
||||||
.map(|file| PlainMergeInput { inner: open(file) }),
|
.map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
|
||||||
settings,
|
settings,
|
||||||
tmp_dir,
|
tmp_dir,
|
||||||
)
|
)
|
||||||
|
@ -83,7 +85,7 @@ pub fn merge<'a>(
|
||||||
merge_with_file_limit::<_, _, WriteableCompressedTmpFile>(
|
merge_with_file_limit::<_, _, WriteableCompressedTmpFile>(
|
||||||
files
|
files
|
||||||
.iter()
|
.iter()
|
||||||
.map(|file| PlainMergeInput { inner: open(file) }),
|
.map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
|
||||||
settings,
|
settings,
|
||||||
tmp_dir,
|
tmp_dir,
|
||||||
)
|
)
|
||||||
|
@ -93,24 +95,25 @@ pub fn merge<'a>(
|
||||||
// Merge already sorted `MergeInput`s.
|
// Merge already sorted `MergeInput`s.
|
||||||
pub fn merge_with_file_limit<
|
pub fn merge_with_file_limit<
|
||||||
M: MergeInput + 'static,
|
M: MergeInput + 'static,
|
||||||
F: ExactSizeIterator<Item = M>,
|
F: ExactSizeIterator<Item = UResult<M>>,
|
||||||
Tmp: WriteableTmpFile + 'static,
|
Tmp: WriteableTmpFile + 'static,
|
||||||
>(
|
>(
|
||||||
files: F,
|
files: F,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
tmp_dir: Option<(TempDir, usize)>,
|
tmp_dir: Option<(TempDir, usize)>,
|
||||||
) -> FileMerger {
|
) -> UResult<FileMerger> {
|
||||||
if files.len() > settings.merge_batch_size {
|
if files.len() > settings.merge_batch_size {
|
||||||
// If we did not get a tmp_dir, create one.
|
// If we did not get a tmp_dir, create one.
|
||||||
let (tmp_dir, mut tmp_dir_size) = tmp_dir.unwrap_or_else(|| {
|
let (tmp_dir, mut tmp_dir_size) = match tmp_dir {
|
||||||
(
|
Some(x) => x,
|
||||||
|
None => (
|
||||||
tempfile::Builder::new()
|
tempfile::Builder::new()
|
||||||
.prefix("uutils_sort")
|
.prefix("uutils_sort")
|
||||||
.tempdir_in(&settings.tmp_dir)
|
.tempdir_in(&settings.tmp_dir)
|
||||||
.unwrap(),
|
.map_err(|_| SortError::TmpDirCreationFailed)?,
|
||||||
0,
|
0,
|
||||||
)
|
),
|
||||||
});
|
};
|
||||||
let mut remaining_files = files.len();
|
let mut remaining_files = files.len();
|
||||||
let batches = files.chunks(settings.merge_batch_size);
|
let batches = files.chunks(settings.merge_batch_size);
|
||||||
let mut batches = batches.into_iter();
|
let mut batches = batches.into_iter();
|
||||||
|
@ -118,14 +121,14 @@ pub fn merge_with_file_limit<
|
||||||
while remaining_files != 0 {
|
while remaining_files != 0 {
|
||||||
// Work around the fact that `Chunks` is not an `ExactSizeIterator`.
|
// Work around the fact that `Chunks` is not an `ExactSizeIterator`.
|
||||||
remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
|
remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
|
||||||
let mut merger = merge_without_limit(batches.next().unwrap(), settings);
|
let merger = merge_without_limit(batches.next().unwrap(), settings)?;
|
||||||
let mut tmp_file = Tmp::create(
|
let mut tmp_file = Tmp::create(
|
||||||
tmp_dir.path().join(tmp_dir_size.to_string()),
|
tmp_dir.path().join(tmp_dir_size.to_string()),
|
||||||
settings.compress_prog.as_deref(),
|
settings.compress_prog.as_deref(),
|
||||||
);
|
)?;
|
||||||
tmp_dir_size += 1;
|
tmp_dir_size += 1;
|
||||||
merger.write_all_to(settings, tmp_file.as_write());
|
merger.write_all_to(settings, tmp_file.as_write())?;
|
||||||
temporary_files.push(tmp_file.finished_writing());
|
temporary_files.push(tmp_file.finished_writing()?);
|
||||||
}
|
}
|
||||||
assert!(batches.next().is_none());
|
assert!(batches.next().is_none());
|
||||||
merge_with_file_limit::<_, _, Tmp>(
|
merge_with_file_limit::<_, _, Tmp>(
|
||||||
|
@ -133,7 +136,7 @@ pub fn merge_with_file_limit<
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(Box::new(|c: Tmp::Closed| c.reopen())
|
.map(Box::new(|c: Tmp::Closed| c.reopen())
|
||||||
as Box<
|
as Box<
|
||||||
dyn FnMut(Tmp::Closed) -> <Tmp::Closed as ClosedTmpFile>::Reopened,
|
dyn FnMut(Tmp::Closed) -> UResult<<Tmp::Closed as ClosedTmpFile>::Reopened>,
|
||||||
>),
|
>),
|
||||||
settings,
|
settings,
|
||||||
Some((tmp_dir, tmp_dir_size)),
|
Some((tmp_dir, tmp_dir_size)),
|
||||||
|
@ -147,10 +150,10 @@ pub fn merge_with_file_limit<
|
||||||
///
|
///
|
||||||
/// It is the responsibility of the caller to ensure that `files` yields only
|
/// It is the responsibility of the caller to ensure that `files` yields only
|
||||||
/// as many files as we are allowed to open concurrently.
|
/// as many files as we are allowed to open concurrently.
|
||||||
fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
|
fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = UResult<M>>>(
|
||||||
files: F,
|
files: F,
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
) -> FileMerger {
|
) -> UResult<FileMerger> {
|
||||||
let (request_sender, request_receiver) = channel();
|
let (request_sender, request_receiver) = channel();
|
||||||
let mut reader_files = Vec::with_capacity(files.size_hint().0);
|
let mut reader_files = Vec::with_capacity(files.size_hint().0);
|
||||||
let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
|
let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
|
||||||
|
@ -158,7 +161,7 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
|
||||||
let (sender, receiver) = sync_channel(2);
|
let (sender, receiver) = sync_channel(2);
|
||||||
loaded_receivers.push(receiver);
|
loaded_receivers.push(receiver);
|
||||||
reader_files.push(Some(ReaderFile {
|
reader_files.push(Some(ReaderFile {
|
||||||
file,
|
file: file?,
|
||||||
sender,
|
sender,
|
||||||
carry_over: vec![],
|
carry_over: vec![],
|
||||||
}));
|
}));
|
||||||
|
@ -175,7 +178,7 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
thread::spawn({
|
let reader_join_handle = thread::spawn({
|
||||||
let settings = settings.clone();
|
let settings = settings.clone();
|
||||||
move || {
|
move || {
|
||||||
reader(
|
reader(
|
||||||
|
@ -204,14 +207,15 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FileMerger {
|
Ok(FileMerger {
|
||||||
heap: binary_heap_plus::BinaryHeap::from_vec_cmp(
|
heap: binary_heap_plus::BinaryHeap::from_vec_cmp(
|
||||||
mergeable_files,
|
mergeable_files,
|
||||||
FileComparator { settings },
|
FileComparator { settings },
|
||||||
),
|
),
|
||||||
request_sender,
|
request_sender,
|
||||||
prev: None,
|
prev: None,
|
||||||
}
|
reader_join_handle,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
/// The struct on the reader thread representing an input file
|
/// The struct on the reader thread representing an input file
|
||||||
struct ReaderFile<M: MergeInput> {
|
struct ReaderFile<M: MergeInput> {
|
||||||
|
@ -226,7 +230,7 @@ fn reader(
|
||||||
files: &mut [Option<ReaderFile<impl MergeInput>>],
|
files: &mut [Option<ReaderFile<impl MergeInput>>],
|
||||||
settings: &GlobalSettings,
|
settings: &GlobalSettings,
|
||||||
separator: u8,
|
separator: u8,
|
||||||
) {
|
) -> UResult<()> {
|
||||||
for (file_idx, recycled_chunk) in recycled_receiver.iter() {
|
for (file_idx, recycled_chunk) in recycled_receiver.iter() {
|
||||||
if let Some(ReaderFile {
|
if let Some(ReaderFile {
|
||||||
file,
|
file,
|
||||||
|
@ -243,15 +247,16 @@ fn reader(
|
||||||
&mut iter::empty(),
|
&mut iter::empty(),
|
||||||
separator,
|
separator,
|
||||||
settings,
|
settings,
|
||||||
);
|
)?;
|
||||||
if !should_continue {
|
if !should_continue {
|
||||||
// Remove the file from the list by replacing it with `None`.
|
// Remove the file from the list by replacing it with `None`.
|
||||||
let ReaderFile { file, .. } = files[file_idx].take().unwrap();
|
let ReaderFile { file, .. } = files[file_idx].take().unwrap();
|
||||||
// Depending on the kind of the `MergeInput`, this may delete the file:
|
// Depending on the kind of the `MergeInput`, this may delete the file:
|
||||||
file.finished_reading();
|
file.finished_reading()?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
/// The struct on the main thread representing an input file
|
/// The struct on the main thread representing an input file
|
||||||
pub struct MergeableFile {
|
pub struct MergeableFile {
|
||||||
|
@ -275,17 +280,20 @@ pub struct FileMerger<'a> {
|
||||||
heap: binary_heap_plus::BinaryHeap<MergeableFile, FileComparator<'a>>,
|
heap: binary_heap_plus::BinaryHeap<MergeableFile, FileComparator<'a>>,
|
||||||
request_sender: Sender<(usize, RecycledChunk)>,
|
request_sender: Sender<(usize, RecycledChunk)>,
|
||||||
prev: Option<PreviousLine>,
|
prev: Option<PreviousLine>,
|
||||||
|
reader_join_handle: JoinHandle<UResult<()>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FileMerger<'a> {
|
impl<'a> FileMerger<'a> {
|
||||||
/// Write the merged contents to the output file.
|
/// Write the merged contents to the output file.
|
||||||
pub fn write_all(&mut self, settings: &GlobalSettings, output: Output) {
|
pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> {
|
||||||
let mut out = output.into_write();
|
let mut out = output.into_write();
|
||||||
self.write_all_to(settings, &mut out);
|
self.write_all_to(settings, &mut out)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
|
pub fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> {
|
||||||
while self.write_next(settings, out) {}
|
while self.write_next(settings, out) {}
|
||||||
|
drop(self.request_sender);
|
||||||
|
self.reader_join_handle.join().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {
|
fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {
|
||||||
|
@ -369,36 +377,41 @@ impl<'a> Compare<MergeableFile> for FileComparator<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for the child to exit and check its exit code.
|
// Wait for the child to exit and check its exit code.
|
||||||
fn assert_child_success(mut child: Child, program: &str) {
|
fn check_child_success(mut child: Child, program: &str) -> UResult<()> {
|
||||||
if !matches!(
|
if !matches!(
|
||||||
child.wait().map(|e| e.code()),
|
child.wait().map(|e| e.code()),
|
||||||
Ok(Some(0)) | Ok(None) | Err(_)
|
Ok(Some(0)) | Ok(None) | Err(_)
|
||||||
) {
|
) {
|
||||||
crash!(2, "'{}' terminated abnormally", program)
|
Err(SortError::CompressProgTerminatedAbnormally {
|
||||||
|
prog: program.to_owned(),
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A temporary file that can be written to.
|
/// A temporary file that can be written to.
|
||||||
pub trait WriteableTmpFile {
|
pub trait WriteableTmpFile: Sized {
|
||||||
type Closed: ClosedTmpFile;
|
type Closed: ClosedTmpFile;
|
||||||
type InnerWrite: Write;
|
type InnerWrite: Write;
|
||||||
fn create(path: PathBuf, compress_prog: Option<&str>) -> Self;
|
fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult<Self>;
|
||||||
/// Closes the temporary file.
|
/// Closes the temporary file.
|
||||||
fn finished_writing(self) -> Self::Closed;
|
fn finished_writing(self) -> UResult<Self::Closed>;
|
||||||
fn as_write(&mut self) -> &mut Self::InnerWrite;
|
fn as_write(&mut self) -> &mut Self::InnerWrite;
|
||||||
}
|
}
|
||||||
/// A temporary file that is (temporarily) closed, but can be reopened.
|
/// A temporary file that is (temporarily) closed, but can be reopened.
|
||||||
pub trait ClosedTmpFile {
|
pub trait ClosedTmpFile {
|
||||||
type Reopened: MergeInput;
|
type Reopened: MergeInput;
|
||||||
/// Reopens the temporary file.
|
/// Reopens the temporary file.
|
||||||
fn reopen(self) -> Self::Reopened;
|
fn reopen(self) -> UResult<Self::Reopened>;
|
||||||
}
|
}
|
||||||
/// A pre-sorted input for merging.
|
/// A pre-sorted input for merging.
|
||||||
pub trait MergeInput: Send {
|
pub trait MergeInput: Send {
|
||||||
type InnerRead: Read;
|
type InnerRead: Read;
|
||||||
/// Cleans this `MergeInput` up.
|
/// Cleans this `MergeInput` up.
|
||||||
/// Implementations may delete the backing file.
|
/// Implementations may delete the backing file.
|
||||||
fn finished_reading(self);
|
fn finished_reading(self) -> UResult<()>;
|
||||||
fn as_read(&mut self) -> &mut Self::InnerRead;
|
fn as_read(&mut self) -> &mut Self::InnerRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -417,15 +430,17 @@ impl WriteableTmpFile for WriteablePlainTmpFile {
|
||||||
type Closed = ClosedPlainTmpFile;
|
type Closed = ClosedPlainTmpFile;
|
||||||
type InnerWrite = BufWriter<File>;
|
type InnerWrite = BufWriter<File>;
|
||||||
|
|
||||||
fn create(path: PathBuf, _: Option<&str>) -> Self {
|
fn create(path: PathBuf, _: Option<&str>) -> UResult<Self> {
|
||||||
WriteablePlainTmpFile {
|
Ok(WriteablePlainTmpFile {
|
||||||
file: BufWriter::new(File::create(&path).unwrap()),
|
file: BufWriter::new(
|
||||||
|
File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?,
|
||||||
|
),
|
||||||
path,
|
path,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finished_writing(self) -> Self::Closed {
|
fn finished_writing(self) -> UResult<Self::Closed> {
|
||||||
ClosedPlainTmpFile { path: self.path }
|
Ok(ClosedPlainTmpFile { path: self.path })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_write(&mut self) -> &mut Self::InnerWrite {
|
fn as_write(&mut self) -> &mut Self::InnerWrite {
|
||||||
|
@ -434,18 +449,22 @@ impl WriteableTmpFile for WriteablePlainTmpFile {
|
||||||
}
|
}
|
||||||
impl ClosedTmpFile for ClosedPlainTmpFile {
|
impl ClosedTmpFile for ClosedPlainTmpFile {
|
||||||
type Reopened = PlainTmpMergeInput;
|
type Reopened = PlainTmpMergeInput;
|
||||||
fn reopen(self) -> Self::Reopened {
|
fn reopen(self) -> UResult<Self::Reopened> {
|
||||||
PlainTmpMergeInput {
|
Ok(PlainTmpMergeInput {
|
||||||
file: File::open(&self.path).unwrap(),
|
file: File::open(&self.path).map_err(|error| SortError::OpenTmpFileFailed { error })?,
|
||||||
path: self.path,
|
path: self.path,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl MergeInput for PlainTmpMergeInput {
|
impl MergeInput for PlainTmpMergeInput {
|
||||||
type InnerRead = File;
|
type InnerRead = File;
|
||||||
|
|
||||||
fn finished_reading(self) {
|
fn finished_reading(self) -> UResult<()> {
|
||||||
fs::remove_file(self.path).ok();
|
// we ignore failures to delete the temporary file,
|
||||||
|
// because there is a race at the end of the execution and the whole
|
||||||
|
// temporary directory might already be gone.
|
||||||
|
let _ = fs::remove_file(self.path);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_read(&mut self) -> &mut Self::InnerRead {
|
fn as_read(&mut self) -> &mut Self::InnerRead {
|
||||||
|
@ -473,35 +492,33 @@ impl WriteableTmpFile for WriteableCompressedTmpFile {
|
||||||
type Closed = ClosedCompressedTmpFile;
|
type Closed = ClosedCompressedTmpFile;
|
||||||
type InnerWrite = BufWriter<ChildStdin>;
|
type InnerWrite = BufWriter<ChildStdin>;
|
||||||
|
|
||||||
fn create(path: PathBuf, compress_prog: Option<&str>) -> Self {
|
fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult<Self> {
|
||||||
let compress_prog = compress_prog.unwrap();
|
let compress_prog = compress_prog.unwrap();
|
||||||
let mut command = Command::new(compress_prog);
|
let mut command = Command::new(compress_prog);
|
||||||
command
|
let tmp_file =
|
||||||
.stdin(Stdio::piped())
|
File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?;
|
||||||
.stdout(File::create(&path).unwrap());
|
command.stdin(Stdio::piped()).stdout(tmp_file);
|
||||||
let mut child = crash_if_err!(
|
let mut child = command
|
||||||
2,
|
.spawn()
|
||||||
command.spawn().map_err(|err| format!(
|
.map_err(|err| SortError::CompressProgExecutionFailed {
|
||||||
"couldn't execute compress program: errno {}",
|
code: err.raw_os_error().unwrap(),
|
||||||
err.raw_os_error().unwrap()
|
})?;
|
||||||
))
|
|
||||||
);
|
|
||||||
let child_stdin = child.stdin.take().unwrap();
|
let child_stdin = child.stdin.take().unwrap();
|
||||||
WriteableCompressedTmpFile {
|
Ok(WriteableCompressedTmpFile {
|
||||||
path,
|
path,
|
||||||
compress_prog: compress_prog.to_owned(),
|
compress_prog: compress_prog.to_owned(),
|
||||||
child,
|
child,
|
||||||
child_stdin: BufWriter::new(child_stdin),
|
child_stdin: BufWriter::new(child_stdin),
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finished_writing(self) -> Self::Closed {
|
fn finished_writing(self) -> UResult<Self::Closed> {
|
||||||
drop(self.child_stdin);
|
drop(self.child_stdin);
|
||||||
assert_child_success(self.child, &self.compress_prog);
|
check_child_success(self.child, &self.compress_prog)?;
|
||||||
ClosedCompressedTmpFile {
|
Ok(ClosedCompressedTmpFile {
|
||||||
path: self.path,
|
path: self.path,
|
||||||
compress_prog: self.compress_prog,
|
compress_prog: self.compress_prog,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_write(&mut self) -> &mut Self::InnerWrite {
|
fn as_write(&mut self) -> &mut Self::InnerWrite {
|
||||||
|
@ -511,33 +528,32 @@ impl WriteableTmpFile for WriteableCompressedTmpFile {
|
||||||
impl ClosedTmpFile for ClosedCompressedTmpFile {
|
impl ClosedTmpFile for ClosedCompressedTmpFile {
|
||||||
type Reopened = CompressedTmpMergeInput;
|
type Reopened = CompressedTmpMergeInput;
|
||||||
|
|
||||||
fn reopen(self) -> Self::Reopened {
|
fn reopen(self) -> UResult<Self::Reopened> {
|
||||||
let mut command = Command::new(&self.compress_prog);
|
let mut command = Command::new(&self.compress_prog);
|
||||||
let file = File::open(&self.path).unwrap();
|
let file = File::open(&self.path).unwrap();
|
||||||
command.stdin(file).stdout(Stdio::piped()).arg("-d");
|
command.stdin(file).stdout(Stdio::piped()).arg("-d");
|
||||||
let mut child = crash_if_err!(
|
let mut child = command
|
||||||
2,
|
.spawn()
|
||||||
command.spawn().map_err(|err| format!(
|
.map_err(|err| SortError::CompressProgExecutionFailed {
|
||||||
"couldn't execute compress program: errno {}",
|
code: err.raw_os_error().unwrap(),
|
||||||
err.raw_os_error().unwrap()
|
})?;
|
||||||
))
|
|
||||||
);
|
|
||||||
let child_stdout = child.stdout.take().unwrap();
|
let child_stdout = child.stdout.take().unwrap();
|
||||||
CompressedTmpMergeInput {
|
Ok(CompressedTmpMergeInput {
|
||||||
path: self.path,
|
path: self.path,
|
||||||
compress_prog: self.compress_prog,
|
compress_prog: self.compress_prog,
|
||||||
child,
|
child,
|
||||||
child_stdout,
|
child_stdout,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl MergeInput for CompressedTmpMergeInput {
|
impl MergeInput for CompressedTmpMergeInput {
|
||||||
type InnerRead = ChildStdout;
|
type InnerRead = ChildStdout;
|
||||||
|
|
||||||
fn finished_reading(self) {
|
fn finished_reading(self) -> UResult<()> {
|
||||||
drop(self.child_stdout);
|
drop(self.child_stdout);
|
||||||
assert_child_success(self.child, &self.compress_prog);
|
check_child_success(self.child, &self.compress_prog)?;
|
||||||
fs::remove_file(self.path).ok();
|
let _ = fs::remove_file(self.path);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_read(&mut self) -> &mut Self::InnerRead {
|
fn as_read(&mut self) -> &mut Self::InnerRead {
|
||||||
|
@ -550,7 +566,9 @@ pub struct PlainMergeInput<R: Read + Send> {
|
||||||
}
|
}
|
||||||
impl<R: Read + Send> MergeInput for PlainMergeInput<R> {
|
impl<R: Read + Send> MergeInput for PlainMergeInput<R> {
|
||||||
type InnerRead = R;
|
type InnerRead = R;
|
||||||
fn finished_reading(self) {}
|
fn finished_reading(self) -> UResult<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
fn as_read(&mut self) -> &mut Self::InnerRead {
|
fn as_read(&mut self) -> &mut Self::InnerRead {
|
||||||
&mut self.inner
|
&mut self.inner
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,14 +33,18 @@ use rand::{thread_rng, Rng};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::env;
|
use std::env;
|
||||||
|
use std::error::Error;
|
||||||
use std::ffi::{OsStr, OsString};
|
use std::ffi::{OsStr, OsString};
|
||||||
|
use std::fmt::Display;
|
||||||
use std::fs::{File, OpenOptions};
|
use std::fs::{File, OpenOptions};
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::str::Utf8Error;
|
||||||
use unicode_width::UnicodeWidthStr;
|
use unicode_width::UnicodeWidthStr;
|
||||||
|
use uucore::error::{set_exit_code, UCustomError, UResult, USimpleError, UUsageError};
|
||||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||||
use uucore::version_cmp::version_cmp;
|
use uucore::version_cmp::version_cmp;
|
||||||
use uucore::InvalidEncodingHandling;
|
use uucore::InvalidEncodingHandling;
|
||||||
|
@ -120,6 +124,111 @@ const POSITIVE: char = '+';
|
||||||
// available memory into consideration, instead of relying on this constant only.
|
// available memory into consideration, instead of relying on this constant only.
|
||||||
const DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB
|
const DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum SortError {
|
||||||
|
Disorder {
|
||||||
|
file: OsString,
|
||||||
|
line_number: usize,
|
||||||
|
line: String,
|
||||||
|
silent: bool,
|
||||||
|
},
|
||||||
|
OpenFailed {
|
||||||
|
path: String,
|
||||||
|
error: std::io::Error,
|
||||||
|
},
|
||||||
|
ReadFailed {
|
||||||
|
path: String,
|
||||||
|
error: std::io::Error,
|
||||||
|
},
|
||||||
|
ParseKeyError {
|
||||||
|
key: String,
|
||||||
|
msg: String,
|
||||||
|
},
|
||||||
|
OpenTmpFileFailed {
|
||||||
|
error: std::io::Error,
|
||||||
|
},
|
||||||
|
CompressProgExecutionFailed {
|
||||||
|
code: i32,
|
||||||
|
},
|
||||||
|
CompressProgTerminatedAbnormally {
|
||||||
|
prog: String,
|
||||||
|
},
|
||||||
|
TmpDirCreationFailed,
|
||||||
|
Uft8Error {
|
||||||
|
error: Utf8Error,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for SortError {}
|
||||||
|
|
||||||
|
impl UCustomError for SortError {
|
||||||
|
fn code(&self) -> i32 {
|
||||||
|
match self {
|
||||||
|
SortError::Disorder { .. } => 1,
|
||||||
|
_ => 2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for SortError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
SortError::Disorder {
|
||||||
|
file,
|
||||||
|
line_number,
|
||||||
|
line,
|
||||||
|
silent,
|
||||||
|
} => {
|
||||||
|
if !silent {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}:{}: disorder: {}",
|
||||||
|
file.to_string_lossy(),
|
||||||
|
line_number,
|
||||||
|
line
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SortError::OpenFailed { path, error } => write!(
|
||||||
|
f,
|
||||||
|
"open failed: {}: {}",
|
||||||
|
path,
|
||||||
|
strip_errno(&error.to_string())
|
||||||
|
),
|
||||||
|
SortError::ParseKeyError { key, msg } => {
|
||||||
|
write!(f, "failed to parse key `{}`: {}", key, msg)
|
||||||
|
}
|
||||||
|
SortError::ReadFailed { path, error } => write!(
|
||||||
|
f,
|
||||||
|
"cannot read: {}: {}",
|
||||||
|
path,
|
||||||
|
strip_errno(&error.to_string())
|
||||||
|
),
|
||||||
|
SortError::OpenTmpFileFailed { error } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"failed to open temporary file: {}",
|
||||||
|
strip_errno(&error.to_string())
|
||||||
|
)
|
||||||
|
}
|
||||||
|
SortError::CompressProgExecutionFailed { code } => {
|
||||||
|
write!(f, "couldn't execute compress program: errno {}", code)
|
||||||
|
}
|
||||||
|
SortError::CompressProgTerminatedAbnormally { prog } => {
|
||||||
|
write!(f, "'{}' terminated abnormally", prog)
|
||||||
|
}
|
||||||
|
SortError::TmpDirCreationFailed => write!(f, "could not create temporary directory"),
|
||||||
|
SortError::Uft8Error { error } => write!(f, "{}", error),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)]
|
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)]
|
||||||
enum SortMode {
|
enum SortMode {
|
||||||
Numeric,
|
Numeric,
|
||||||
|
@ -150,23 +259,23 @@ pub struct Output {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Output {
|
impl Output {
|
||||||
fn new(name: Option<&str>) -> Self {
|
fn new(name: Option<&str>) -> UResult<Self> {
|
||||||
Self {
|
let file = if let Some(name) = name {
|
||||||
file: name.map(|name| {
|
// This is different from `File::create()` because we don't truncate the output yet.
|
||||||
// This is different from `File::create()` because we don't truncate the output yet.
|
// This allows using the output file as an input file.
|
||||||
// This allows using the output file as an input file.
|
let file = OpenOptions::new()
|
||||||
(
|
.write(true)
|
||||||
name.to_owned(),
|
.create(true)
|
||||||
OpenOptions::new()
|
.open(name)
|
||||||
.write(true)
|
.map_err(|e| SortError::OpenFailed {
|
||||||
.create(true)
|
path: name.to_owned(),
|
||||||
.open(name)
|
error: e,
|
||||||
.unwrap_or_else(|e| {
|
})?;
|
||||||
crash!(2, "open failed: {}: {}", name, strip_errno(&e.to_string()))
|
Some((name.to_owned(), file))
|
||||||
}),
|
} else {
|
||||||
)
|
None
|
||||||
}),
|
};
|
||||||
}
|
Ok(Self { file })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn into_write(self) -> BufWriter<Box<dyn Write>> {
|
fn into_write(self) -> BufWriter<Box<dyn Write>> {
|
||||||
|
@ -724,33 +833,37 @@ impl FieldSelector {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse(key: &str, global_settings: &GlobalSettings) -> Self {
|
fn parse(key: &str, global_settings: &GlobalSettings) -> UResult<Self> {
|
||||||
let mut from_to = key.split(',');
|
let mut from_to = key.split(',');
|
||||||
let (from, from_options) = Self::split_key_options(from_to.next().unwrap());
|
let (from, from_options) = Self::split_key_options(from_to.next().unwrap());
|
||||||
let to = from_to.next().map(|to| Self::split_key_options(to));
|
let to = from_to.next().map(|to| Self::split_key_options(to));
|
||||||
let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, "")));
|
let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, "")));
|
||||||
crash_if_err!(
|
|
||||||
2,
|
if options_are_empty {
|
||||||
if options_are_empty {
|
// Inherit the global settings if there are no options attached to this key.
|
||||||
// Inherit the global settings if there are no options attached to this key.
|
(|| {
|
||||||
(|| {
|
// This would be ideal for a try block, I think. In the meantime this closure allows
|
||||||
// This would be ideal for a try block, I think. In the meantime this closure allows
|
// to use the `?` operator here.
|
||||||
// to use the `?` operator here.
|
Self::new(
|
||||||
Self::new(
|
KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
|
||||||
KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
|
to.map(|(to, _)| {
|
||||||
to.map(|(to, _)| {
|
KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
|
||||||
KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
|
})
|
||||||
})
|
.transpose()?,
|
||||||
.transpose()?,
|
KeySettings::from(global_settings),
|
||||||
KeySettings::from(global_settings),
|
)
|
||||||
)
|
})()
|
||||||
})()
|
} else {
|
||||||
} else {
|
// Do not inherit from `global_settings`, as there are options attached to this key.
|
||||||
// Do not inherit from `global_settings`, as there are options attached to this key.
|
Self::parse_with_options((from, from_options), to)
|
||||||
Self::parse_with_options((from, from_options), to)
|
}
|
||||||
|
.map_err(|msg| {
|
||||||
|
SortError::ParseKeyError {
|
||||||
|
key: key.to_owned(),
|
||||||
|
msg,
|
||||||
}
|
}
|
||||||
.map_err(|e| format!("failed to parse key `{}`: {}", key, e))
|
.into()
|
||||||
)
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_with_options(
|
fn parse_with_options(
|
||||||
|
@ -962,7 +1075,8 @@ fn make_sort_mode_arg<'a, 'b>(mode: &'a str, short: &'b str, help: &'b str) -> A
|
||||||
arg
|
arg
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn uumain(args: impl uucore::Args) -> i32 {
|
#[uucore_procs::gen_uumain]
|
||||||
|
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
let args = args
|
let args = args
|
||||||
.collect_str(InvalidEncodingHandling::Ignore)
|
.collect_str(InvalidEncodingHandling::Ignore)
|
||||||
.accept_any();
|
.accept_any();
|
||||||
|
@ -979,11 +1093,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
// (clap returns 1).
|
// (clap returns 1).
|
||||||
if e.use_stderr() {
|
if e.use_stderr() {
|
||||||
eprintln!("{}", e.message);
|
eprintln!("{}", e.message);
|
||||||
return 2;
|
set_exit_code(2);
|
||||||
} else {
|
} else {
|
||||||
println!("{}", e.message);
|
println!("{}", e.message);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -998,7 +1112,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
let mut files = Vec::new();
|
let mut files = Vec::new();
|
||||||
for path in &files0_from {
|
for path in &files0_from {
|
||||||
let reader = open(&path);
|
let reader = open(&path)?;
|
||||||
let buf_reader = BufReader::new(reader);
|
let buf_reader = BufReader::new(reader);
|
||||||
for line in buf_reader.split(b'\0').flatten() {
|
for line in buf_reader.split(b'\0').flatten() {
|
||||||
files.push(OsString::from(
|
files.push(OsString::from(
|
||||||
|
@ -1055,12 +1169,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
env::set_var("RAYON_NUM_THREADS", &settings.threads);
|
env::set_var("RAYON_NUM_THREADS", &settings.threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
settings.buffer_size = matches
|
settings.buffer_size =
|
||||||
.value_of(options::BUF_SIZE)
|
matches
|
||||||
.map_or(DEFAULT_BUF_SIZE, |s| {
|
.value_of(options::BUF_SIZE)
|
||||||
GlobalSettings::parse_byte_count(s)
|
.map_or(Ok(DEFAULT_BUF_SIZE), |s| {
|
||||||
.unwrap_or_else(|e| crash!(2, "{}", format_error_message(e, s, options::BUF_SIZE)))
|
GlobalSettings::parse_byte_count(s).map_err(|e| {
|
||||||
});
|
USimpleError::new(2, format_error_message(e, s, options::BUF_SIZE))
|
||||||
|
})
|
||||||
|
})?;
|
||||||
|
|
||||||
settings.tmp_dir = matches
|
settings.tmp_dir = matches
|
||||||
.value_of(options::TMP_DIR)
|
.value_of(options::TMP_DIR)
|
||||||
|
@ -1070,9 +1186,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
settings.compress_prog = matches.value_of(options::COMPRESS_PROG).map(String::from);
|
settings.compress_prog = matches.value_of(options::COMPRESS_PROG).map(String::from);
|
||||||
|
|
||||||
if let Some(n_merge) = matches.value_of(options::BATCH_SIZE) {
|
if let Some(n_merge) = matches.value_of(options::BATCH_SIZE) {
|
||||||
settings.merge_batch_size = n_merge
|
settings.merge_batch_size = n_merge.parse().map_err(|_| {
|
||||||
.parse()
|
UUsageError::new(2, format!("invalid --batch-size argument '{}'", n_merge))
|
||||||
.unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
|
})?;
|
||||||
}
|
}
|
||||||
|
|
||||||
settings.zero_terminated = matches.is_present(options::ZERO_TERMINATED);
|
settings.zero_terminated = matches.is_present(options::ZERO_TERMINATED);
|
||||||
|
@ -1101,11 +1217,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
/* if no file, default to stdin */
|
/* if no file, default to stdin */
|
||||||
files.push("-".to_string().into());
|
files.push("-".to_string().into());
|
||||||
} else if settings.check && files.len() != 1 {
|
} else if settings.check && files.len() != 1 {
|
||||||
crash!(
|
return Err(UUsageError::new(
|
||||||
2,
|
2,
|
||||||
"extra operand `{}' not allowed with -c",
|
format!(
|
||||||
files[1].to_string_lossy()
|
"extra operand `{}' not allowed with -c",
|
||||||
)
|
files[1].to_string_lossy()
|
||||||
|
),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(arg) = matches.args.get(options::SEPARATOR) {
|
if let Some(arg) = matches.args.get(options::SEPARATOR) {
|
||||||
|
@ -1115,14 +1233,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
separator = "\0";
|
separator = "\0";
|
||||||
}
|
}
|
||||||
if separator.len() != 1 {
|
if separator.len() != 1 {
|
||||||
crash!(2, "separator must be exactly one character long");
|
return Err(UUsageError::new(
|
||||||
|
2,
|
||||||
|
"separator must be exactly one character long".into(),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
settings.separator = Some(separator.chars().next().unwrap())
|
settings.separator = Some(separator.chars().next().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(values) = matches.values_of(options::KEY) {
|
if let Some(values) = matches.values_of(options::KEY) {
|
||||||
for value in values {
|
for value in values {
|
||||||
let selector = FieldSelector::parse(value, &settings);
|
let selector = FieldSelector::parse(value, &settings)?;
|
||||||
if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
|
if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
|
||||||
settings.salt = Some(get_rand_string());
|
settings.salt = Some(get_rand_string());
|
||||||
}
|
}
|
||||||
|
@ -1152,10 +1273,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
// and to reopen them at a later point. This is different from how the output file is handled,
|
// and to reopen them at a later point. This is different from how the output file is handled,
|
||||||
// probably to prevent running out of file descriptors.
|
// probably to prevent running out of file descriptors.
|
||||||
for file in &files {
|
for file in &files {
|
||||||
open(file);
|
open(file)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let output = Output::new(matches.value_of(options::OUTPUT));
|
let output = Output::new(matches.value_of(options::OUTPUT))?;
|
||||||
|
|
||||||
settings.init_precomputed();
|
settings.init_precomputed();
|
||||||
|
|
||||||
|
@ -1382,21 +1503,20 @@ pub fn uu_app() -> App<'static, 'static> {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> i32 {
|
fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> UResult<()> {
|
||||||
if settings.merge {
|
if settings.merge {
|
||||||
let mut file_merger = merge::merge(files, settings, output.as_output_name());
|
let file_merger = merge::merge(files, settings, output.as_output_name())?;
|
||||||
file_merger.write_all(settings, output);
|
file_merger.write_all(settings, output)
|
||||||
} else if settings.check {
|
} else if settings.check {
|
||||||
if files.len() > 1 {
|
if files.len() > 1 {
|
||||||
crash!(2, "only one file allowed with -c");
|
Err(UUsageError::new(2, "only one file allowed with -c".into()))
|
||||||
|
} else {
|
||||||
|
check::check(files.first().unwrap(), settings)
|
||||||
}
|
}
|
||||||
return check::check(files.first().unwrap(), settings);
|
|
||||||
} else {
|
} else {
|
||||||
let mut lines = files.iter().map(open);
|
let mut lines = files.iter().map(open);
|
||||||
|
ext_sort(&mut lines, settings, output)
|
||||||
ext_sort(&mut lines, settings, output);
|
|
||||||
}
|
}
|
||||||
0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_data: &LineData<'a>) {
|
fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_data: &LineData<'a>) {
|
||||||
|
@ -1692,25 +1812,22 @@ fn strip_errno(err: &str) -> &str {
|
||||||
&err[..err.find(" (os error ").unwrap_or(err.len())]
|
&err[..err.find(" (os error ").unwrap_or(err.len())]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn open(path: impl AsRef<OsStr>) -> Box<dyn Read + Send> {
|
fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
if path == "-" {
|
if path == "-" {
|
||||||
let stdin = stdin();
|
let stdin = stdin();
|
||||||
return Box::new(stdin) as Box<dyn Read + Send>;
|
return Ok(Box::new(stdin) as Box<dyn Read + Send>);
|
||||||
}
|
}
|
||||||
|
|
||||||
let path = Path::new(path);
|
let path = Path::new(path);
|
||||||
|
|
||||||
match File::open(path) {
|
match File::open(path) {
|
||||||
Ok(f) => Box::new(f) as Box<dyn Read + Send>,
|
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
|
||||||
Err(e) => {
|
Err(error) => Err(SortError::ReadFailed {
|
||||||
crash!(
|
path: path.to_string_lossy().to_string(),
|
||||||
2,
|
error,
|
||||||
"cannot read: {0}: {1}",
|
|
||||||
path.to_string_lossy(),
|
|
||||||
strip_errno(&e.to_string())
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue