1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Clippy lints, more work on ext_sorter leads to 2 failing tests

This commit is contained in:
electricboogie 2021-04-19 10:57:53 -05:00
parent e7bcd59558
commit b8d667c383
3 changed files with 29 additions and 13 deletions

View file

@ -86,14 +86,24 @@ impl ExternalSorter {
let mut tempdir: Option<tempfile::TempDir> = None; let mut tempdir: Option<tempfile::TempDir> = None;
let mut sort_dir: Option<PathBuf> = None; let mut sort_dir: Option<PathBuf> = None;
let mut count = 0;
let mut segments_file: Vec<File> = Vec::new(); let mut segments_file: Vec<File> = Vec::new();
// FYI, the initialization size of struct Line is 96 bytes, but below works for all <T>
let size_of_items = std::mem::size_of::<T>(); let size_of_items = std::mem::size_of::<T>();
let mut buffer: Vec<T> = Vec::with_capacity(self.segment_size / size_of_items); let initial_capacity =
if self.segment_size / size_of_items >= 2 {
self.segment_size / size_of_items
} else { 2 };
let mut buffer: Vec<T> = Vec::with_capacity(initial_capacity);
for next_item in iterator { for next_item in iterator {
count += 1;
buffer.push(next_item); buffer.push(next_item);
if buffer.len() > self.segment_size { // if after push, number of elements in vector > initial capacity
if buffer.len() > initial_capacity {
let sort_dir = self.lazy_create_dir(&mut tempdir, &mut sort_dir)?; let sort_dir = self.lazy_create_dir(&mut tempdir, &mut sort_dir)?;
self.sort_and_write_segment(sort_dir, &mut segments_file, &mut buffer, &cmp)?; self.sort_and_write_segment(sort_dir, &mut segments_file, &mut buffer, &cmp)?;
// Resize buffer after write out
// buffer.shrink_to_fit();
} }
} }
@ -108,7 +118,7 @@ impl ExternalSorter {
Some(VecDeque::from(buffer)) Some(VecDeque::from(buffer))
}; };
SortedIterator::new(tempdir, pass_through_queue, segments_file, cmp) SortedIterator::new(tempdir, pass_through_queue, segments_file, count, cmp)
} }
/// We only want to create directory if it's needed (i.e. if the dataset /// We only want to create directory if it's needed (i.e. if the dataset
@ -158,7 +168,10 @@ impl ExternalSorter {
.open(&segment_path)?; .open(&segment_path)?;
let mut buf_writer = BufWriter::new(segment_file); let mut buf_writer = BufWriter::new(segment_file);
for item in buffer.drain(0..) { // Possible panic here.
// Why use drain here, if we want to dump the entire buffer?
// Was "buffer.drain(0..)"
for item in buffer {
item.encode(&mut buf_writer); item.encode(&mut buf_writer);
} }
@ -185,6 +198,7 @@ pub struct SortedIterator<T: Sortable, F> {
pass_through_queue: Option<VecDeque<T>>, pass_through_queue: Option<VecDeque<T>>,
segments_file: Vec<BufReader<File>>, segments_file: Vec<BufReader<File>>,
next_values: Vec<Option<T>>, next_values: Vec<Option<T>>,
count: u64,
cmp: F, cmp: F,
} }
@ -193,6 +207,7 @@ impl<T: Sortable, F: Fn(&T, &T) -> Ordering + Send + Sync> SortedIterator<T, F>
tempdir: Option<tempfile::TempDir>, tempdir: Option<tempfile::TempDir>,
pass_through_queue: Option<VecDeque<T>>, pass_through_queue: Option<VecDeque<T>>,
mut segments_file: Vec<File>, mut segments_file: Vec<File>,
count: u64,
cmp: F, cmp: F,
) -> Result<SortedIterator<T, F>, Error> { ) -> Result<SortedIterator<T, F>, Error> {
for segment in &mut segments_file { for segment in &mut segments_file {
@ -211,9 +226,14 @@ impl<T: Sortable, F: Fn(&T, &T) -> Ordering + Send + Sync> SortedIterator<T, F>
pass_through_queue, pass_through_queue,
segments_file: segments_file_buffered, segments_file: segments_file_buffered,
next_values, next_values,
count,
cmp, cmp,
}) })
} }
/// Returns the item count stored in this iterator.
///
/// The value is read from the `count` field, which is filled in from the
/// `count` argument when the iterator is constructed.
pub fn sorted_count(&self) -> u64 {
self.count
}
} }
impl<T: Sortable, F: Fn(&T, &T) -> Ordering> Iterator for SortedIterator<T, F> { impl<T: Sortable, F: Fn(&T, &T) -> Ordering> Iterator for SortedIterator<T, F> {

View file

@ -924,15 +924,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let result = matches let result = matches
.value_of(OPT_TMP_DIR) .value_of(OPT_TMP_DIR)
.map(String::from) .map(String::from)
.unwrap_or(DEFAULT_TMPDIR.to_owned()); .unwrap_or_else(|| DEFAULT_TMPDIR.to_owned());
settings.tmp_dir = PathBuf::from(format!(r"{}", result)); settings.tmp_dir = PathBuf::from(result);
} else { } else {
for (key, value) in env::vars_os() { for (key, value) in env::vars_os() {
if key == OsString::from("TMPDIR") { if key == OsString::from("TMPDIR") {
settings.tmp_dir = PathBuf::from(format!( settings.tmp_dir = PathBuf::from(value);
r"{}",
value.into_string().unwrap_or("/tmp".to_owned())
));
break; break;
} }
settings.tmp_dir = PathBuf::from(DEFAULT_TMPDIR); settings.tmp_dir = PathBuf::from(DEFAULT_TMPDIR);
@ -1124,11 +1121,10 @@ fn ext_sort_by(lines: Vec<Line>, settings: &GlobalSettings) -> Vec<Line> {
.with_segment_size(settings.buffer_size) .with_segment_size(settings.buffer_size)
.with_sort_dir(settings.tmp_dir.clone()) .with_sort_dir(settings.tmp_dir.clone())
.with_parallel_sort(); .with_parallel_sort();
let result = sorter sorter
.sort_by(lines.into_iter(), |a, b| compare_by(a, b, &settings)) .sort_by(lines.into_iter(), |a, b| compare_by(a, b, &settings))
.unwrap() .unwrap()
.collect(); .collect()
result
} }
fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) { fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) {