From b8d667c38393e17baf7fd10ef61eeeb717fb2cec Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Mon, 19 Apr 2021 10:57:53 -0500 Subject: [PATCH] Clippy lints, more work on ext_sorter leads to 2 failing tests --- .../ext_sorter/{APACHE_LICENSE => LICENSE} | 0 src/uu/sort/src/ext_sorter/mod.rs | 28 ++++++++++++++++--- src/uu/sort/src/sort.rs | 14 ++++------ 3 files changed, 29 insertions(+), 13 deletions(-) rename src/uu/sort/src/ext_sorter/{APACHE_LICENSE => LICENSE} (100%) diff --git a/src/uu/sort/src/ext_sorter/APACHE_LICENSE b/src/uu/sort/src/ext_sorter/LICENSE similarity index 100% rename from src/uu/sort/src/ext_sorter/APACHE_LICENSE rename to src/uu/sort/src/ext_sorter/LICENSE diff --git a/src/uu/sort/src/ext_sorter/mod.rs b/src/uu/sort/src/ext_sorter/mod.rs index 92b88637d..eef7befe4 100644 --- a/src/uu/sort/src/ext_sorter/mod.rs +++ b/src/uu/sort/src/ext_sorter/mod.rs @@ -86,14 +86,24 @@ impl ExternalSorter { let mut tempdir: Option = None; let mut sort_dir: Option = None; + let mut count = 0; let mut segments_file: Vec = Vec::new(); + // FYI, the initialization size of struct Line is 96 bytes, but below works for all let size_of_items = std::mem::size_of::(); - let mut buffer: Vec = Vec::with_capacity(self.segment_size / size_of_items); + let initial_capacity = + if self.segment_size / size_of_items >= 2 { + self.segment_size / size_of_items + } else { 2 }; + let mut buffer: Vec = Vec::with_capacity(initial_capacity); for next_item in iterator { + count += 1; buffer.push(next_item); - if buffer.len() > self.segment_size { + // if after push, number of elements in vector > initial capacity + if buffer.len() > initial_capacity { let sort_dir = self.lazy_create_dir(&mut tempdir, &mut sort_dir)?; self.sort_and_write_segment(sort_dir, &mut segments_file, &mut buffer, &cmp)?; + // Resize buffer after write out + // buffer.shrink_to_fit(); } } @@ -108,7 +118,7 @@ impl ExternalSorter { Some(VecDeque::from(buffer)) }; - SortedIterator::new(tempdir, pass_through_queue, segments_file, cmp) + SortedIterator::new(tempdir, pass_through_queue, segments_file, count, cmp) } /// We only want to create directory if it's needed (i.e. if the dataset @@ -158,7 +168,10 @@ impl ExternalSorter { .open(&segment_path)?; let mut buf_writer = BufWriter::new(segment_file); - for item in buffer.drain(0..) { + // Possible panic here. + // Why use drain here, if we want to dump the entire buffer? + // Was "buffer.drain(0..)" + for item in buffer { item.encode(&mut buf_writer); } @@ -185,6 +198,7 @@ pub struct SortedIterator { pass_through_queue: Option>, segments_file: Vec>, next_values: Vec>, + count: u64, cmp: F, } @@ -193,6 +207,7 @@ impl Ordering + Send + Sync> SortedIterator tempdir: Option, pass_through_queue: Option>, mut segments_file: Vec, + count: u64, cmp: F, ) -> Result, Error> { for segment in &mut segments_file { @@ -211,9 +226,14 @@ impl Ordering + Send + Sync> SortedIterator pass_through_queue, segments_file: segments_file_buffered, next_values, + count, cmp, }) } + + pub fn sorted_count(&self) -> u64 { + self.count + } } impl Ordering> Iterator for SortedIterator { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index d5a2ccbf4..2bbc02e78 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -924,15 +924,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let result = matches .value_of(OPT_TMP_DIR) .map(String::from) - .unwrap_or(DEFAULT_TMPDIR.to_owned()); - settings.tmp_dir = PathBuf::from(format!(r"{}", result)); + .unwrap_or_else(|| DEFAULT_TMPDIR.to_owned()); + settings.tmp_dir = PathBuf::from(result); } else { for (key, value) in env::vars_os() { if key == OsString::from("TMPDIR") { - settings.tmp_dir = PathBuf::from(format!( - r"{}", - value.into_string().unwrap_or("/tmp".to_owned()) - )); + settings.tmp_dir = PathBuf::from(value); break; } settings.tmp_dir = PathBuf::from(DEFAULT_TMPDIR); @@ -1124,11 +1121,10 @@ fn ext_sort_by(lines: Vec, settings: &GlobalSettings) -> Vec { .with_segment_size(settings.buffer_size) .with_sort_dir(settings.tmp_dir.clone()) .with_parallel_sort(); - let result = sorter + sorter .sort_by(lines.into_iter(), |a, b| compare_by(a, b, &settings)) .unwrap() - .collect(); - result + .collect() } fn sort_by(lines: &mut Vec, settings: &GlobalSettings) {