mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Clippy lints, more work on ext_sorter leads to 2 failing tests
This commit is contained in:
parent
e7bcd59558
commit
b8d667c383
3 changed files with 29 additions and 13 deletions
|
@ -86,14 +86,24 @@ impl ExternalSorter {
|
||||||
let mut tempdir: Option<tempfile::TempDir> = None;
|
let mut tempdir: Option<tempfile::TempDir> = None;
|
||||||
let mut sort_dir: Option<PathBuf> = None;
|
let mut sort_dir: Option<PathBuf> = None;
|
||||||
|
|
||||||
|
let mut count = 0;
|
||||||
let mut segments_file: Vec<File> = Vec::new();
|
let mut segments_file: Vec<File> = Vec::new();
|
||||||
|
// FYI, the initialization size of struct Line is 96 bytes, but below works for all <T>
|
||||||
let size_of_items = std::mem::size_of::<T>();
|
let size_of_items = std::mem::size_of::<T>();
|
||||||
let mut buffer: Vec<T> = Vec::with_capacity(self.segment_size / size_of_items);
|
let initial_capacity =
|
||||||
|
if self.segment_size / size_of_items >= 2 {
|
||||||
|
self.segment_size / size_of_items
|
||||||
|
} else { 2 };
|
||||||
|
let mut buffer: Vec<T> = Vec::with_capacity(initial_capacity);
|
||||||
for next_item in iterator {
|
for next_item in iterator {
|
||||||
|
count += 1;
|
||||||
buffer.push(next_item);
|
buffer.push(next_item);
|
||||||
if buffer.len() > self.segment_size {
|
// if after push, number of elements in vector > initial capacity
|
||||||
|
if buffer.len() > initial_capacity {
|
||||||
let sort_dir = self.lazy_create_dir(&mut tempdir, &mut sort_dir)?;
|
let sort_dir = self.lazy_create_dir(&mut tempdir, &mut sort_dir)?;
|
||||||
self.sort_and_write_segment(sort_dir, &mut segments_file, &mut buffer, &cmp)?;
|
self.sort_and_write_segment(sort_dir, &mut segments_file, &mut buffer, &cmp)?;
|
||||||
|
// Resize buffer after write out
|
||||||
|
// buffer.shrink_to_fit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,7 +118,7 @@ impl ExternalSorter {
|
||||||
Some(VecDeque::from(buffer))
|
Some(VecDeque::from(buffer))
|
||||||
};
|
};
|
||||||
|
|
||||||
SortedIterator::new(tempdir, pass_through_queue, segments_file, cmp)
|
SortedIterator::new(tempdir, pass_through_queue, segments_file, count, cmp)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// We only want to create directory if it's needed (i.e. if the dataset
|
/// We only want to create directory if it's needed (i.e. if the dataset
|
||||||
|
@ -158,7 +168,10 @@ impl ExternalSorter {
|
||||||
.open(&segment_path)?;
|
.open(&segment_path)?;
|
||||||
let mut buf_writer = BufWriter::new(segment_file);
|
let mut buf_writer = BufWriter::new(segment_file);
|
||||||
|
|
||||||
for item in buffer.drain(0..) {
|
// Possible panic here.
|
||||||
|
// Why use drain here, if we want to dump the entire buffer?
|
||||||
|
// Was "buffer.drain(0..)"
|
||||||
|
for item in buffer {
|
||||||
item.encode(&mut buf_writer);
|
item.encode(&mut buf_writer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,6 +198,7 @@ pub struct SortedIterator<T: Sortable, F> {
|
||||||
pass_through_queue: Option<VecDeque<T>>,
|
pass_through_queue: Option<VecDeque<T>>,
|
||||||
segments_file: Vec<BufReader<File>>,
|
segments_file: Vec<BufReader<File>>,
|
||||||
next_values: Vec<Option<T>>,
|
next_values: Vec<Option<T>>,
|
||||||
|
count: u64,
|
||||||
cmp: F,
|
cmp: F,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -193,6 +207,7 @@ impl<T: Sortable, F: Fn(&T, &T) -> Ordering + Send + Sync> SortedIterator<T, F>
|
||||||
tempdir: Option<tempfile::TempDir>,
|
tempdir: Option<tempfile::TempDir>,
|
||||||
pass_through_queue: Option<VecDeque<T>>,
|
pass_through_queue: Option<VecDeque<T>>,
|
||||||
mut segments_file: Vec<File>,
|
mut segments_file: Vec<File>,
|
||||||
|
count: u64,
|
||||||
cmp: F,
|
cmp: F,
|
||||||
) -> Result<SortedIterator<T, F>, Error> {
|
) -> Result<SortedIterator<T, F>, Error> {
|
||||||
for segment in &mut segments_file {
|
for segment in &mut segments_file {
|
||||||
|
@ -211,9 +226,14 @@ impl<T: Sortable, F: Fn(&T, &T) -> Ordering + Send + Sync> SortedIterator<T, F>
|
||||||
pass_through_queue,
|
pass_through_queue,
|
||||||
segments_file: segments_file_buffered,
|
segments_file: segments_file_buffered,
|
||||||
next_values,
|
next_values,
|
||||||
|
count,
|
||||||
cmp,
|
cmp,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn sorted_count(&self) -> u64 {
|
||||||
|
self.count
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Sortable, F: Fn(&T, &T) -> Ordering> Iterator for SortedIterator<T, F> {
|
impl<T: Sortable, F: Fn(&T, &T) -> Ordering> Iterator for SortedIterator<T, F> {
|
||||||
|
|
|
@ -924,15 +924,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
let result = matches
|
let result = matches
|
||||||
.value_of(OPT_TMP_DIR)
|
.value_of(OPT_TMP_DIR)
|
||||||
.map(String::from)
|
.map(String::from)
|
||||||
.unwrap_or(DEFAULT_TMPDIR.to_owned());
|
.unwrap_or_else(|| DEFAULT_TMPDIR.to_owned());
|
||||||
settings.tmp_dir = PathBuf::from(format!(r"{}", result));
|
settings.tmp_dir = PathBuf::from(result);
|
||||||
} else {
|
} else {
|
||||||
for (key, value) in env::vars_os() {
|
for (key, value) in env::vars_os() {
|
||||||
if key == OsString::from("TMPDIR") {
|
if key == OsString::from("TMPDIR") {
|
||||||
settings.tmp_dir = PathBuf::from(format!(
|
settings.tmp_dir = PathBuf::from(value);
|
||||||
r"{}",
|
|
||||||
value.into_string().unwrap_or("/tmp".to_owned())
|
|
||||||
));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
settings.tmp_dir = PathBuf::from(DEFAULT_TMPDIR);
|
settings.tmp_dir = PathBuf::from(DEFAULT_TMPDIR);
|
||||||
|
@ -1124,11 +1121,10 @@ fn ext_sort_by(lines: Vec<Line>, settings: &GlobalSettings) -> Vec<Line> {
|
||||||
.with_segment_size(settings.buffer_size)
|
.with_segment_size(settings.buffer_size)
|
||||||
.with_sort_dir(settings.tmp_dir.clone())
|
.with_sort_dir(settings.tmp_dir.clone())
|
||||||
.with_parallel_sort();
|
.with_parallel_sort();
|
||||||
let result = sorter
|
sorter
|
||||||
.sort_by(lines.into_iter(), |a, b| compare_by(a, b, &settings))
|
.sort_by(lines.into_iter(), |a, b| compare_by(a, b, &settings))
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.collect();
|
.collect()
|
||||||
result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) {
|
fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue