From 733949b2e7f4d1672fdcf8e0521d8b25b8ffab5a Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Sun, 25 Apr 2021 15:13:27 -0500 Subject: [PATCH] Add dynamic buffer adjustment, fix test comment --- src/uu/sort/src/external_sort/mod.rs | 19 +++++++++++++------ src/uu/sort/src/sort.rs | 13 ++++++------- tests/by-util/test_sort.rs | 6 +++--- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs index e2595fc78..f5a3a03af 100644 --- a/src/uu/sort/src/external_sort/mod.rs +++ b/src/uu/sort/src/external_sort/mod.rs @@ -174,13 +174,23 @@ where { let mut total_read = 0; let mut chunk = Vec::new(); + // Initial buffer is specified by user + let mut adjusted_buffer_size = self.buffer_bytes; // make the initial chunks on disk for seq in unsorted { - total_read += seq.get_size(); + let seq_size = seq.get_size(); + total_read += seq_size; + // Grow buffer size for a Line larger than buffer + adjusted_buffer_size = + if adjusted_buffer_size < seq_size { + seq_size + } else { + adjusted_buffer_size + }; chunk.push(seq); - if total_read >= self.buffer_bytes { + if total_read >= adjusted_buffer_size { super::sort_by(&mut chunk, &self.settings); self.write_chunk( &iter.tmp_dir.path().join(iter.chunks.to_string()), @@ -247,10 +257,7 @@ where let line_s = line?; bytes_read += line_s.len() + 1; // This is where the bad stuff happens usually - let deserialized: Line = match serde_json::from_str(&line_s) { - Ok(x) => x, - Err(err) => panic!("JSON read error: {}", err), - }; + let deserialized: Line = serde_json::from_str(&line_s).expect("JSON read error: "); total_read += deserialized.get_size(); vec.push_back(deserialized); if total_read > max_bytes { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index bf138e0c0..0be91eef6 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -141,13 +141,12 @@ impl GlobalSettings { .expect("Error parsing buffer size: "); let suf_usize: usize = match suf_str.to_uppercase().as_str() { // SI Units - "K" => 1000usize, - "M" => 1000000usize, - "G" => 1000000000usize, - "T" => 1000000000000usize, - "P" => 1000000000000000usize, - "E" => 1000000000000000000usize, - _ => 1usize, + "K" => 1024usize, + "M" => 1024000usize, + "G" => 1024000000usize, + "T" => 1024000000000usize, + // GNU regards empty human numeric value as 1024 bytes + _ => 1024usize, }; num_usize * suf_usize } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 865e2be21..cd3a3a496 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -59,13 +59,13 @@ fn test_human_numeric_whitespace() { test_helper("human-numeric-whitespace", "-h"); } -// This tests the ext sort feature, but it also tests where -// serde might fail when reading back JSON if it finds a null value +// This tests where serde often fails when reading back JSON +// if it finds a null value #[test] fn test_extsort_as64_bailout() { new_ucmd!() .arg("-g") - .arg("-S 10K") + .arg("-S 5K") .arg("multiple_decimals_general.txt") .succeeds() .stdout_is_fixture("multiple_decimals_general.expected");