From 088443276a6d0229dc93b5a9502ff3d09870d782 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 22 May 2021 14:00:07 +0200 Subject: [PATCH] sort: improve handling of buffer size cmd arg Instead of overflowing when calculating the buffer size, use saturating_{pow, mul}. When failing to parse the buffer size, we now crash instead of silently ignoring the error. --- src/uu/sort/src/sort.rs | 54 ++++++++++++++++++++++---------------- tests/by-util/test_sort.rs | 24 ++++++++++------- 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 78388a298..bc3b65492 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -93,10 +93,10 @@ static THOUSANDS_SEP: char = ','; static NEGATIVE: char = '-'; static POSITIVE: char = '+'; -/// Choosing a higher buffer size does not result in performance improvements -/// (at least not on my machine). TODO: In the future, we should also take the amount of -/// available memory into consideration, instead of relying on this constant only. -static DEFAULT_BUF_SIZE: usize = 1_000_000_000; +// Choosing a higher buffer size does not result in performance improvements +// (at least not on my machine). TODO: In the future, we should also take the amount of +// available memory into consideration, instead of relying on this constant only. +static DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] enum SortMode { @@ -133,24 +133,32 @@ pub struct GlobalSettings { } impl GlobalSettings { - // It's back to do conversions for command line opts! - // Probably want to do through numstrcmp somehow now? - fn human_numeric_convert(a: &str) -> usize { - let num_str = &a[get_leading_gen(a)]; - let (_, suf_str) = a.split_at(num_str.len()); - let num_usize = num_str - .parse::() - .expect("Error parsing buffer size: "); - let suf_usize: usize = match suf_str.to_uppercase().as_str() { - // SI Units - "B" => 1usize, - "K" => 1000usize, - "M" => 1000000usize, - "G" => 1000000000usize, - // GNU regards empty human numeric values as K by default - _ => 1000usize, - }; - num_usize * suf_usize + /// Interpret this `&str` as a number with an optional trailing si unit. + /// + /// If there is no trailing si unit, the implicit unit is K. + /// The suffix B causes the number to be interpreted as a byte count. + fn parse_byte_count(input: &str) -> usize { + const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']; + + let input = input.trim(); + + let (num_str, si_unit) = + if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) { + let mut chars = input.chars(); + let si_suffix = chars.next_back().unwrap().to_ascii_uppercase(); + let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap(); + let num_str = chars.as_str(); + (num_str, si_unit) + } else { + (input, 1) + }; + + let num_usize: usize = num_str + .trim() + .parse() + .unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e)); + + num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32)) } fn out_writer(&self) -> BufWriter> { @@ -944,7 +952,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.buffer_size = matches .value_of(OPT_BUF_SIZE) - .map(GlobalSettings::human_numeric_convert) + .map(GlobalSettings::parse_byte_count) .unwrap_or(DEFAULT_BUF_SIZE); settings.tmp_dir = matches diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 59058d5bc..23705d2ee 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -17,7 +17,9 @@ fn test_helper(file_name: &str, args: &str) { #[test] fn test_buffer_sizes() { - let buffer_sizes = ["0", "50K", "1M", "1000G"]; + let buffer_sizes = [ + "0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y", + ]; for buffer_size in &buffer_sizes { new_ucmd!() .arg("-n") @@ -30,14 +32,18 @@ fn test_buffer_sizes() { } #[test] -fn test_smaller_than_specified_segment() { - new_ucmd!() - .arg("-n") - .arg("-S") - .arg("100M") - .arg("ext_sort.txt") - .succeeds() - .stdout_is_fixture("ext_sort.expected"); +fn test_invalid_buffer_size() { + let buffer_sizes = ["asd", "100f"]; + for invalid_buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-S") + .arg(invalid_buffer_size) + .fails() + .stderr_only(format!( + "sort: error: failed to parse buffer size `{}`: invalid digit found in string", + invalid_buffer_size + )); + } } #[test]