From 6b8de1dd8bfd3935467c5c2679c3b2bf0a6478cf Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 2 Jun 2021 04:16:41 +0200 Subject: [PATCH] sort: use "parse_size" from uucore * make parsing of SIZE argument consistent with GNU's behavior * add error handling * add tests --- src/uu/sort/src/sort.rs | 100 +++++++++++++++++++++++++++---------- tests/by-util/test_sort.rs | 45 +++++++++++++++-- 2 files changed, 115 insertions(+), 30 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index ab3b06451..208010d09 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -43,6 +43,7 @@ use std::ops::Range; use std::path::Path; use std::path::PathBuf; use unicode_width::UnicodeWidthStr; +use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::InvalidEncodingHandling; static NAME: &str = "sort"; @@ -159,32 +160,31 @@ pub struct GlobalSettings { } impl GlobalSettings { - /// Interpret this `&str` as a number with an optional trailing si unit. - /// - /// If there is no trailing si unit, the implicit unit is K. - /// The suffix B causes the number to be interpreted as a byte count. - fn parse_byte_count(input: &str) -> usize { - const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']; + /// Parse a SIZE string into a number of bytes. + /// A size string comprises an integer and an optional unit. + /// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b which is 1. + /// Default is K. + fn parse_byte_count(input: &str) -> Result { + // GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y + // GNU sort (8.32) invalid: b, B, 1B, p, e, z, y + const ALLOW_LIST: &[char] = &[ + 'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y', + ]; + let mut size_string = input.trim().to_string(); - let input = input.trim(); - - let (num_str, si_unit) = - if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) { - let mut chars = input.chars(); - let si_suffix = chars.next_back().unwrap().to_ascii_uppercase(); - let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap(); - let num_str = chars.as_str(); - (num_str, si_unit) - } else { - (input, 1) - }; - - let num_usize: usize = num_str - .trim() - .parse() - .unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e)); - - num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32)) + if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c)) + || size_string.ends_with(|c: char| c.is_digit(10)) + { + // b 1, K 1024 (default) + if size_string.ends_with(|c: char| c.is_digit(10)) { + size_string.push('K'); + } else if size_string.ends_with('b') { + size_string.pop(); + } + parse_size(&size_string) + } else { + Err(ParseSizeError::ParseFailure("invalid suffix".to_string())) + } } fn out_writer(&self) -> BufWriter> { @@ -1148,7 +1148,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.buffer_size = matches .value_of(OPT_BUF_SIZE) - .map(GlobalSettings::parse_byte_count) + .map(|v| match GlobalSettings::parse_byte_count(v) { + Ok(n) => n, + Err(ParseSizeError::ParseFailure(_)) => crash!(2, "invalid -S argument '{}'", v), + Err(ParseSizeError::SizeTooBig(_)) => crash!(2, "-S argument '{}' too large", v), + }) .unwrap_or(DEFAULT_BUF_SIZE); settings.tmp_dir = matches @@ -1640,4 +1644,48 @@ mod tests { // How big is a selection? Constant cost all lines pay when we need selections. assert_eq!(std::mem::size_of::(), 24); } + + #[test] + fn test_parse_byte_count() { + let valid_input = [ + ("0", 0), + ("50K", 50 * 1024), + ("50k", 50 * 1024), + ("1M", 1024 * 1024), + ("100M", 100 * 1024 * 1024), + #[cfg(not(target_pointer_width = "32"))] + ("1000G", 1000 * 1024 * 1024 * 1024), + #[cfg(not(target_pointer_width = "32"))] + ("10T", 10 * 1024 * 1024 * 1024 * 1024), + ("1b", 1), + ("1024b", 1024), + ("1024Mb", 1024 * 1024 * 1024), // TODO: This might not be what GNU `sort` does? + ("1", 1024), // K is default + ("50", 50 * 1024), + ("K", 1024), + ("k", 1024), + ("m", 1024 * 1024), + #[cfg(not(target_pointer_width = "32"))] + ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ]; + for (input, expected_output) in &valid_input { + assert_eq!( + GlobalSettings::parse_byte_count(input), + Ok(*expected_output) + ); + } + + // SizeTooBig + let invalid_input = ["500E", "1Y"]; + for input in &invalid_input { + #[cfg(not(target_pointer_width = "128"))] + assert!(GlobalSettings::parse_byte_count(input).is_err()); + } + + // ParseFailure + let invalid_input = ["nonsense", "1B", "B", "b", "p", "e", "z", "y"]; + for input in &invalid_input { + assert!(GlobalSettings::parse_byte_count(input).is_err()); + } + } } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index d2b447925..054789edf 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -21,9 +21,7 @@ fn test_helper(file_name: &str, possible_args: &[&str]) { #[test] fn test_buffer_sizes() { - let buffer_sizes = [ - "0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y", - ]; + let buffer_sizes = ["0", "50K", "50k", "1M", "100M"]; for buffer_size in &buffer_sizes { new_ucmd!() .arg("-n") @@ -32,6 +30,20 @@ fn test_buffer_sizes() { .arg("ext_sort.txt") .succeeds() .stdout_is_fixture("ext_sort.expected"); + + #[cfg(not(target_pointer_width = "32"))] + { + let buffer_sizes = ["1000G", "10T"]; + for buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg(buffer_size) + .arg("ext_sort.txt") + .succeeds() + .stdout_is_fixture("ext_sort.expected"); + } + } } } @@ -43,11 +55,36 @@ fn test_invalid_buffer_size() { .arg("-S") .arg(invalid_buffer_size) .fails() + .code_is(2) .stderr_only(format!( - "sort: failed to parse buffer size `{}`: invalid digit found in string", + "sort: invalid -S argument '{}'", invalid_buffer_size )); } + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .arg("-n") + .arg("-S") + .arg("1Y") + .arg("ext_sort.txt") + .fails() + .code_is(2) + .stderr_only("sort: -S argument '1Y' too large"); + + #[cfg(target_pointer_width = "32")] + { + let buffer_sizes = ["1000G", "10T"]; + for buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg(buffer_size) + .arg("ext_sort.txt") + .fails() + .code_is(2) + .stderr_only(format!("sort: -S argument '{}' too large", buffer_size)); + } + } } #[test]