1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 12:07:46 +00:00

sort: use "parse_size" from uucore

* make parsing of SIZE argument consistent with GNU's behavior
* add error handling
* add tests
This commit is contained in:
Jan Scheer 2021-06-02 04:16:41 +02:00
parent a900c7421a
commit 6b8de1dd8b
2 changed files with 115 additions and 30 deletions

View file

@ -43,6 +43,7 @@ use std::ops::Range;
use std::path::Path;
use std::path::PathBuf;
use unicode_width::UnicodeWidthStr;
use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::InvalidEncodingHandling;
static NAME: &str = "sort";
@ -159,32 +160,31 @@ pub struct GlobalSettings {
}
impl GlobalSettings {
/// Interpret this `&str` as a number with an optional trailing si unit.
///
/// If there is no trailing si unit, the implicit unit is K.
/// The suffix B causes the number to be interpreted as a byte count.
fn parse_byte_count(input: &str) -> usize {
const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
/// Parse a SIZE string into a number of bytes.
///
/// A size string comprises an integer and an optional one-character unit.
/// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b,
/// which means plain bytes (multiplier 1). Surrounding whitespace is ignored.
/// If the string ends in a digit, the default unit K is assumed.
///
/// # Errors
///
/// Returns `ParseSizeError::ParseFailure` if the last character is neither a
/// decimal digit nor one of the units listed above; otherwise the result of
/// `parse_size` on the normalized string is returned unchanged.
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
const ALLOW_LIST: &[char] = &[
'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y',
];
let mut size_string = input.trim().to_string();
let input = input.trim();
let (num_str, si_unit) =
if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) {
let mut chars = input.chars();
let si_suffix = chars.next_back().unwrap().to_ascii_uppercase();
let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap();
let num_str = chars.as_str();
(num_str, si_unit)
if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c))
|| size_string.ends_with(|c: char| c.is_digit(10))
{
// Normalize the string before handing it to `parse_size`:
// - a trailing digit means no unit was given, so append the default `K`;
// - a trailing `b` means plain bytes, so drop it and keep whatever precedes it.
if size_string.ends_with(|c: char| c.is_digit(10)) {
size_string.push('K');
} else if size_string.ends_with('b') {
size_string.pop();
}
parse_size(&size_string)
} else {
(input, 1)
};
let num_usize: usize = num_str
.trim()
.parse()
.unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e));
num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32))
// The last character is neither a decimal digit nor in `ALLOW_LIST`.
Err(ParseSizeError::ParseFailure("invalid suffix".to_string()))
}
}
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
@ -1148,7 +1148,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.buffer_size = matches
.value_of(OPT_BUF_SIZE)
.map(GlobalSettings::parse_byte_count)
.map(|v| match GlobalSettings::parse_byte_count(v) {
Ok(n) => n,
Err(ParseSizeError::ParseFailure(_)) => crash!(2, "invalid -S argument '{}'", v),
Err(ParseSizeError::SizeTooBig(_)) => crash!(2, "-S argument '{}' too large", v),
})
.unwrap_or(DEFAULT_BUF_SIZE);
settings.tmp_dir = matches
@ -1640,4 +1644,48 @@ mod tests {
// How big is a selection? Constant cost all lines pay when we need selections.
assert_eq!(std::mem::size_of::<Selection>(), 24);
}
/// Exercises `GlobalSettings::parse_byte_count` with inputs that must parse to
/// a known byte count and with inputs that must be rejected.
#[test]
fn test_parse_byte_count() {
    const KIB: usize = 1024;
    const MIB: usize = KIB * KIB;
    const GIB: usize = KIB * MIB;

    // Each entry pairs a SIZE argument with the number of bytes it denotes.
    // Entries that do not fit into a 32-bit `usize` are compiled out there.
    let accepted = [
        ("0", 0),
        ("50K", 50 * KIB),
        ("50k", 50 * KIB),
        ("1M", MIB),
        ("100M", 100 * MIB),
        #[cfg(not(target_pointer_width = "32"))]
        ("1000G", 1000 * GIB),
        #[cfg(not(target_pointer_width = "32"))]
        ("10T", 10 * KIB * GIB),
        ("1b", 1),
        ("1024b", 1024),
        ("1024Mb", 1024 * MIB), // TODO: This might not be what GNU `sort` does?
        ("1", KIB),             // K is default
        ("50", 50 * KIB),
        ("K", KIB),
        ("k", KIB),
        ("m", MIB),
        #[cfg(not(target_pointer_width = "32"))]
        ("E", GIB * GIB),
    ];
    accepted.iter().for_each(|(text, bytes)| {
        assert_eq!(GlobalSettings::parse_byte_count(text), Ok(*bytes));
    });

    // SizeTooBig: skipped on a 128-bit `usize`.
    #[cfg(not(target_pointer_width = "128"))]
    {
        let oversized = ["500E", "1Y"];
        oversized.iter().for_each(|text| {
            assert!(GlobalSettings::parse_byte_count(text).is_err());
        });
    }

    // ParseFailure
    let malformed = ["nonsense", "1B", "B", "b", "p", "e", "z", "y"];
    malformed.iter().for_each(|text| {
        assert!(GlobalSettings::parse_byte_count(text).is_err());
    });
}
}

View file

@ -21,9 +21,7 @@ fn test_helper(file_name: &str, possible_args: &[&str]) {
#[test]
fn test_buffer_sizes() {
let buffer_sizes = [
"0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y",
];
let buffer_sizes = ["0", "50K", "50k", "1M", "100M"];
for buffer_size in &buffer_sizes {
new_ucmd!()
.arg("-n")
@ -32,6 +30,20 @@ fn test_buffer_sizes() {
.arg("ext_sort.txt")
.succeeds()
.stdout_is_fixture("ext_sort.expected");
#[cfg(not(target_pointer_width = "32"))]
{
let buffer_sizes = ["1000G", "10T"];
for buffer_size in &buffer_sizes {
new_ucmd!()
.arg("-n")
.arg("-S")
.arg(buffer_size)
.arg("ext_sort.txt")
.succeeds()
.stdout_is_fixture("ext_sort.expected");
}
}
}
}
@ -43,11 +55,36 @@ fn test_invalid_buffer_size() {
.arg("-S")
.arg(invalid_buffer_size)
.fails()
.code_is(2)
.stderr_only(format!(
"sort: failed to parse buffer size `{}`: invalid digit found in string",
"sort: invalid -S argument '{}'",
invalid_buffer_size
));
}
#[cfg(not(target_pointer_width = "128"))]
new_ucmd!()
.arg("-n")
.arg("-S")
.arg("1Y")
.arg("ext_sort.txt")
.fails()
.code_is(2)
.stderr_only("sort: -S argument '1Y' too large");
#[cfg(target_pointer_width = "32")]
{
let buffer_sizes = ["1000G", "10T"];
for buffer_size in &buffer_sizes {
new_ucmd!()
.arg("-n")
.arg("-S")
.arg(buffer_size)
.arg("ext_sort.txt")
.fails()
.code_is(2)
.stderr_only(format!("sort: -S argument '{}' too large", buffer_size));
}
}
}
#[test]