mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
sort: use "parse_size" from uucore
* make parsing of SIZE argument consistent with GNU's behavior
* add error handling
* add tests
This commit is contained in:
parent
a900c7421a
commit
6b8de1dd8b
2 changed files with 115 additions and 30 deletions
|
@ -43,6 +43,7 @@ use std::ops::Range;
|
|||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||
use uucore::InvalidEncodingHandling;
|
||||
|
||||
static NAME: &str = "sort";
|
||||
|
@ -159,32 +160,31 @@ pub struct GlobalSettings {
|
|||
}
|
||||
|
||||
impl GlobalSettings {
|
||||
/// Interpret this `&str` as a number with an optional trailing si unit.
|
||||
///
|
||||
/// If there is no trailing si unit, the implicit unit is K.
|
||||
/// The suffix B causes the number to be interpreted as a byte count.
|
||||
fn parse_byte_count(input: &str) -> usize {
|
||||
const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
|
||||
/// Parse a SIZE string into a number of bytes.
|
||||
/// A size string comprises an integer and an optional unit.
|
||||
/// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b which is 1.
|
||||
/// Default is K.
|
||||
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
|
||||
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y
|
||||
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
|
||||
const ALLOW_LIST: &[char] = &[
|
||||
'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y',
|
||||
];
|
||||
let mut size_string = input.trim().to_string();
|
||||
|
||||
let input = input.trim();
|
||||
|
||||
let (num_str, si_unit) =
|
||||
if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) {
|
||||
let mut chars = input.chars();
|
||||
let si_suffix = chars.next_back().unwrap().to_ascii_uppercase();
|
||||
let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap();
|
||||
let num_str = chars.as_str();
|
||||
(num_str, si_unit)
|
||||
if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c))
|
||||
|| size_string.ends_with(|c: char| c.is_digit(10))
|
||||
{
|
||||
// b 1, K 1024 (default)
|
||||
if size_string.ends_with(|c: char| c.is_digit(10)) {
|
||||
size_string.push('K');
|
||||
} else if size_string.ends_with('b') {
|
||||
size_string.pop();
|
||||
}
|
||||
parse_size(&size_string)
|
||||
} else {
|
||||
(input, 1)
|
||||
};
|
||||
|
||||
let num_usize: usize = num_str
|
||||
.trim()
|
||||
.parse()
|
||||
.unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e));
|
||||
|
||||
num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32))
|
||||
Err(ParseSizeError::ParseFailure("invalid suffix".to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
|
||||
|
@ -1148,7 +1148,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
|
||||
settings.buffer_size = matches
|
||||
.value_of(OPT_BUF_SIZE)
|
||||
.map(GlobalSettings::parse_byte_count)
|
||||
.map(|v| match GlobalSettings::parse_byte_count(v) {
|
||||
Ok(n) => n,
|
||||
Err(ParseSizeError::ParseFailure(_)) => crash!(2, "invalid -S argument '{}'", v),
|
||||
Err(ParseSizeError::SizeTooBig(_)) => crash!(2, "-S argument '{}' too large", v),
|
||||
})
|
||||
.unwrap_or(DEFAULT_BUF_SIZE);
|
||||
|
||||
settings.tmp_dir = matches
|
||||
|
@ -1640,4 +1644,48 @@ mod tests {
|
|||
// How big is a selection? Constant cost all lines pay when we need selections.
|
||||
assert_eq!(std::mem::size_of::<Selection>(), 24);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_byte_count() {
|
||||
let valid_input = [
|
||||
("0", 0),
|
||||
("50K", 50 * 1024),
|
||||
("50k", 50 * 1024),
|
||||
("1M", 1024 * 1024),
|
||||
("100M", 100 * 1024 * 1024),
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
("1000G", 1000 * 1024 * 1024 * 1024),
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
("10T", 10 * 1024 * 1024 * 1024 * 1024),
|
||||
("1b", 1),
|
||||
("1024b", 1024),
|
||||
("1024Mb", 1024 * 1024 * 1024), // TODO: This might not be what GNU `sort` does?
|
||||
("1", 1024), // K is default
|
||||
("50", 50 * 1024),
|
||||
("K", 1024),
|
||||
("k", 1024),
|
||||
("m", 1024 * 1024),
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||
];
|
||||
for (input, expected_output) in &valid_input {
|
||||
assert_eq!(
|
||||
GlobalSettings::parse_byte_count(input),
|
||||
Ok(*expected_output)
|
||||
);
|
||||
}
|
||||
|
||||
// SizeTooBig
|
||||
let invalid_input = ["500E", "1Y"];
|
||||
for input in &invalid_input {
|
||||
#[cfg(not(target_pointer_width = "128"))]
|
||||
assert!(GlobalSettings::parse_byte_count(input).is_err());
|
||||
}
|
||||
|
||||
// ParseFailure
|
||||
let invalid_input = ["nonsense", "1B", "B", "b", "p", "e", "z", "y"];
|
||||
for input in &invalid_input {
|
||||
assert!(GlobalSettings::parse_byte_count(input).is_err());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,9 +21,7 @@ fn test_helper(file_name: &str, possible_args: &[&str]) {
|
|||
|
||||
#[test]
|
||||
fn test_buffer_sizes() {
|
||||
let buffer_sizes = [
|
||||
"0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y",
|
||||
];
|
||||
let buffer_sizes = ["0", "50K", "50k", "1M", "100M"];
|
||||
for buffer_size in &buffer_sizes {
|
||||
new_ucmd!()
|
||||
.arg("-n")
|
||||
|
@ -32,6 +30,20 @@ fn test_buffer_sizes() {
|
|||
.arg("ext_sort.txt")
|
||||
.succeeds()
|
||||
.stdout_is_fixture("ext_sort.expected");
|
||||
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
let buffer_sizes = ["1000G", "10T"];
|
||||
for buffer_size in &buffer_sizes {
|
||||
new_ucmd!()
|
||||
.arg("-n")
|
||||
.arg("-S")
|
||||
.arg(buffer_size)
|
||||
.arg("ext_sort.txt")
|
||||
.succeeds()
|
||||
.stdout_is_fixture("ext_sort.expected");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,11 +55,36 @@ fn test_invalid_buffer_size() {
|
|||
.arg("-S")
|
||||
.arg(invalid_buffer_size)
|
||||
.fails()
|
||||
.code_is(2)
|
||||
.stderr_only(format!(
|
||||
"sort: failed to parse buffer size `{}`: invalid digit found in string",
|
||||
"sort: invalid -S argument '{}'",
|
||||
invalid_buffer_size
|
||||
));
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "128"))]
|
||||
new_ucmd!()
|
||||
.arg("-n")
|
||||
.arg("-S")
|
||||
.arg("1Y")
|
||||
.arg("ext_sort.txt")
|
||||
.fails()
|
||||
.code_is(2)
|
||||
.stderr_only("sort: -S argument '1Y' too large");
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
let buffer_sizes = ["1000G", "10T"];
|
||||
for buffer_size in &buffer_sizes {
|
||||
new_ucmd!()
|
||||
.arg("-n")
|
||||
.arg("-S")
|
||||
.arg(buffer_size)
|
||||
.arg("ext_sort.txt")
|
||||
.fails()
|
||||
.code_is(2)
|
||||
.stderr_only(format!("sort: -S argument '{}' too large", buffer_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue