mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
sort: use "parse_size" from uucore
* make parsing of SIZE argument consistent with GNU's behavior
* add error handling
* add tests
This commit is contained in:
parent
a900c7421a
commit
6b8de1dd8b
2 changed files with 115 additions and 30 deletions
|
@ -43,6 +43,7 @@ use std::ops::Range;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use unicode_width::UnicodeWidthStr;
|
use unicode_width::UnicodeWidthStr;
|
||||||
|
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||||
use uucore::InvalidEncodingHandling;
|
use uucore::InvalidEncodingHandling;
|
||||||
|
|
||||||
static NAME: &str = "sort";
|
static NAME: &str = "sort";
|
||||||
|
@ -159,32 +160,31 @@ pub struct GlobalSettings {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GlobalSettings {
|
impl GlobalSettings {
|
||||||
/// Interpret this `&str` as a number with an optional trailing si unit.
|
/// Parse a SIZE string into a number of bytes.
|
||||||
///
|
/// A size string comprises an integer and an optional unit.
|
||||||
/// If there is no trailing si unit, the implicit unit is K.
|
/// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b which is 1.
|
||||||
/// The suffix B causes the number to be interpreted as a byte count.
|
/// Default is K.
|
||||||
fn parse_byte_count(input: &str) -> usize {
|
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
|
||||||
const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
|
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y
|
||||||
|
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
|
||||||
|
const ALLOW_LIST: &[char] = &[
|
||||||
|
'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y',
|
||||||
|
];
|
||||||
|
let mut size_string = input.trim().to_string();
|
||||||
|
|
||||||
let input = input.trim();
|
if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c))
|
||||||
|
|| size_string.ends_with(|c: char| c.is_digit(10))
|
||||||
let (num_str, si_unit) =
|
{
|
||||||
if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) {
|
// b 1, K 1024 (default)
|
||||||
let mut chars = input.chars();
|
if size_string.ends_with(|c: char| c.is_digit(10)) {
|
||||||
let si_suffix = chars.next_back().unwrap().to_ascii_uppercase();
|
size_string.push('K');
|
||||||
let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap();
|
} else if size_string.ends_with('b') {
|
||||||
let num_str = chars.as_str();
|
size_string.pop();
|
||||||
(num_str, si_unit)
|
}
|
||||||
} else {
|
parse_size(&size_string)
|
||||||
(input, 1)
|
} else {
|
||||||
};
|
Err(ParseSizeError::ParseFailure("invalid suffix".to_string()))
|
||||||
|
}
|
||||||
let num_usize: usize = num_str
|
|
||||||
.trim()
|
|
||||||
.parse()
|
|
||||||
.unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e));
|
|
||||||
|
|
||||||
num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
|
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
|
||||||
|
@ -1148,7 +1148,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
settings.buffer_size = matches
|
settings.buffer_size = matches
|
||||||
.value_of(OPT_BUF_SIZE)
|
.value_of(OPT_BUF_SIZE)
|
||||||
.map(GlobalSettings::parse_byte_count)
|
.map(|v| match GlobalSettings::parse_byte_count(v) {
|
||||||
|
Ok(n) => n,
|
||||||
|
Err(ParseSizeError::ParseFailure(_)) => crash!(2, "invalid -S argument '{}'", v),
|
||||||
|
Err(ParseSizeError::SizeTooBig(_)) => crash!(2, "-S argument '{}' too large", v),
|
||||||
|
})
|
||||||
.unwrap_or(DEFAULT_BUF_SIZE);
|
.unwrap_or(DEFAULT_BUF_SIZE);
|
||||||
|
|
||||||
settings.tmp_dir = matches
|
settings.tmp_dir = matches
|
||||||
|
@ -1640,4 +1644,48 @@ mod tests {
|
||||||
// How big is a selection? Constant cost all lines pay when we need selections.
|
// How big is a selection? Constant cost all lines pay when we need selections.
|
||||||
assert_eq!(std::mem::size_of::<Selection>(), 24);
|
assert_eq!(std::mem::size_of::<Selection>(), 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_byte_count() {
|
||||||
|
let valid_input = [
|
||||||
|
("0", 0),
|
||||||
|
("50K", 50 * 1024),
|
||||||
|
("50k", 50 * 1024),
|
||||||
|
("1M", 1024 * 1024),
|
||||||
|
("100M", 100 * 1024 * 1024),
|
||||||
|
#[cfg(not(target_pointer_width = "32"))]
|
||||||
|
("1000G", 1000 * 1024 * 1024 * 1024),
|
||||||
|
#[cfg(not(target_pointer_width = "32"))]
|
||||||
|
("10T", 10 * 1024 * 1024 * 1024 * 1024),
|
||||||
|
("1b", 1),
|
||||||
|
("1024b", 1024),
|
||||||
|
("1024Mb", 1024 * 1024 * 1024), // TODO: This might not be what GNU `sort` does?
|
||||||
|
("1", 1024), // K is default
|
||||||
|
("50", 50 * 1024),
|
||||||
|
("K", 1024),
|
||||||
|
("k", 1024),
|
||||||
|
("m", 1024 * 1024),
|
||||||
|
#[cfg(not(target_pointer_width = "32"))]
|
||||||
|
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
|
];
|
||||||
|
for (input, expected_output) in &valid_input {
|
||||||
|
assert_eq!(
|
||||||
|
GlobalSettings::parse_byte_count(input),
|
||||||
|
Ok(*expected_output)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// SizeTooBig
|
||||||
|
let invalid_input = ["500E", "1Y"];
|
||||||
|
for input in &invalid_input {
|
||||||
|
#[cfg(not(target_pointer_width = "128"))]
|
||||||
|
assert!(GlobalSettings::parse_byte_count(input).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseFailure
|
||||||
|
let invalid_input = ["nonsense", "1B", "B", "b", "p", "e", "z", "y"];
|
||||||
|
for input in &invalid_input {
|
||||||
|
assert!(GlobalSettings::parse_byte_count(input).is_err());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,9 +21,7 @@ fn test_helper(file_name: &str, possible_args: &[&str]) {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_buffer_sizes() {
|
fn test_buffer_sizes() {
|
||||||
let buffer_sizes = [
|
let buffer_sizes = ["0", "50K", "50k", "1M", "100M"];
|
||||||
"0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y",
|
|
||||||
];
|
|
||||||
for buffer_size in &buffer_sizes {
|
for buffer_size in &buffer_sizes {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.arg("-n")
|
.arg("-n")
|
||||||
|
@ -32,6 +30,20 @@ fn test_buffer_sizes() {
|
||||||
.arg("ext_sort.txt")
|
.arg("ext_sort.txt")
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is_fixture("ext_sort.expected");
|
.stdout_is_fixture("ext_sort.expected");
|
||||||
|
|
||||||
|
#[cfg(not(target_pointer_width = "32"))]
|
||||||
|
{
|
||||||
|
let buffer_sizes = ["1000G", "10T"];
|
||||||
|
for buffer_size in &buffer_sizes {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-n")
|
||||||
|
.arg("-S")
|
||||||
|
.arg(buffer_size)
|
||||||
|
.arg("ext_sort.txt")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_is_fixture("ext_sort.expected");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,11 +55,36 @@ fn test_invalid_buffer_size() {
|
||||||
.arg("-S")
|
.arg("-S")
|
||||||
.arg(invalid_buffer_size)
|
.arg(invalid_buffer_size)
|
||||||
.fails()
|
.fails()
|
||||||
|
.code_is(2)
|
||||||
.stderr_only(format!(
|
.stderr_only(format!(
|
||||||
"sort: failed to parse buffer size `{}`: invalid digit found in string",
|
"sort: invalid -S argument '{}'",
|
||||||
invalid_buffer_size
|
invalid_buffer_size
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
#[cfg(not(target_pointer_width = "128"))]
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-n")
|
||||||
|
.arg("-S")
|
||||||
|
.arg("1Y")
|
||||||
|
.arg("ext_sort.txt")
|
||||||
|
.fails()
|
||||||
|
.code_is(2)
|
||||||
|
.stderr_only("sort: -S argument '1Y' too large");
|
||||||
|
|
||||||
|
#[cfg(target_pointer_width = "32")]
|
||||||
|
{
|
||||||
|
let buffer_sizes = ["1000G", "10T"];
|
||||||
|
for buffer_size in &buffer_sizes {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-n")
|
||||||
|
.arg("-S")
|
||||||
|
.arg(buffer_size)
|
||||||
|
.arg("ext_sort.txt")
|
||||||
|
.fails()
|
||||||
|
.code_is(2)
|
||||||
|
.stderr_only(format!("sort: -S argument '{}' too large", buffer_size));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue