Mirror of https://github.com/RGBCube/uutils-coreutils (synced 2025-07-28 11:37:44 +00:00)
sort: improve handling of buffer size cmd arg
Instead of overflowing when calculating the buffer size, use saturating_{pow, mul}. When failing to parse the buffer size, we now crash instead of silently ignoring the error.
This commit is contained in:
Parent: e7da8058dc
Commit: 088443276a
2 changed files with 46 additions and 32 deletions
|
@ -93,10 +93,10 @@ static THOUSANDS_SEP: char = ',';
|
||||||
static NEGATIVE: char = '-';
|
static NEGATIVE: char = '-';
|
||||||
static POSITIVE: char = '+';
|
static POSITIVE: char = '+';
|
||||||
|
|
||||||
/// Choosing a higher buffer size does not result in performance improvements
|
// Choosing a higher buffer size does not result in performance improvements
|
||||||
/// (at least not on my machine). TODO: In the future, we should also take the amount of
|
// (at least not on my machine). TODO: In the future, we should also take the amount of
|
||||||
/// available memory into consideration, instead of relying on this constant only.
|
// available memory into consideration, instead of relying on this constant only.
|
||||||
static DEFAULT_BUF_SIZE: usize = 1_000_000_000;
|
static DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB
|
||||||
|
|
||||||
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
|
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
|
||||||
enum SortMode {
|
enum SortMode {
|
||||||
|
@ -133,24 +133,32 @@ pub struct GlobalSettings {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GlobalSettings {
|
impl GlobalSettings {
|
||||||
// It's back to do conversions for command line opts!
|
/// Interpret this `&str` as a number with an optional trailing si unit.
|
||||||
// Probably want to do through numstrcmp somehow now?
|
///
|
||||||
fn human_numeric_convert(a: &str) -> usize {
|
/// If there is no trailing si unit, the implicit unit is K.
|
||||||
let num_str = &a[get_leading_gen(a)];
|
/// The suffix B causes the number to be interpreted as a byte count.
|
||||||
let (_, suf_str) = a.split_at(num_str.len());
|
fn parse_byte_count(input: &str) -> usize {
|
||||||
let num_usize = num_str
|
const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
|
||||||
.parse::<usize>()
|
|
||||||
.expect("Error parsing buffer size: ");
|
let input = input.trim();
|
||||||
let suf_usize: usize = match suf_str.to_uppercase().as_str() {
|
|
||||||
// SI Units
|
let (num_str, si_unit) =
|
||||||
"B" => 1usize,
|
if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) {
|
||||||
"K" => 1000usize,
|
let mut chars = input.chars();
|
||||||
"M" => 1000000usize,
|
let si_suffix = chars.next_back().unwrap().to_ascii_uppercase();
|
||||||
"G" => 1000000000usize,
|
let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap();
|
||||||
// GNU regards empty human numeric values as K by default
|
let num_str = chars.as_str();
|
||||||
_ => 1000usize,
|
(num_str, si_unit)
|
||||||
};
|
} else {
|
||||||
num_usize * suf_usize
|
(input, 1)
|
||||||
|
};
|
||||||
|
|
||||||
|
let num_usize: usize = num_str
|
||||||
|
.trim()
|
||||||
|
.parse()
|
||||||
|
.unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e));
|
||||||
|
|
||||||
|
num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
|
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
|
||||||
|
@ -944,7 +952,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
settings.buffer_size = matches
|
settings.buffer_size = matches
|
||||||
.value_of(OPT_BUF_SIZE)
|
.value_of(OPT_BUF_SIZE)
|
||||||
.map(GlobalSettings::human_numeric_convert)
|
.map(GlobalSettings::parse_byte_count)
|
||||||
.unwrap_or(DEFAULT_BUF_SIZE);
|
.unwrap_or(DEFAULT_BUF_SIZE);
|
||||||
|
|
||||||
settings.tmp_dir = matches
|
settings.tmp_dir = matches
|
||||||
|
|
|
@ -17,7 +17,9 @@ fn test_helper(file_name: &str, args: &str) {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_buffer_sizes() {
|
fn test_buffer_sizes() {
|
||||||
let buffer_sizes = ["0", "50K", "1M", "1000G"];
|
let buffer_sizes = [
|
||||||
|
"0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y",
|
||||||
|
];
|
||||||
for buffer_size in &buffer_sizes {
|
for buffer_size in &buffer_sizes {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.arg("-n")
|
.arg("-n")
|
||||||
|
@ -30,14 +32,18 @@ fn test_buffer_sizes() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_smaller_than_specified_segment() {
|
fn test_invalid_buffer_size() {
|
||||||
new_ucmd!()
|
let buffer_sizes = ["asd", "100f"];
|
||||||
.arg("-n")
|
for invalid_buffer_size in &buffer_sizes {
|
||||||
.arg("-S")
|
new_ucmd!()
|
||||||
.arg("100M")
|
.arg("-S")
|
||||||
.arg("ext_sort.txt")
|
.arg(invalid_buffer_size)
|
||||||
.succeeds()
|
.fails()
|
||||||
.stdout_is_fixture("ext_sort.expected");
|
.stderr_only(format!(
|
||||||
|
"sort: error: failed to parse buffer size `{}`: invalid digit found in string",
|
||||||
|
invalid_buffer_size
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue