1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

sort: improve handling of buffer size cmd arg

Instead of overflowing when calculating the buffer size, use
saturating_{pow, mul}.

When failing to parse the buffer size, we now crash instead of silently
ignoring the error.
This commit is contained in:
Michael Debertol 2021-05-22 14:00:07 +02:00
parent e7da8058dc
commit 088443276a
2 changed files with 46 additions and 32 deletions

View file

@ -93,10 +93,10 @@ static THOUSANDS_SEP: char = ',';
static NEGATIVE: char = '-';
static POSITIVE: char = '+';
/// Choosing a higher buffer size does not result in performance improvements
/// (at least not on my machine). TODO: In the future, we should also take the amount of
/// available memory into consideration, instead of relying on this constant only.
static DEFAULT_BUF_SIZE: usize = 1_000_000_000;
// Choosing a higher buffer size does not result in performance improvements
// (at least not on my machine). TODO: In the future, we should also take the amount of
// available memory into consideration, instead of relying on this constant only.
static DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
enum SortMode {
@ -133,24 +133,32 @@ pub struct GlobalSettings {
}
impl GlobalSettings {
// It's back to do conversions for command line opts!
// Probably want to do through numstrcmp somehow now?
fn human_numeric_convert(a: &str) -> usize {
let num_str = &a[get_leading_gen(a)];
let (_, suf_str) = a.split_at(num_str.len());
let num_usize = num_str
.parse::<usize>()
.expect("Error parsing buffer size: ");
let suf_usize: usize = match suf_str.to_uppercase().as_str() {
// SI Units
"B" => 1usize,
"K" => 1000usize,
"M" => 1000000usize,
"G" => 1000000000usize,
// GNU regards empty human numeric values as K by default
_ => 1000usize,
/// Interpret this `&str` as an unsigned integer with an optional trailing SI unit.
///
/// The unit is the last character of the (trimmed) input and is matched
/// ASCII-case-insensitively against `SI_UNITS`. The unit's index in that
/// table is used as the exponent of 1000: `B` multiplies by 1 (the number is
/// a plain byte count), `K` by 1000, `M` by 1000^2, and so on.
/// If there is no trailing SI unit, the implicit unit is K.
///
/// The result saturates at `usize::MAX` instead of overflowing. If the
/// numeric part cannot be parsed as a `usize`, the failure is reported
/// through `crash!` with a message containing the numeric part and the
/// parse error.
fn parse_byte_count(input: &str) -> usize {
// Ordered so that each unit's index is its power of 1000.
const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
let input = input.trim();
let (num_str, si_unit) =
if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) {
let mut chars = input.chars();
// Neither `unwrap` below can fail: `ends_with` has just confirmed that a
// last character exists and that its uppercase form is in `SI_UNITS`.
let si_suffix = chars.next_back().unwrap().to_ascii_uppercase();
let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap();
// What remains of the iterator is everything before the unit character.
let num_str = chars.as_str();
(num_str, si_unit)
} else {
// No unit given: index 1 selects `K`.
(input, 1)
};
// NOTE(review): the next line reads `num_usize` before it is declared and
// refers to `suf_usize`, which this function never defines. It looks like a
// leftover from the previous implementation — confirm and remove.
num_usize * suf_usize
let num_usize: usize = num_str
// Allows whitespace between the number and the unit.
.trim()
.parse()
.unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e));
// Saturating arithmetic: very large units or numbers clamp to `usize::MAX`.
num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32))
}
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
@ -944,7 +952,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.buffer_size = matches
.value_of(OPT_BUF_SIZE)
.map(GlobalSettings::human_numeric_convert)
.map(GlobalSettings::parse_byte_count)
.unwrap_or(DEFAULT_BUF_SIZE);
settings.tmp_dir = matches

View file

@ -17,7 +17,9 @@ fn test_helper(file_name: &str, args: &str) {
#[test]
fn test_buffer_sizes() {
let buffer_sizes = ["0", "50K", "1M", "1000G"];
let buffer_sizes = [
"0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y",
];
for buffer_size in &buffer_sizes {
new_ucmd!()
.arg("-n")
@ -30,14 +32,18 @@ fn test_buffer_sizes() {
}
#[test]
// NOTE(review): two `fn` headers follow back to back, and the command chain
// below has two different endings. This looks like an older test
// (`test_smaller_than_specified_segment`) merged with its replacement
// (`test_invalid_buffer_size`) — confirm which lines are meant to remain.
fn test_smaller_than_specified_segment() {
fn test_invalid_buffer_size() {
// Values that are expected to be rejected as a buffer size.
let buffer_sizes = ["asd", "100f"];
for invalid_buffer_size in &buffer_sizes {
new_ucmd!()
.arg("-n")
.arg("-S")
// NOTE(review): the next four lines pass a valid size and expect success
// against a fixture; they presumably belong to the older test — confirm.
.arg("100M")
.arg("ext_sort.txt")
.succeeds()
.stdout_is_fixture("ext_sort.expected");
// Pass the malformed value as the argument to `-S`; the run must fail.
.arg(invalid_buffer_size)
.fails()
// The expected message embeds the rejected value verbatim.
.stderr_only(format!(
"sort: error: failed to parse buffer size `{}`: invalid digit found in string",
invalid_buffer_size
));
}
}
#[test]