1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

Merge pull request #2455 from miDeb/sort/human-numeric

sort: compatibility of human-numeric sort
This commit is contained in:
Sylvestre Ledru 2021-06-25 21:22:21 +02:00 committed by GitHub
commit 5981351222
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 78 additions and 26 deletions

View file

@ -79,7 +79,7 @@ fn parse_suffix(s: &str) -> Result<(f64, Option<Suffix>)> {
Some('Y') => Some((RawSuffix::Y, with_i)),
Some('0'..='9') => None,
_ => return Err(format!("invalid suffix in input: '{}'", s)),
};
};
let suffix_len = match suffix {
None => 0,

View file

@ -81,28 +81,12 @@ impl NumInfo {
}
if Self::is_invalid_char(char, &mut had_decimal_pt, &parse_settings) {
let si_unit = if parse_settings.accept_si_units {
match char {
'K' | 'k' => 3,
'M' => 6,
'G' => 9,
'T' => 12,
'P' => 15,
'E' => 18,
'Z' => 21,
'Y' => 24,
_ => 0,
}
} else {
0
};
return if let Some(start) = start {
let has_si_unit = parse_settings.accept_si_units
&& matches!(char, 'K' | 'k' | 'M' | 'G' | 'T' | 'P' | 'E' | 'Z' | 'Y');
(
NumInfo {
exponent: exponent + si_unit,
sign,
},
start..idx,
NumInfo { exponent, sign },
start..if has_si_unit { idx + 1 } else { idx },
)
} else {
(
@ -182,8 +166,53 @@ impl NumInfo {
}
}
/// compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely.
/// Translates an optional SI suffix character into its rank.
///
/// `K`/`k` map to 1, then `M`, `G`, `T`, `P`, `E`, `Z`, `Y` map to 2 through 8.
/// A missing character, or any character that is not one of these suffixes,
/// yields 0.
fn get_unit(unit: Option<char>) -> u8 {
    match unit {
        Some('K' | 'k') => 1,
        Some('M') => 2,
        Some('G') => 3,
        Some('T') => 4,
        Some('P') => 5,
        Some('E') => 6,
        Some('Z') => 7,
        Some('Y') => 8,
        // No character at all, or a character that is not a known suffix.
        Some(_) | None => 0,
    }
}
/// Orders two number strings using human-numeric comparison rules.
///
/// The decision is made in three stages:
/// 1. differing signs decide the result immediately;
/// 2. otherwise the SI unit (taken from the last character of each string)
///    decides, so the unit outranks the digits (i.e. 2000M < 1G);
/// 3. if the units are equal too, the digits are compared with
///    `numeric_str_cmp`.
pub fn human_numeric_str_cmp(
    (a, a_info): (&str, &NumInfo),
    (b, b_info): (&str, &NumInfo),
) -> Ordering {
    // Stage 1: the sign alone settles the comparison when it differs.
    if a_info.sign != b_info.sign {
        return a_info.sign.cmp(&b_info.sign);
    }

    // Stage 2: rank each side by its trailing unit character.
    let rank_a = get_unit(a.chars().next_back());
    let rank_b = get_unit(b.chars().next_back());

    match rank_a.cmp(&rank_b) {
        // Stage 3: identical units, so fall back to the plain numeric comparison.
        Ordering::Equal => numeric_str_cmp((a, a_info), (b, b_info)),
        // Both values are negative here (signs are equal), so the unit
        // ordering is flipped.
        by_unit if a_info.sign == Sign::Negative => by_unit.reverse(),
        by_unit => by_unit,
    }
}
/// Compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely.
/// NumInfo is needed to provide a fast path for most numbers.
#[inline(always)]
pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering {
// check for a difference in the sign
if a_info.sign != b_info.sign {

View file

@ -28,7 +28,7 @@ use clap::{crate_version, App, Arg};
use custom_str_cmp::custom_str_cmp;
use ext_sort::ext_sort;
use fnv::FnvHasher;
use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings};
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use rayon::prelude::*;
@ -1383,7 +1383,7 @@ fn compare_by<'a>(
let cmp: Ordering = match settings.mode {
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
SortMode::Numeric | SortMode::HumanNumeric => {
SortMode::Numeric => {
let a_num_info = &a_line_data.num_infos
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
let b_num_info = &b_line_data.num_infos
@ -1391,6 +1391,14 @@ fn compare_by<'a>(
num_info_index += 1;
numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
}
SortMode::HumanNumeric => {
let a_num_info = &a_line_data.num_infos
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
let b_num_info = &b_line_data.num_infos
[b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
num_info_index += 1;
human_numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
}
SortMode::GeneralNumeric => {
let a_float = &a_line_data.parsed_floats
[a.index * global_settings.precomputed.floats_per_line + parsed_float_index];

View file

@ -456,10 +456,20 @@ fn test_human_block_sizes2() {
.arg(human_numeric_sort_param)
.pipe_in(input)
.succeeds()
.stdout_only("-8T\n0.8M\n8981K\n21G\n909991M\n");
.stdout_only("-8T\n8981K\n0.8M\n909991M\n21G\n");
}
}
// Checks that the command leaves this input in its original order: the
// expected stdout is the very same string that is piped in.
// NOTE(review): `-hs` is presumably human-numeric sort (`-h`) combined with
// stable sort (`-s`), as the test name suggests — confirm against the CLI definition.
#[test]
fn test_human_numeric_zero_stable() {
// Every entry has a numeric part that is zero or absent; only the sign and
// the unit suffix differ between lines.
let input = "0M\n0K\n-0K\n-P\n-0M\n";
new_ucmd!()
.arg("-hs")
.pipe_in(input)
.succeeds()
// Output must be byte-for-byte identical to the input.
.stdout_only(input);
}
#[test]
fn test_month_default2() {
for month_sort_param in &["-M", "--month-sort", "--sort=month"] {

View file

@ -1,3 +1,4 @@
0K
K
844K
981K

View file

@ -1,3 +1,6 @@
0K
__
__
K
^ no match for key
_

View file

@ -9,4 +9,5 @@
844K
981K
13M
K
K
0K