mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
Merge pull request #2455 from miDeb/sort/human-numeric
sort: compatibility of human-numeric sort
This commit is contained in:
commit
5981351222
7 changed files with 78 additions and 26 deletions
|
@ -79,7 +79,7 @@ fn parse_suffix(s: &str) -> Result<(f64, Option<Suffix>)> {
|
|||
Some('Y') => Some((RawSuffix::Y, with_i)),
|
||||
Some('0'..='9') => None,
|
||||
_ => return Err(format!("invalid suffix in input: '{}'", s)),
|
||||
};
|
||||
};
|
||||
|
||||
let suffix_len = match suffix {
|
||||
None => 0,
|
||||
|
|
|
@ -81,28 +81,12 @@ impl NumInfo {
|
|||
}
|
||||
|
||||
if Self::is_invalid_char(char, &mut had_decimal_pt, &parse_settings) {
|
||||
let si_unit = if parse_settings.accept_si_units {
|
||||
match char {
|
||||
'K' | 'k' => 3,
|
||||
'M' => 6,
|
||||
'G' => 9,
|
||||
'T' => 12,
|
||||
'P' => 15,
|
||||
'E' => 18,
|
||||
'Z' => 21,
|
||||
'Y' => 24,
|
||||
_ => 0,
|
||||
}
|
||||
} else {
|
||||
0
|
||||
};
|
||||
return if let Some(start) = start {
|
||||
let has_si_unit = parse_settings.accept_si_units
|
||||
&& matches!(char, 'K' | 'k' | 'M' | 'G' | 'T' | 'P' | 'E' | 'Z' | 'Y');
|
||||
(
|
||||
NumInfo {
|
||||
exponent: exponent + si_unit,
|
||||
sign,
|
||||
},
|
||||
start..idx,
|
||||
NumInfo { exponent, sign },
|
||||
start..if has_si_unit { idx + 1 } else { idx },
|
||||
)
|
||||
} else {
|
||||
(
|
||||
|
@ -182,8 +166,53 @@ impl NumInfo {
|
|||
}
|
||||
}
|
||||
|
||||
/// compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely.
|
||||
/// Maps an optional unit suffix character to its rank for human-numeric ordering.
///
/// `K` and `k` rank 1; `M`, `G`, `T`, `P`, `E`, `Z` and `Y` rank 2 through 8
/// in that order. `None` and every other character rank 0.
fn get_unit(unit: Option<char>) -> u8 {
    // One flat match covers both the "no character" and "not a unit" cases
    // through the shared catch-all arm.
    match unit {
        Some('K') | Some('k') => 1,
        Some('M') => 2,
        Some('G') => 3,
        Some('T') => 4,
        Some('P') => 5,
        Some('E') => 6,
        Some('Z') => 7,
        Some('Y') => 8,
        _ => 0,
    }
}
|
||||
|
||||
/// Compare two numbers according to the rules of human numeric comparison.
|
||||
/// The SI-Unit takes precedence over the actual value (i.e. 2000M < 1G).
|
||||
pub fn human_numeric_str_cmp(
|
||||
(a, a_info): (&str, &NumInfo),
|
||||
(b, b_info): (&str, &NumInfo),
|
||||
) -> Ordering {
|
||||
// 1. Sign
|
||||
if a_info.sign != b_info.sign {
|
||||
return a_info.sign.cmp(&b_info.sign);
|
||||
}
|
||||
// 2. Unit
|
||||
let a_unit = get_unit(a.chars().next_back());
|
||||
let b_unit = get_unit(b.chars().next_back());
|
||||
let ordering = a_unit.cmp(&b_unit);
|
||||
if ordering != Ordering::Equal {
|
||||
if a_info.sign == Sign::Negative {
|
||||
ordering.reverse()
|
||||
} else {
|
||||
ordering
|
||||
}
|
||||
} else {
|
||||
// 3. Number
|
||||
numeric_str_cmp((a, a_info), (b, b_info))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely.
|
||||
/// NumInfo is needed to provide a fast path for most numbers.
|
||||
#[inline(always)]
|
||||
pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering {
|
||||
// check for a difference in the sign
|
||||
if a_info.sign != b_info.sign {
|
||||
|
|
|
@ -28,7 +28,7 @@ use clap::{crate_version, App, Arg};
|
|||
use custom_str_cmp::custom_str_cmp;
|
||||
use ext_sort::ext_sort;
|
||||
use fnv::FnvHasher;
|
||||
use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings};
|
||||
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
|
||||
use rand::distributions::Alphanumeric;
|
||||
use rand::{thread_rng, Rng};
|
||||
use rayon::prelude::*;
|
||||
|
@ -1383,7 +1383,7 @@ fn compare_by<'a>(
|
|||
|
||||
let cmp: Ordering = match settings.mode {
|
||||
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
|
||||
SortMode::Numeric | SortMode::HumanNumeric => {
|
||||
SortMode::Numeric => {
|
||||
let a_num_info = &a_line_data.num_infos
|
||||
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||
let b_num_info = &b_line_data.num_infos
|
||||
|
@ -1391,6 +1391,14 @@ fn compare_by<'a>(
|
|||
num_info_index += 1;
|
||||
numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
|
||||
}
|
||||
SortMode::HumanNumeric => {
|
||||
let a_num_info = &a_line_data.num_infos
|
||||
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||
let b_num_info = &b_line_data.num_infos
|
||||
[b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||
num_info_index += 1;
|
||||
human_numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
|
||||
}
|
||||
SortMode::GeneralNumeric => {
|
||||
let a_float = &a_line_data.parsed_floats
|
||||
[a.index * global_settings.precomputed.floats_per_line + parsed_float_index];
|
||||
|
|
|
@ -456,10 +456,20 @@ fn test_human_block_sizes2() {
|
|||
.arg(human_numeric_sort_param)
|
||||
.pipe_in(input)
|
||||
.succeeds()
|
||||
.stdout_only("-8T\n0.8M\n8981K\n21G\n909991M\n");
|
||||
.stdout_only("-8T\n8981K\n0.8M\n909991M\n21G\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Pipes five lines into the command with the `-hs` flags and expects stdout
// to be exactly the input, i.e. the lines must come back in their original order.
// NOTE(review): presumably `-h` selects human-numeric sort and `-s` requests a
// stable sort, so these zero-valued keys should compare equal — confirm
// against the option definitions.
#[test]
|
||||
fn test_human_numeric_zero_stable() {
|
||||
// Zero-like values carrying different unit suffixes and signs.
let input = "0M\n0K\n-0K\n-P\n-0M\n";
|
||||
new_ucmd!()
|
||||
.arg("-hs")
|
||||
.pipe_in(input)
|
||||
.succeeds()
|
||||
// The same string is used as the expected output: nothing may be reordered.
.stdout_only(input);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_month_default2() {
|
||||
for month_sort_param in &["-M", "--month-sort", "--sort=month"] {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
0K
|
||||
K
|
||||
844K
|
||||
981K
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
0K
|
||||
__
|
||||
__
|
||||
K
|
||||
^ no match for key
|
||||
_
|
||||
|
|
3
tests/fixtures/sort/human_block_sizes.txt
vendored
3
tests/fixtures/sort/human_block_sizes.txt
vendored
|
@ -9,4 +9,5 @@
|
|||
844K
|
||||
981K
|
||||
13M
|
||||
K
|
||||
K
|
||||
0K
|
Loading…
Add table
Add a link
Reference in a new issue