mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
Merge pull request #2455 from miDeb/sort/human-numeric
sort: compatibility of human-numeric sort
This commit is contained in:
commit
5981351222
7 changed files with 78 additions and 26 deletions
|
@ -79,7 +79,7 @@ fn parse_suffix(s: &str) -> Result<(f64, Option<Suffix>)> {
|
||||||
Some('Y') => Some((RawSuffix::Y, with_i)),
|
Some('Y') => Some((RawSuffix::Y, with_i)),
|
||||||
Some('0'..='9') => None,
|
Some('0'..='9') => None,
|
||||||
_ => return Err(format!("invalid suffix in input: '{}'", s)),
|
_ => return Err(format!("invalid suffix in input: '{}'", s)),
|
||||||
};
|
};
|
||||||
|
|
||||||
let suffix_len = match suffix {
|
let suffix_len = match suffix {
|
||||||
None => 0,
|
None => 0,
|
||||||
|
|
|
@ -81,28 +81,12 @@ impl NumInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
if Self::is_invalid_char(char, &mut had_decimal_pt, &parse_settings) {
|
if Self::is_invalid_char(char, &mut had_decimal_pt, &parse_settings) {
|
||||||
let si_unit = if parse_settings.accept_si_units {
|
|
||||||
match char {
|
|
||||||
'K' | 'k' => 3,
|
|
||||||
'M' => 6,
|
|
||||||
'G' => 9,
|
|
||||||
'T' => 12,
|
|
||||||
'P' => 15,
|
|
||||||
'E' => 18,
|
|
||||||
'Z' => 21,
|
|
||||||
'Y' => 24,
|
|
||||||
_ => 0,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
return if let Some(start) = start {
|
return if let Some(start) = start {
|
||||||
|
let has_si_unit = parse_settings.accept_si_units
|
||||||
|
&& matches!(char, 'K' | 'k' | 'M' | 'G' | 'T' | 'P' | 'E' | 'Z' | 'Y');
|
||||||
(
|
(
|
||||||
NumInfo {
|
NumInfo { exponent, sign },
|
||||||
exponent: exponent + si_unit,
|
start..if has_si_unit { idx + 1 } else { idx },
|
||||||
sign,
|
|
||||||
},
|
|
||||||
start..idx,
|
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
(
|
(
|
||||||
|
@ -182,8 +166,53 @@ impl NumInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely.
|
/// Ranks an SI unit suffix so that larger units get larger ranks.
///
/// `unit` is the candidate suffix character, if there is one.
/// `K` and `k` both rank 1; `M`, `G`, `T`, `P`, `E`, `Z` and `Y` rank
/// 2 through 8 in that order. `None` and every other character rank 0.
fn get_unit(unit: Option<char>) -> u8 {
    match unit {
        // Kilo is the only unit accepted in both upper and lower case.
        Some('K') | Some('k') => 1,
        Some('M') => 2,
        Some('G') => 3,
        Some('T') => 4,
        Some('P') => 5,
        Some('E') => 6,
        Some('Z') => 7,
        Some('Y') => 8,
        // No character at all, or a character that is not a known unit.
        Some(_) | None => 0,
    }
}
|
||||||
|
|
||||||
|
/// Compare two numbers according to the rules of human numeric comparison.
|
||||||
|
/// The SI-Unit takes precedence over the actual value (i.e. 2000M < 1G).
|
||||||
|
pub fn human_numeric_str_cmp(
|
||||||
|
(a, a_info): (&str, &NumInfo),
|
||||||
|
(b, b_info): (&str, &NumInfo),
|
||||||
|
) -> Ordering {
|
||||||
|
// 1. Sign
|
||||||
|
if a_info.sign != b_info.sign {
|
||||||
|
return a_info.sign.cmp(&b_info.sign);
|
||||||
|
}
|
||||||
|
// 2. Unit
|
||||||
|
let a_unit = get_unit(a.chars().next_back());
|
||||||
|
let b_unit = get_unit(b.chars().next_back());
|
||||||
|
let ordering = a_unit.cmp(&b_unit);
|
||||||
|
if ordering != Ordering::Equal {
|
||||||
|
if a_info.sign == Sign::Negative {
|
||||||
|
ordering.reverse()
|
||||||
|
} else {
|
||||||
|
ordering
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// 3. Number
|
||||||
|
numeric_str_cmp((a, a_info), (b, b_info))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely.
|
||||||
/// NumInfo is needed to provide a fast path for most numbers.
|
/// NumInfo is needed to provide a fast path for most numbers.
|
||||||
|
#[inline(always)]
|
||||||
pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering {
|
pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering {
|
||||||
// check for a difference in the sign
|
// check for a difference in the sign
|
||||||
if a_info.sign != b_info.sign {
|
if a_info.sign != b_info.sign {
|
||||||
|
|
|
@ -28,7 +28,7 @@ use clap::{crate_version, App, Arg};
|
||||||
use custom_str_cmp::custom_str_cmp;
|
use custom_str_cmp::custom_str_cmp;
|
||||||
use ext_sort::ext_sort;
|
use ext_sort::ext_sort;
|
||||||
use fnv::FnvHasher;
|
use fnv::FnvHasher;
|
||||||
use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings};
|
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
|
||||||
use rand::distributions::Alphanumeric;
|
use rand::distributions::Alphanumeric;
|
||||||
use rand::{thread_rng, Rng};
|
use rand::{thread_rng, Rng};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
@ -1383,7 +1383,7 @@ fn compare_by<'a>(
|
||||||
|
|
||||||
let cmp: Ordering = match settings.mode {
|
let cmp: Ordering = match settings.mode {
|
||||||
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
|
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
|
||||||
SortMode::Numeric | SortMode::HumanNumeric => {
|
SortMode::Numeric => {
|
||||||
let a_num_info = &a_line_data.num_infos
|
let a_num_info = &a_line_data.num_infos
|
||||||
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||||
let b_num_info = &b_line_data.num_infos
|
let b_num_info = &b_line_data.num_infos
|
||||||
|
@ -1391,6 +1391,14 @@ fn compare_by<'a>(
|
||||||
num_info_index += 1;
|
num_info_index += 1;
|
||||||
numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
|
numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
|
||||||
}
|
}
|
||||||
|
SortMode::HumanNumeric => {
|
||||||
|
let a_num_info = &a_line_data.num_infos
|
||||||
|
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||||
|
let b_num_info = &b_line_data.num_infos
|
||||||
|
[b.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||||
|
num_info_index += 1;
|
||||||
|
human_numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info))
|
||||||
|
}
|
||||||
SortMode::GeneralNumeric => {
|
SortMode::GeneralNumeric => {
|
||||||
let a_float = &a_line_data.parsed_floats
|
let a_float = &a_line_data.parsed_floats
|
||||||
[a.index * global_settings.precomputed.floats_per_line + parsed_float_index];
|
[a.index * global_settings.precomputed.floats_per_line + parsed_float_index];
|
||||||
|
|
|
@ -456,10 +456,20 @@ fn test_human_block_sizes2() {
|
||||||
.arg(human_numeric_sort_param)
|
.arg(human_numeric_sort_param)
|
||||||
.pipe_in(input)
|
.pipe_in(input)
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("-8T\n0.8M\n8981K\n21G\n909991M\n");
|
.stdout_only("-8T\n8981K\n0.8M\n909991M\n21G\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs the command with `-hs` and requires the output to be
/// byte-identical to the input, i.e. none of the lines may be reordered.
#[test]
fn test_human_numeric_zero_stable() {
    // NOTE(review): presumably every line is meant to parse as zero
    // regardless of sign or unit suffix — confirm against the parser.
    let zero_lines = "0M\n0K\n-0K\n-P\n-0M\n";
    new_ucmd!()
        .arg("-hs")
        .pipe_in(zero_lines)
        .succeeds()
        .stdout_only(zero_lines);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_month_default2() {
|
fn test_month_default2() {
|
||||||
for month_sort_param in &["-M", "--month-sort", "--sort=month"] {
|
for month_sort_param in &["-M", "--month-sort", "--sort=month"] {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
0K
|
||||||
K
|
K
|
||||||
844K
|
844K
|
||||||
981K
|
981K
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
0K
|
||||||
|
__
|
||||||
|
__
|
||||||
K
|
K
|
||||||
^ no match for key
|
^ no match for key
|
||||||
_
|
_
|
||||||
|
|
3
tests/fixtures/sort/human_block_sizes.txt
vendored
3
tests/fixtures/sort/human_block_sizes.txt
vendored
|
@ -9,4 +9,5 @@
|
||||||
844K
|
844K
|
||||||
981K
|
981K
|
||||||
13M
|
13M
|
||||||
K
|
K
|
||||||
|
0K
|
Loading…
Add table
Add a link
Reference in a new issue