From 548a895cd6bd9f014533bb3bd1e58b8410beb40a Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 25 Jun 2021 18:19:00 +0200 Subject: [PATCH 1/2] sort: compatibility of human-numeric sort Closes #1985. This makes human-numeric sort follow the same algorithm as GNU's/FreeBSD's sort. As documented by GNU in https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html, we first compare by sign, then by si unit and finally by the numeric value. --- src/uu/sort/src/numeric_str_cmp.rs | 71 +++++++++++++------ src/uu/sort/src/sort.rs | 12 +++- tests/by-util/test_sort.rs | 12 +++- .../fixtures/sort/human_block_sizes.expected | 1 + .../sort/human_block_sizes.expected.debug | 3 + tests/fixtures/sort/human_block_sizes.txt | 3 +- 6 files changed, 77 insertions(+), 25 deletions(-) diff --git a/src/uu/sort/src/numeric_str_cmp.rs b/src/uu/sort/src/numeric_str_cmp.rs index 8cd3faab2..d753c2d9d 100644 --- a/src/uu/sort/src/numeric_str_cmp.rs +++ b/src/uu/sort/src/numeric_str_cmp.rs @@ -81,28 +81,12 @@ impl NumInfo { } if Self::is_invalid_char(char, &mut had_decimal_pt, &parse_settings) { - let si_unit = if parse_settings.accept_si_units { - match char { - 'K' | 'k' => 3, - 'M' => 6, - 'G' => 9, - 'T' => 12, - 'P' => 15, - 'E' => 18, - 'Z' => 21, - 'Y' => 24, - _ => 0, - } - } else { - 0 - }; return if let Some(start) = start { + let has_si_unit = parse_settings.accept_si_units + && matches!(char, 'K' | 'k' | 'M' | 'G' | 'T' | 'P' | 'E' | 'Z' | 'Y'); ( - NumInfo { - exponent: exponent + si_unit, - sign, - }, - start..idx, + NumInfo { exponent, sign }, + start..if has_si_unit { idx + 1 } else { idx }, ) } else { ( @@ -182,8 +166,53 @@ impl NumInfo { } } -/// compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely. +fn get_unit(unit: Option) -> u8 { + if let Some(unit) = unit { + match unit { + 'K' | 'k' => 1, + 'M' => 2, + 'G' => 3, + 'T' => 4, + 'P' => 5, + 'E' => 6, + 'Z' => 7, + 'Y' => 8, + _ => 0, + } + } else { + 0 + } +} + +/// Compare two numbers according to the rules of human numeric comparison. +/// The SI-Unit takes precedence over the actual value (i.e. 2000M < 1G). +pub fn human_numeric_str_cmp( + (a, a_info): (&str, &NumInfo), + (b, b_info): (&str, &NumInfo), +) -> Ordering { + // 1. Sign + if a_info.sign != b_info.sign { + return a_info.sign.cmp(&b_info.sign); + } + // 2. Unit + let a_unit = get_unit(a.chars().next_back()); + let b_unit = get_unit(b.chars().next_back()); + let ordering = a_unit.cmp(&b_unit); + if ordering != Ordering::Equal { + if a_info.sign == Sign::Negative { + ordering.reverse() + } else { + ordering + } + } else { + // 3. Number + numeric_str_cmp((a, a_info), (b, b_info)) + } +} + +/// Compare two numbers as strings without parsing them as a number first. This should be more performant and can handle numbers more precisely. /// NumInfo is needed to provide a fast path for most numbers. +#[inline(always)] pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering { // check for a difference in the sign if a_info.sign != b_info.sign { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 202ab5d1a..d0e574627 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -28,7 +28,7 @@ use clap::{crate_version, App, Arg}; use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; -use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings}; +use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings}; use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; @@ -1383,7 +1383,7 @@ fn compare_by<'a>( let cmp: Ordering = match settings.mode { SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt), - SortMode::Numeric | SortMode::HumanNumeric => { + SortMode::Numeric => { let a_num_info = &a_line_data.num_infos [a.index * global_settings.precomputed.num_infos_per_line + num_info_index]; let b_num_info = &b_line_data.num_infos @@ -1391,6 +1391,14 @@ fn compare_by<'a>( num_info_index += 1; numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info)) } + SortMode::HumanNumeric => { + let a_num_info = &a_line_data.num_infos + [a.index * global_settings.precomputed.num_infos_per_line + num_info_index]; + let b_num_info = &b_line_data.num_infos + [b.index * global_settings.precomputed.num_infos_per_line + num_info_index]; + num_info_index += 1; + human_numeric_str_cmp((a_str, a_num_info), (b_str, b_num_info)) + } SortMode::GeneralNumeric => { let a_float = &a_line_data.parsed_floats [a.index * global_settings.precomputed.floats_per_line + parsed_float_index]; diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 01fafae00..3e841f630 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -456,10 +456,20 @@ fn test_human_block_sizes2() { .arg(human_numeric_sort_param) .pipe_in(input) .succeeds() - .stdout_only("-8T\n0.8M\n8981K\n21G\n909991M\n"); + .stdout_only("-8T\n8981K\n0.8M\n909991M\n21G\n"); } } +#[test] +fn test_human_numeric_zero_stable() { + let input = "0M\n0K\n-0K\n-P\n-0M\n"; + new_ucmd!() + .arg("-hs") + .pipe_in(input) + .succeeds() + .stdout_only(input); +} + #[test] fn test_month_default2() { for month_sort_param in &["-M", "--month-sort", "--sort=month"] { diff --git a/tests/fixtures/sort/human_block_sizes.expected b/tests/fixtures/sort/human_block_sizes.expected index 0e4fdfbb6..5b4f8bb83 100644 --- a/tests/fixtures/sort/human_block_sizes.expected +++ b/tests/fixtures/sort/human_block_sizes.expected @@ -1,3 +1,4 @@ +0K K 844K 981K diff --git a/tests/fixtures/sort/human_block_sizes.expected.debug b/tests/fixtures/sort/human_block_sizes.expected.debug index cde98628e..398ff9db4 100644 --- a/tests/fixtures/sort/human_block_sizes.expected.debug +++ b/tests/fixtures/sort/human_block_sizes.expected.debug @@ -1,3 +1,6 @@ +0K +__ +__ K ^ no match for key _ diff --git a/tests/fixtures/sort/human_block_sizes.txt b/tests/fixtures/sort/human_block_sizes.txt index 9cc2b3c6c..a5adb9b5e 100644 --- a/tests/fixtures/sort/human_block_sizes.txt +++ b/tests/fixtures/sort/human_block_sizes.txt @@ -9,4 +9,5 @@ 844K 981K 13M -K \ No newline at end of file +K +0K \ No newline at end of file From 004b5d1b386d35019dd114c88162cadb62e0b031 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 25 Jun 2021 19:35:33 +0200 Subject: [PATCH 2/2] format: formatting --- src/uu/numfmt/src/format.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/numfmt/src/format.rs b/src/uu/numfmt/src/format.rs index 4e8cd8f06..e44446818 100644 --- a/src/uu/numfmt/src/format.rs +++ b/src/uu/numfmt/src/format.rs @@ -79,7 +79,7 @@ fn parse_suffix(s: &str) -> Result<(f64, Option)> { Some('Y') => Some((RawSuffix::Y, with_i)), Some('0'..='9') => None, _ => return Err(format!("invalid suffix in input: '{}'", s)), - }; + }; let suffix_len = match suffix { None => 0,