From 06b3092f5f3b47e263c350ba810f1d07b4530519 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Tue, 1 Jun 2021 18:06:18 +0200 Subject: [PATCH 1/2] sort: fix debug output for zeros / invalid numbers We were reporting "no match" when sorting something like "0 ". This is because we don't distinguish between 0 and invalid lines when sorting. For debug output we have to get this information back. --- src/uu/sort/src/numeric_str_cmp.rs | 17 ++++++++-- src/uu/sort/src/sort.rs | 34 ++++++++++++------- tests/by-util/test_sort.rs | 8 +++++ .../fixtures/sort/human_block_sizes.expected | 1 + .../sort/human_block_sizes.expected.debug | 3 ++ tests/fixtures/sort/human_block_sizes.txt | 1 + .../sort/numeric_trailing_chars.expected | 5 +++ .../numeric_trailing_chars.expected.debug | 15 ++++++++ .../fixtures/sort/numeric_trailing_chars.txt | 5 +++ 9 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 tests/fixtures/sort/numeric_trailing_chars.expected create mode 100644 tests/fixtures/sort/numeric_trailing_chars.expected.debug create mode 100644 tests/fixtures/sort/numeric_trailing_chars.txt diff --git a/src/uu/sort/src/numeric_str_cmp.rs b/src/uu/sort/src/numeric_str_cmp.rs index 03806b0c8..461f72670 100644 --- a/src/uu/sort/src/numeric_str_cmp.rs +++ b/src/uu/sort/src/numeric_str_cmp.rs @@ -46,7 +46,12 @@ impl Default for NumInfoParseSettings { impl NumInfo { /// Parse NumInfo for this number. - /// Also returns the range of num that should be passed to numeric_str_cmp later + /// Also returns the range of num that should be passed to numeric_str_cmp later. + /// + /// Leading zeros will be excluded from the returned range. If the number consists of only zeros, + /// an empty range (idx..idx) is returned so that idx is the char after the last zero. + /// If the input is not a number (which has to be treated as zero), the returned empty range + /// will be 0..0. pub fn parse(num: &str, parse_settings: NumInfoParseSettings) -> (Self, Range) { let mut exponent = -1; let mut had_decimal_pt = false; @@ -105,7 +110,15 @@ impl NumInfo { sign: if had_digit { sign } else { Sign::Positive }, exponent: 0, }, - 0..0, + if had_digit { + // In this case there were only zeroes. + // For debug output to work properly, we have to match the character after the last zero. + idx..idx + } else { + // This was no number at all. + // For debug output to work properly, we have to match 0..0. + 0..0 + }, ) }; } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index ab3b06451..f47b561ed 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -414,19 +414,29 @@ impl<'a> Line<'a> { selection.start += num_range.start; selection.end = selection.start + num_range.len(); - // include a trailing si unit - if selector.settings.mode == SortMode::HumanNumeric - && self.line[selection.end..initial_selection.end] - .starts_with(&['k', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'][..]) - { - selection.end += 1; - } + if num_range != (0..0) { + // include a trailing si unit + if selector.settings.mode == SortMode::HumanNumeric + && self.line[selection.end..initial_selection.end] + .starts_with(&['k', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'][..]) + { + selection.end += 1; + } - // include leading zeroes, a leading minus or a leading decimal point - while self.line[initial_selection.start..selection.start] - .ends_with(&['-', '0', '.'][..]) - { - selection.start -= 1; + // include leading zeroes, a leading minus or a leading decimal point + while self.line[initial_selection.start..selection.start] + .ends_with(&['-', '0', '.'][..]) + { + selection.start -= 1; + } + } else { + // This was not a valid number. + // Report no match at the first non-whitespace character. + let leading_whitespace = self.line[selection.clone()] + .find(|c: char| !c.is_whitespace()) + .unwrap_or(0); + selection.start += leading_whitespace; + selection.end += leading_whitespace; } } SortMode::GeneralNumeric => { diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index d2b447925..02636b027 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -123,6 +123,14 @@ fn test_multiple_decimals_numeric() { ) } +#[test] +fn test_numeric_with_trailing_invalid_chars() { + test_helper( + "numeric_trailing_chars", + &["-n", "--numeric-sort", "--sort=numeric"], + ) +} + #[test] fn test_check_zero_terminated_failure() { new_ucmd!() diff --git a/tests/fixtures/sort/human_block_sizes.expected b/tests/fixtures/sort/human_block_sizes.expected index 74fad9fdf..0e4fdfbb6 100644 --- a/tests/fixtures/sort/human_block_sizes.expected +++ b/tests/fixtures/sort/human_block_sizes.expected @@ -1,3 +1,4 @@ +K 844K 981K 11M diff --git a/tests/fixtures/sort/human_block_sizes.expected.debug b/tests/fixtures/sort/human_block_sizes.expected.debug index 5f4860a85..cde98628e 100644 --- a/tests/fixtures/sort/human_block_sizes.expected.debug +++ b/tests/fixtures/sort/human_block_sizes.expected.debug @@ -1,3 +1,6 @@ +K +^ no match for key +_ 844K ____ ____ diff --git a/tests/fixtures/sort/human_block_sizes.txt b/tests/fixtures/sort/human_block_sizes.txt index 803666dbe..9cc2b3c6c 100644 --- a/tests/fixtures/sort/human_block_sizes.txt +++ b/tests/fixtures/sort/human_block_sizes.txt @@ -9,3 +9,4 @@ 844K 981K 13M +K \ No newline at end of file diff --git a/tests/fixtures/sort/numeric_trailing_chars.expected b/tests/fixtures/sort/numeric_trailing_chars.expected new file mode 100644 index 000000000..96226fd38 --- /dev/null +++ b/tests/fixtures/sort/numeric_trailing_chars.expected @@ -0,0 +1,5 @@ +-.05, +-x +0.abc +0foo +100 diff --git a/tests/fixtures/sort/numeric_trailing_chars.expected.debug b/tests/fixtures/sort/numeric_trailing_chars.expected.debug new file mode 100644 index 000000000..0226a636f --- /dev/null +++ b/tests/fixtures/sort/numeric_trailing_chars.expected.debug @@ -0,0 +1,15 @@ +-.05, +____ +_____ +-x +^ no match for key +__ +0.abc +__ +_____ +0foo +_ +____ +100 +___ +____ diff --git a/tests/fixtures/sort/numeric_trailing_chars.txt b/tests/fixtures/sort/numeric_trailing_chars.txt new file mode 100644 index 000000000..d6fe3e48d --- /dev/null +++ b/tests/fixtures/sort/numeric_trailing_chars.txt @@ -0,0 +1,5 @@ +0foo +0.abc +100 +-.05, +-x \ No newline at end of file From 67b83647ac41515f762506211ec6390c19fa0b2d Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Tue, 1 Jun 2021 18:09:39 +0200 Subject: [PATCH 2/2] sort: simplify handling of negative zeros We can simply parse the sign of negative zero as positive, instead of handling the comparison of zeros differently. --- src/uu/sort/src/numeric_str_cmp.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/uu/sort/src/numeric_str_cmp.rs b/src/uu/sort/src/numeric_str_cmp.rs index 461f72670..8cd3faab2 100644 --- a/src/uu/sort/src/numeric_str_cmp.rs +++ b/src/uu/sort/src/numeric_str_cmp.rs @@ -107,7 +107,7 @@ impl NumInfo { } else { ( NumInfo { - sign: if had_digit { sign } else { Sign::Positive }, + sign: Sign::Positive, exponent: 0, }, if had_digit { @@ -147,7 +147,7 @@ impl NumInfo { } else { ( NumInfo { - sign: if had_digit { sign } else { Sign::Positive }, + sign: Sign::Positive, exponent: 0, }, if had_digit { @@ -187,11 +187,7 @@ impl NumInfo { pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering { // check for a difference in the sign if a_info.sign != b_info.sign { - return if a.is_empty() && b.is_empty() { - Ordering::Equal - } else { - a_info.sign.cmp(&b_info.sign) - }; + return a_info.sign.cmp(&b_info.sign); } // check for a difference in the exponent