1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

sort: make GNU test sort-debug-keys pass (#2269)

* sort: disable support for thousand separators

In order to be compatible with GNU, we have to disable thousands
separators. GNU does not enable them for the C locale, either.

Once we add support for locales we can add this feature back.

* sort: delete unused fixtures

* sort: compare -0 and 0 equal

I must have misunderstood this when implementing, but GNU considers
-0, 0, and invalid numbers to be equal.

* sort: strip blanks before applying the char index

* sort: don't crash when key start is after key end

* sort: add "no match" for months at the first non-whitespace char

We should put the "^ no match for key" indicator at the first
non-whitespace character of a field.

* sort: improve support for e notation

* sort: use maches! macros
This commit is contained in:
Michael Debertol 2021-05-28 22:38:29 +02:00 committed by GitHub
parent 9442f26fdb
commit e9656a6c32
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 191 additions and 231 deletions

View file

@ -174,7 +174,11 @@ impl NumInfo {
pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering { pub fn numeric_str_cmp((a, a_info): (&str, &NumInfo), (b, b_info): (&str, &NumInfo)) -> Ordering {
// check for a difference in the sign // check for a difference in the sign
if a_info.sign != b_info.sign { if a_info.sign != b_info.sign {
return a_info.sign.cmp(&b_info.sign); return if a.is_empty() && b.is_empty() {
Ordering::Equal
} else {
a_info.sign.cmp(&b_info.sign)
};
} }
// check for a difference in the exponent // check for a difference in the exponent
@ -419,8 +423,8 @@ mod tests {
#[test] #[test]
fn minus_zero() { fn minus_zero() {
// This matches GNU sort behavior. // This matches GNU sort behavior.
test_helper("-0", "0", Ordering::Less); test_helper("-0", "0", Ordering::Equal);
test_helper("-0x", "0", Ordering::Less); test_helper("-0x", "0", Ordering::Equal);
} }
#[test] #[test]
fn double_minus() { fn double_minus() {

View file

@ -99,10 +99,9 @@ static OPT_TMP_DIR: &str = "temporary-directory";
static ARG_FILES: &str = "files"; static ARG_FILES: &str = "files";
static DECIMAL_PT: char = '.'; static DECIMAL_PT: char = '.';
static THOUSANDS_SEP: char = ',';
static NEGATIVE: char = '-'; const NEGATIVE: char = '-';
static POSITIVE: char = '+'; const POSITIVE: char = '+';
// Choosing a higher buffer size does not result in performance improvements // Choosing a higher buffer size does not result in performance improvements
// (at least not on my machine). TODO: In the future, we should also take the amount of // (at least not on my machine). TODO: In the future, we should also take the amount of
@ -330,8 +329,7 @@ impl<'a> Line<'a> {
&self.line[selection.clone()], &self.line[selection.clone()],
NumInfoParseSettings { NumInfoParseSettings {
accept_si_units: selector.settings.mode == SortMode::HumanNumeric, accept_si_units: selector.settings.mode == SortMode::HumanNumeric,
thousands_separator: Some(THOUSANDS_SEP), ..Default::default()
decimal_pt: Some(DECIMAL_PT),
}, },
); );
let initial_selection = selection.clone(); let initial_selection = selection.clone();
@ -367,16 +365,24 @@ impl<'a> Line<'a> {
SortMode::Month => { SortMode::Month => {
let initial_selection = &self.line[selection.clone()]; let initial_selection = &self.line[selection.clone()];
let mut month_chars = initial_selection
.char_indices()
.skip_while(|(_, c)| c.is_whitespace());
let month = if month_parse(initial_selection) == Month::Unknown { let month = if month_parse(initial_selection) == Month::Unknown {
// We failed to parse a month, which is equivalent to matching nothing. // We failed to parse a month, which is equivalent to matching nothing.
0..0 // Add the "no match for key" marker to the first non-whitespace character.
let first_non_whitespace = month_chars.next();
first_non_whitespace.map_or(
initial_selection.len()..initial_selection.len(),
|(idx, _)| idx..idx,
)
} else { } else {
// We parsed a month. Match the three first non-whitespace characters, which must be the month we parsed. // We parsed a month. Match the first three non-whitespace characters, which must be the month we parsed.
let mut chars = initial_selection month_chars.next().unwrap().0
.char_indices() ..month_chars
.skip_while(|(_, c)| c.is_whitespace()); .nth(2)
chars.next().unwrap().0 .map_or(initial_selection.len(), |(idx, _)| idx)
..chars.nth(2).map_or(initial_selection.len(), |(idx, _)| idx)
}; };
// Shorten selection to month. // Shorten selection to month.
@ -606,8 +612,7 @@ impl FieldSelector {
range, range,
NumInfoParseSettings { NumInfoParseSettings {
accept_si_units: self.settings.mode == SortMode::HumanNumeric, accept_si_units: self.settings.mode == SortMode::HumanNumeric,
thousands_separator: Some(THOUSANDS_SEP), ..Default::default()
decimal_pt: Some(DECIMAL_PT),
}, },
); );
// Shorten the range to what we need to pass to numeric_str_cmp later. // Shorten the range to what we need to pass to numeric_str_cmp later.
@ -666,22 +671,21 @@ impl FieldSelector {
} else { } else {
tokens.unwrap()[position.field - 1].start tokens.unwrap()[position.field - 1].start
}; };
// strip blanks if needed
if position.ignore_blanks {
idx += line[idx..]
.char_indices()
.find(|(_, c)| !c.is_whitespace())
.map_or(line[idx..].len(), |(idx, _)| idx);
}
// apply the character index
idx += line[idx..] idx += line[idx..]
.char_indices() .char_indices()
.nth(position.char - 1) .nth(position.char - 1)
.map_or(line.len(), |(idx, _)| idx); .map_or(line[idx..].len(), |(idx, _)| idx);
if idx >= line.len() { if idx >= line.len() {
Resolution::TooHigh Resolution::TooHigh
} else { } else {
if position.ignore_blanks {
if let Some((not_whitespace, _)) =
line[idx..].char_indices().find(|(_, c)| !c.is_whitespace())
{
idx += not_whitespace;
} else {
return Resolution::TooHigh;
}
}
Resolution::StartOfChar(idx) Resolution::StartOfChar(idx)
} }
} }
@ -691,8 +695,9 @@ impl FieldSelector {
Resolution::StartOfChar(from) => { Resolution::StartOfChar(from) => {
let to = self.to.as_ref().map(|to| resolve_index(line, tokens, &to)); let to = self.to.as_ref().map(|to| resolve_index(line, tokens, &to));
match to { let mut range = match to {
Some(Resolution::StartOfChar(mut to)) => { Some(Resolution::StartOfChar(mut to)) => {
// We need to include the character at `to`.
to += line[to..].chars().next().map_or(1, |c| c.len_utf8()); to += line[to..].chars().next().map_or(1, |c| c.len_utf8());
from..to from..to
} }
@ -703,7 +708,11 @@ impl FieldSelector {
// If `to` is before the start of the line, report no match. // If `to` is before the start of the line, report no match.
// This can happen if the line starts with a separator. // This can happen if the line starts with a separator.
Some(Resolution::TooLow) => 0..0, Some(Resolution::TooLow) => 0..0,
};
if range.start > range.end {
range.end = range.start;
} }
range
} }
Resolution::TooLow | Resolution::EndOfChar(_) => { Resolution::TooLow | Resolution::EndOfChar(_) => {
unreachable!("This should only happen if the field start index is 0, but that should already have caused an error.") unreachable!("This should only happen if the field start index is 0, but that should already have caused an error.")
@ -1202,6 +1211,8 @@ fn compare_by<'a>(a: &Line<'a>, b: &Line<'a>, global_settings: &GlobalSettings)
fn get_leading_gen(input: &str) -> Range<usize> { fn get_leading_gen(input: &str) -> Range<usize> {
let trimmed = input.trim_start(); let trimmed = input.trim_start();
let leading_whitespace_len = input.len() - trimmed.len(); let leading_whitespace_len = input.len() - trimmed.len();
// check for inf, -inf and nan
for allowed_prefix in &["inf", "-inf", "nan"] { for allowed_prefix in &["inf", "-inf", "nan"] {
if trimmed.is_char_boundary(allowed_prefix.len()) if trimmed.is_char_boundary(allowed_prefix.len())
&& trimmed[..allowed_prefix.len()].eq_ignore_ascii_case(allowed_prefix) && trimmed[..allowed_prefix.len()].eq_ignore_ascii_case(allowed_prefix)
@ -1210,11 +1221,11 @@ fn get_leading_gen(input: &str) -> Range<usize> {
} }
} }
// Make this iter peekable to see if next char is numeric // Make this iter peekable to see if next char is numeric
let mut char_indices = trimmed.char_indices().peekable(); let mut char_indices = itertools::peek_nth(trimmed.char_indices());
let first = char_indices.peek(); let first = char_indices.peek();
if first.map_or(false, |&(_, c)| c == NEGATIVE || c == POSITIVE) { if matches!(first, Some((_, NEGATIVE)) | Some((_, POSITIVE))) {
char_indices.next(); char_indices.next();
} }
@ -1224,16 +1235,29 @@ fn get_leading_gen(input: &str) -> Range<usize> {
if c.is_ascii_digit() { if c.is_ascii_digit() {
continue; continue;
} }
if c == DECIMAL_PT && !had_decimal_pt { if c == DECIMAL_PT && !had_decimal_pt && !had_e_notation {
had_decimal_pt = true; had_decimal_pt = true;
continue; continue;
} }
let next_char_numeric = char_indices if (c == 'e' || c == 'E') && !had_e_notation {
.peek() // we can only consume the 'e' if what follow is either a digit, or a sign followed by a digit.
.map_or(false, |(_, c)| c.is_ascii_digit()); if let Some(&(_, next_char)) = char_indices.peek() {
if (c == 'e' || c == 'E') && !had_e_notation && next_char_numeric { if (next_char == '+' || next_char == '-')
had_e_notation = true; && matches!(
continue; char_indices.peek_nth(2),
Some((_, c)) if c.is_ascii_digit()
)
{
// Consume the sign. The following digits will be consumed by the main loop.
char_indices.next();
had_e_notation = true;
continue;
}
if next_char.is_ascii_digit() {
had_e_notation = true;
continue;
}
}
} }
return leading_whitespace_len..(leading_whitespace_len + idx); return leading_whitespace_len..(leading_whitespace_len + idx);
} }

View file

@ -526,6 +526,11 @@ fn test_keys_with_options_blanks_start() {
} }
} }
#[test]
fn test_keys_blanks_with_char_idx() {
test_helper("keys_blanks", &["-k 1.2b"])
}
#[test] #[test]
fn test_keys_with_options_blanks_end() { fn test_keys_with_options_blanks_end() {
let input = "a b let input = "a b
@ -574,6 +579,13 @@ aaaa
.stdout_only(input); .stdout_only(input);
} }
#[test]
fn test_keys_negative_size_match() {
// If the end of a field is before its start, we should not crash.
// Debug output should report "no match for key" at the start position (i.e. the later position).
test_helper("keys_negative_size", &["-k 3,1"]);
}
#[test] #[test]
fn test_zero_terminated() { fn test_zero_terminated() {
test_helper("zero-terminated", &["-z"]); test_helper("zero-terminated", &["-z"]);

View file

@ -6,8 +6,15 @@
-12e-5555.5
0b10 // binary not supported
0x10 // hexadecimal not supported, but it should be
55e-20
55e-20.10
5.5.5.5 5.5.5.5
10E 10E
64e+
99e-
1000EDKLD 1000EDKLD
10000K78 10000K78
+100000 +100000
@ -15,5 +22,7 @@
100E6 100E6
100E6 100E6
10e10e10e10 10e10e10e10
13e+10
45e+10.5
50e10 50e10
50e10 50e10

View file

@ -22,12 +22,33 @@
^ no match for key ^ no match for key
^ no match for key ^ no match for key
> -12e-5555.5
_________
______________
0b10 // binary not supported
_
____________________________
0x10 // hexadecimal not supported, but it should be
_
___________________________________________________
55e-20
______
______
55e-20.10
______
_________
5.5.5.5 5.5.5.5
___ ___
_______ _______
10E 10E
__ __
___ ___
64e+
__
____
99e-
__
____
1000EDKLD 1000EDKLD
____ ____
_________ _________
@ -49,6 +70,12 @@ _____
10e10e10e10 10e10e10e10
_____ _____
___________ ___________
13e+10
______
______
45e+10.5
______
________
50e10 50e10
_____ _____
_____ _____

View file

@ -4,16 +4,25 @@
+100000 +100000
10000K78 10000K78
0x10 // hexadecimal not supported, but it should be
10E 10E
0b10 // binary not supported
64e+
99e-
45e+10.5
1000EDKLD 1000EDKLD
13e+10
100E6 100E6
50e10 50e10
-12e-5555.5
+100000 +100000
10e10e10e10 10e10e10e10
5.5.5.5 5.5.5.5
55e-20
55e-20.10

View file

@ -0,0 +1,3 @@
cab
abc
bca

View file

@ -0,0 +1,9 @@
>cab
__
____
>abc
__
____
>bca
__
____

3
tests/fixtures/sort/keys_blanks.txt vendored Normal file
View file

@ -0,0 +1,3 @@
abc
cab
bca

View file

@ -0,0 +1 @@
a b c

View file

@ -0,0 +1,3 @@
a b c
^ no match for key
_____

View file

@ -0,0 +1 @@
a b c

View file

@ -21,10 +21,10 @@ CARAvan
8.013 8.013
45 45
46.89 46.89
4567.
37800
576,446.88800000 576,446.88800000
576,446.890 576,446.890
4567.
37800
4798908.340000000000 4798908.340000000000
4798908.45 4798908.45
4798908.8909800 4798908.8909800

View file

@ -67,18 +67,18 @@ __
46.89 46.89
_____ _____
_____ _____
576,446.88800000
___
________________
576,446.890
___
___________
4567. 4567.
_____ _____
____________________ ____________________
>>>>37800 >>>>37800
_____ _____
_________ _________
576,446.88800000
________________
________________
576,446.890
___________
___________
4798908.340000000000 4798908.340000000000
____________________ ____________________
____________________ ____________________

View file

@ -1,30 +0,0 @@
4798908.8909800
4798908.45
4798908.340000000000
576,446.890
576,446.88800000
37800
4567.
46.89
45
8.013
1.58590
1.444
1.040000000
1
00000001
CARAvan
000
-.05
-1
-8.90880
-896689
-2028789030

View file

@ -1,30 +0,0 @@
4798908.8909800
4798908.45
4798908.340000000000
576,446.890
576,446.88800000
37800
4567.
46.89
45
8.013
1.58590
1.444
1.040000000
1
00000001
CARAvan
000
-.05
-1
-8.90880
-896689
-2028789030

View file

@ -1,30 +0,0 @@
576,446.890
576,446.88800000
4567.
45
46.89
-1
1
00000001
4798908.340000000000
4798908.45
4798908.8909800
37800
-2028789030
-896689
CARAvan
-8.90880
-.05
1.444
1.58590
1.040000000
8.013
000

View file

@ -8,11 +8,14 @@
-0
CARAvan CARAvan
-0
000 000
-0
1 1
00000001 00000001
1.040000000 1.040000000
@ -21,10 +24,10 @@ CARAvan
8.013 8.013
45 45
46.89 46.89
576,446.890
576,446.88800000
4567. 4567.
37800 37800
576,446.88800000
576,446.890
4798908.340000000000 4798908.340000000000
4798908.45 4798908.45
4798908.8909800 4798908.8909800

View file

@ -18,8 +18,12 @@ ____
^ no match for key ^ no match for key
^ no match for key ^ no match for key
-0
__
CARAvan CARAvan
^ no match for key ^ no match for key
-0
__
^ no match for key ^ no match for key
@ -28,6 +32,8 @@ CARAvan
^ no match for key ^ no match for key
000 000
___ ___
-0
__
1 1
_ _
00000001 00000001
@ -44,14 +50,14 @@ _____
__ __
46.89 46.89
_____ _____
576,446.890
___
576,446.88800000
___
4567. 4567.
_____ _____
>>>>37800 >>>>37800
_____ _____
576,446.88800000
________________
576,446.890
___________
4798908.340000000000 4798908.340000000000
____________________ ____________________
4798908.45 4798908.45

View file

@ -17,7 +17,9 @@
-2028789030 -2028789030
-896689 -896689
-0
CARAvan CARAvan
-0
-8.90880 -8.90880
-.05 -.05
@ -28,3 +30,4 @@ CARAvan
8.013 8.013
000 000
-0

View file

@ -11,10 +11,9 @@
8.013 8.013
45 45
46.89 46.89
576,446.890
4567. 4567.
37800 37800
576,446.88800000
576,446.890
4798908.340000000000 4798908.340000000000
4798908.45 4798908.45
4798908.8909800 4798908.8909800

View file

@ -24,14 +24,12 @@ _____
__ __
46.89 46.89
_____ _____
576,446.890
___
4567. 4567.
_____ _____
>>>>37800 >>>>37800
_____ _____
576,446.88800000
________________
576,446.890
___________
4798908.340000000000 4798908.340000000000
____________________ ____________________
4798908.45 4798908.45

View file

@ -1,10 +1,9 @@
4798908.8909800 4798908.8909800
4798908.45 4798908.45
4798908.340000000000 4798908.340000000000
576,446.890
576,446.88800000
37800 37800
4567. 4567.
576,446.890
46.89 46.89
45 45
8.013 8.013

View file

@ -4,14 +4,12 @@ _______________
__________ __________
4798908.340000000000 4798908.340000000000
____________________ ____________________
576,446.890
___________
576,446.88800000
________________
>>>>37800 >>>>37800
_____ _____
4567. 4567.
_____ _____
576,446.890
___
46.89 46.89
_____ _____
45 45

View file

@ -1,20 +0,0 @@
4798908.8909800
4798908.45
4798908.340000000000
576,446.890
576,446.88800000
37800
4567.
46.89
45
8.013
1.58590
1.444
1.040000000
1
-.05
-1
-8.90880
-896689
-2028789030

View file

@ -1,20 +0,0 @@
4798908.8909800
4798908.45
4798908.340000000000
576,446.890
576,446.88800000
37800
4567.
46.89
45
8.013
1.58590
1.444
1.040000000
1
-.05
-1
-8.90880
-896689
-2028789030

View file

@ -1,30 +0,0 @@
576,446.890
576,446.88800000
4567.
45
46.89
-1
1
00000001
4798908.340000000000
4798908.45
4798908.8909800
37800
-2028789030
-896689
CARAvan
-8.90880
-.05
1.444
1.58590
1.040000000
8.013
000

View file

@ -1,3 +1,4 @@
asdf
N/A Ut enim ad minim veniam, quis N/A Ut enim ad minim veniam, quis
Jan Lorem ipsum dolor sit amet Jan Lorem ipsum dolor sit amet
mar laboris nisi ut aliquip ex ea mar laboris nisi ut aliquip ex ea

View file

@ -1,3 +1,6 @@
asdf
^ no match for key
_____
N/A Ut enim ad minim veniam, quis N/A Ut enim ad minim veniam, quis
^ no match for key ^ no match for key
_________________________________ _________________________________

View file

@ -8,3 +8,4 @@ mar laboris nisi ut aliquip ex ea
Jul 2 these three lines Jul 2 these three lines
Jul 1 should remain 2,1,3 Jul 1 should remain 2,1,3
Jul 3 if --stable is provided Jul 3 if --stable is provided
asdf

View file

@ -1,4 +1,5 @@
N/A Ut enim ad minim veniam, quis N/A Ut enim ad minim veniam, quis
asdf
Jan Lorem ipsum dolor sit amet Jan Lorem ipsum dolor sit amet
mar laboris nisi ut aliquip ex ea mar laboris nisi ut aliquip ex ea
May sed do eiusmod tempor incididunt May sed do eiusmod tempor incididunt

View file

@ -1,5 +1,7 @@
N/A Ut enim ad minim veniam, quis N/A Ut enim ad minim veniam, quis
^ no match for key ^ no match for key
asdf
^ no match for key
Jan Lorem ipsum dolor sit amet Jan Lorem ipsum dolor sit amet
___ ___
mar laboris nisi ut aliquip ex ea mar laboris nisi ut aliquip ex ea

View file

@ -8,3 +8,4 @@ mar laboris nisi ut aliquip ex ea
Jul 2 these three lines Jul 2 these three lines
Jul 1 should remain 2,1,3 Jul 1 should remain 2,1,3
Jul 3 if --stable is provided Jul 3 if --stable is provided
asdf

View file

@ -21,6 +21,8 @@ CARAvan
8.013 8.013
45 45
46.89 46.89
576,446.88800000
576,446.890
4567..457 4567..457
4567. 4567.
4567.1 4567.1
@ -28,8 +30,6 @@ CARAvan
37800 37800
45670.89079.098 45670.89079.098
45670.89079.1 45670.89079.1
576,446.88800000
576,446.890
4798908.340000000000 4798908.340000000000
4798908.45 4798908.45
4798908.8909800 4798908.8909800

View file

@ -67,6 +67,12 @@ __
46.89 46.89
_____ _____
_____ _____
576,446.88800000
___
________________
576,446.890
___
___________
>>>>>>>>>>4567..457 >>>>>>>>>>4567..457
_____ _____
___________________ ___________________
@ -88,12 +94,6 @@ _____________________
>>>>>>45670.89079.1 >>>>>>45670.89079.1
___________ ___________
___________________ ___________________
576,446.88800000
________________
________________
576,446.890
___________
___________
4798908.340000000000 4798908.340000000000
____________________ ____________________
____________________ ____________________