From 8a05148d7be9949d2da93126c21ad2606d53cb2d Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Wed, 21 Apr 2021 17:56:59 +0200 Subject: [PATCH 1/2] sort: fix tokenization for trailing separators Trailing separators were included at the end of the last token, but they should not be. This changes tokenize_with_separator as suggested by @cbjadwani. --- src/uu/sort/src/sort.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 07b852921..7515ca1c9 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -351,20 +351,18 @@ fn tokenize_default(line: &str) -> Vec { /// Split between separators. These separators are not included in fields. fn tokenize_with_separator(line: &str, separator: char) -> Vec { - let mut tokens = vec![0..0]; - let mut previous_was_separator = false; - for (idx, char) in line.char_indices() { - if previous_was_separator { - tokens.push(idx..0); - } - if char == separator { - tokens.last_mut().unwrap().end = idx; - previous_was_separator = true; - } else { - previous_was_separator = false; - } + let mut tokens = vec![]; + let separator_indices = + line.char_indices() + .filter_map(|(i, c)| if c == separator { Some(i) } else { None }); + let mut start = 0; + for sep_idx in separator_indices { + tokens.push(start..sep_idx); + start = sep_idx + 1; + } + if start < line.len() { + tokens.push(start..line.len()); } - tokens.last_mut().unwrap().end = line.len(); tokens } @@ -1383,4 +1381,14 @@ mod tests { vec![0..0, 1..1, 2..2, 3..9, 10..18,] ); } + + #[test] + fn test_tokenize_fields_trailing_custom_separator() { + let line = "a"; + assert_eq!(tokenize(line, Some('a')), vec![0..0]); + let line = "aa"; + assert_eq!(tokenize(line, Some('a')), vec![0..0, 1..1]); + let line = "..a..a"; + assert_eq!(tokenize(line, Some('a')), vec![0..2, 3..5]); + } } From 8914fd0a579a0e0da51ea94319fdca96f13dcc18 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Wed, 21 Apr 2021 19:26:17 +0200 Subject: [PATCH 2/2] add an integration test --- tests/by-util/test_sort.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index a4a9a383c..72d4f67fc 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -581,3 +581,12 @@ fn test_check_silent() { .fails() .stdout_is(""); } + +#[test] +fn test_trailing_separator() { + new_ucmd!() + .args(&["-t", "x", "-k", "1,1"]) + .pipe_in("aax\naaa\n") + .succeeds() + .stdout_is("aax\naaa\n"); +} \ No newline at end of file