diff --git a/Cargo.lock b/Cargo.lock index d2b2fa32e..a6ddf7105 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2293,7 +2293,7 @@ version = "0.0.6" dependencies = [ "clap", "fnv", - "itertools 0.8.2", + "itertools 0.10.0", "rand 0.7.3", "rayon", "semver", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 96e88ebc9..6a9976278 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -19,7 +19,7 @@ rayon = "1.5" rand = "0.7" clap = "2.33" fnv = "1.0.7" -itertools = "0.8.0" +itertools = "0.10.0" semver = "0.9.0" smallvec = "1.6.1" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 4f669f578..35ab71ba2 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -162,6 +162,7 @@ impl From<&GlobalSettings> for KeySettings { } /// Represents the string selected by a FieldSelector. +#[derive(Debug)] enum Selection { /// If we had to transform this selection, we have to store a new string. String(String), @@ -181,6 +182,7 @@ impl Selection { type Field = Range; +#[derive(Debug)] struct Line { line: String, // The common case is not to specify fields. Let's make this fast. @@ -868,22 +870,12 @@ fn exec(files: Vec, settings: &GlobalSettings) -> i32 { } else { print_sorted(file_merger, &settings) } - } else if settings.mode == SortMode::Default && settings.unique { - print_sorted(lines.into_iter().map(|line| line.line).dedup(), &settings) - } else if settings.mode == SortMode::Month && settings.unique { - print_sorted( - lines - .into_iter() - .map(|line| line.line) - .dedup_by(|a, b| get_months_dedup(a) == get_months_dedup(b)), - &settings, - ) } else if settings.unique { print_sorted( lines .into_iter() - .map(|line| line.line) - .dedup_by(|a, b| get_num_dedup(a, settings) == get_num_dedup(b, settings)), + .dedup_by(|a, b| compare_by(a, b, settings) == Ordering::Equal) + .map(|line| line.line), &settings, ) } else { @@ -1062,63 +1054,6 @@ fn get_leading_gen(a: &str) -> &str { result } -fn get_months_dedup(a: &str) -> String { - let pattern = if a.trim().len().ge(&3) { - // Split at 3rd char and get first element of tuple ".0" - a.split_at(3).0 - } else { - "" - }; - - let month = match pattern.to_uppercase().as_ref() { - "JAN" => Month::January, - "FEB" => Month::February, - "MAR" => Month::March, - "APR" => Month::April, - "MAY" => Month::May, - "JUN" => Month::June, - "JUL" => Month::July, - "AUG" => Month::August, - "SEP" => Month::September, - "OCT" => Month::October, - "NOV" => Month::November, - "DEC" => Month::December, - _ => Month::Unknown, - }; - - if month == Month::Unknown { - "".to_owned() - } else { - pattern.to_uppercase() - } -} - -// *For all dedups/uniques expect default we must compare leading numbers* -// Also note numeric compare and unique output is specifically *not* the same as a "sort | uniq" -// See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html -fn get_num_dedup<'a>(a: &'a str, settings: &GlobalSettings) -> &'a str { - // Trim and remove any leading zeros - let s = a.trim().trim_start_matches('0'); - - // Get first char - let c = s.chars().next().unwrap_or('\0'); - - // Empty lines and non-number lines are treated as the same for dedup - // Prepare lines for comparison of only the numerical leading numbers - if s.is_empty() || (!c.eq(&NEGATIVE) && !c.is_numeric()) { - "" - } else { - let result = match settings.mode { - SortMode::Numeric => get_leading_num(s), - SortMode::GeneralNumeric => get_leading_gen(s), - SortMode::HumanNumeric => get_leading_num(s), - SortMode::Version => get_leading_num(s), - _ => s, - }; - result - } -} - #[inline(always)] fn remove_thousands_sep<'a, S: Into>>(input: S) -> Cow<'a, str> { let input = input.into(); diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 668e783ae..866beefff 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -223,6 +223,16 @@ fn test_mixed_floats_ints_chars_numeric_unique() { test_helper("mixed_floats_ints_chars_numeric_unique", "-nu"); } +#[test] +fn test_words_unique() { + test_helper("words_unique", "-u"); +} + +#[test] +fn test_numeric_unique() { + test_helper("numeric_unique", "-nu"); +} + #[test] fn test_mixed_floats_ints_chars_numeric_reverse() { test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur"); diff --git a/tests/fixtures/sort/numeric_unique.expected b/tests/fixtures/sort/numeric_unique.expected new file mode 100644 index 000000000..8a31187f6 --- /dev/null +++ b/tests/fixtures/sort/numeric_unique.expected @@ -0,0 +1,2 @@ +-10 bb +aa diff --git a/tests/fixtures/sort/numeric_unique.txt b/tests/fixtures/sort/numeric_unique.txt new file mode 100644 index 000000000..15cc08022 --- /dev/null +++ b/tests/fixtures/sort/numeric_unique.txt @@ -0,0 +1,3 @@ +aa +-10 bb +-10 aa diff --git a/tests/fixtures/sort/words_unique.expected b/tests/fixtures/sort/words_unique.expected new file mode 100644 index 000000000..2444ce1c6 --- /dev/null +++ b/tests/fixtures/sort/words_unique.expected @@ -0,0 +1,3 @@ +aaa +bbb +zzz diff --git a/tests/fixtures/sort/words_unique.txt b/tests/fixtures/sort/words_unique.txt new file mode 100644 index 000000000..9c6666029 --- /dev/null +++ b/tests/fixtures/sort/words_unique.txt @@ -0,0 +1,4 @@ +zzz +aaa +bbb +bbb