mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
sort: dedup using compare_by (#2064)
compare_by is the function used for sorting, so we should use it for dedup as well.
This commit is contained in:
parent
49c9d8c901
commit
69f4410a8a
8 changed files with 28 additions and 71 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -2293,7 +2293,7 @@ version = "0.0.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"fnv",
|
"fnv",
|
||||||
"itertools 0.8.2",
|
"itertools 0.10.0",
|
||||||
"rand 0.7.3",
|
"rand 0.7.3",
|
||||||
"rayon",
|
"rayon",
|
||||||
"semver",
|
"semver",
|
||||||
|
|
|
@ -19,7 +19,7 @@ rayon = "1.5"
|
||||||
rand = "0.7"
|
rand = "0.7"
|
||||||
clap = "2.33"
|
clap = "2.33"
|
||||||
fnv = "1.0.7"
|
fnv = "1.0.7"
|
||||||
itertools = "0.8.0"
|
itertools = "0.10.0"
|
||||||
semver = "0.9.0"
|
semver = "0.9.0"
|
||||||
smallvec = "1.6.1"
|
smallvec = "1.6.1"
|
||||||
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] }
|
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] }
|
||||||
|
|
|
@ -162,6 +162,7 @@ impl From<&GlobalSettings> for KeySettings {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represents the string selected by a FieldSelector.
|
/// Represents the string selected by a FieldSelector.
|
||||||
|
#[derive(Debug)]
|
||||||
enum Selection {
|
enum Selection {
|
||||||
/// If we had to transform this selection, we have to store a new string.
|
/// If we had to transform this selection, we have to store a new string.
|
||||||
String(String),
|
String(String),
|
||||||
|
@ -181,6 +182,7 @@ impl Selection {
|
||||||
|
|
||||||
type Field = Range<usize>;
|
type Field = Range<usize>;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
struct Line {
|
struct Line {
|
||||||
line: String,
|
line: String,
|
||||||
// The common case is not to specify fields. Let's make this fast.
|
// The common case is not to specify fields. Let's make this fast.
|
||||||
|
@ -868,22 +870,12 @@ fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 {
|
||||||
} else {
|
} else {
|
||||||
print_sorted(file_merger, &settings)
|
print_sorted(file_merger, &settings)
|
||||||
}
|
}
|
||||||
} else if settings.mode == SortMode::Default && settings.unique {
|
|
||||||
print_sorted(lines.into_iter().map(|line| line.line).dedup(), &settings)
|
|
||||||
} else if settings.mode == SortMode::Month && settings.unique {
|
|
||||||
print_sorted(
|
|
||||||
lines
|
|
||||||
.into_iter()
|
|
||||||
.map(|line| line.line)
|
|
||||||
.dedup_by(|a, b| get_months_dedup(a) == get_months_dedup(b)),
|
|
||||||
&settings,
|
|
||||||
)
|
|
||||||
} else if settings.unique {
|
} else if settings.unique {
|
||||||
print_sorted(
|
print_sorted(
|
||||||
lines
|
lines
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|line| line.line)
|
.dedup_by(|a, b| compare_by(a, b, settings) == Ordering::Equal)
|
||||||
.dedup_by(|a, b| get_num_dedup(a, settings) == get_num_dedup(b, settings)),
|
.map(|line| line.line),
|
||||||
&settings,
|
&settings,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
|
@ -1062,63 +1054,6 @@ fn get_leading_gen(a: &str) -> &str {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_months_dedup(a: &str) -> String {
|
|
||||||
let pattern = if a.trim().len().ge(&3) {
|
|
||||||
// Split at 3rd char and get first element of tuple ".0"
|
|
||||||
a.split_at(3).0
|
|
||||||
} else {
|
|
||||||
""
|
|
||||||
};
|
|
||||||
|
|
||||||
let month = match pattern.to_uppercase().as_ref() {
|
|
||||||
"JAN" => Month::January,
|
|
||||||
"FEB" => Month::February,
|
|
||||||
"MAR" => Month::March,
|
|
||||||
"APR" => Month::April,
|
|
||||||
"MAY" => Month::May,
|
|
||||||
"JUN" => Month::June,
|
|
||||||
"JUL" => Month::July,
|
|
||||||
"AUG" => Month::August,
|
|
||||||
"SEP" => Month::September,
|
|
||||||
"OCT" => Month::October,
|
|
||||||
"NOV" => Month::November,
|
|
||||||
"DEC" => Month::December,
|
|
||||||
_ => Month::Unknown,
|
|
||||||
};
|
|
||||||
|
|
||||||
if month == Month::Unknown {
|
|
||||||
"".to_owned()
|
|
||||||
} else {
|
|
||||||
pattern.to_uppercase()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// *For all dedups/uniques except default we must compare leading numbers*
|
|
||||||
// Also note numeric compare and unique output is specifically *not* the same as a "sort | uniq"
|
|
||||||
// See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
|
|
||||||
fn get_num_dedup<'a>(a: &'a str, settings: &GlobalSettings) -> &'a str {
|
|
||||||
// Trim and remove any leading zeros
|
|
||||||
let s = a.trim().trim_start_matches('0');
|
|
||||||
|
|
||||||
// Get first char
|
|
||||||
let c = s.chars().next().unwrap_or('\0');
|
|
||||||
|
|
||||||
// Empty lines and non-number lines are treated as the same for dedup
|
|
||||||
// Prepare lines for comparison of only the numerical leading numbers
|
|
||||||
if s.is_empty() || (!c.eq(&NEGATIVE) && !c.is_numeric()) {
|
|
||||||
""
|
|
||||||
} else {
|
|
||||||
let result = match settings.mode {
|
|
||||||
SortMode::Numeric => get_leading_num(s),
|
|
||||||
SortMode::GeneralNumeric => get_leading_gen(s),
|
|
||||||
SortMode::HumanNumeric => get_leading_num(s),
|
|
||||||
SortMode::Version => get_leading_num(s),
|
|
||||||
_ => s,
|
|
||||||
};
|
|
||||||
result
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn remove_thousands_sep<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
|
fn remove_thousands_sep<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
|
||||||
let input = input.into();
|
let input = input.into();
|
||||||
|
|
|
@ -223,6 +223,16 @@ fn test_mixed_floats_ints_chars_numeric_unique() {
|
||||||
test_helper("mixed_floats_ints_chars_numeric_unique", "-nu");
|
test_helper("mixed_floats_ints_chars_numeric_unique", "-nu");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_words_unique() {
|
||||||
|
test_helper("words_unique", "-u");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_numeric_unique() {
|
||||||
|
test_helper("numeric_unique", "-nu");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mixed_floats_ints_chars_numeric_reverse() {
|
fn test_mixed_floats_ints_chars_numeric_reverse() {
|
||||||
test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur");
|
test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur");
|
||||||
|
|
2
tests/fixtures/sort/numeric_unique.expected
vendored
Normal file
2
tests/fixtures/sort/numeric_unique.expected
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
-10 bb
|
||||||
|
aa
|
3
tests/fixtures/sort/numeric_unique.txt
vendored
Normal file
3
tests/fixtures/sort/numeric_unique.txt
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
aa
|
||||||
|
-10 bb
|
||||||
|
-10 aa
|
3
tests/fixtures/sort/words_unique.expected
vendored
Normal file
3
tests/fixtures/sort/words_unique.expected
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
aaa
|
||||||
|
bbb
|
||||||
|
zzz
|
4
tests/fixtures/sort/words_unique.txt
vendored
Normal file
4
tests/fixtures/sort/words_unique.txt
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
zzz
|
||||||
|
aaa
|
||||||
|
bbb
|
||||||
|
bbb
|
Loading…
Add table
Add a link
Reference in a new issue