From f851fb64544b83b24d00d1826252c35dedd6c819 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Mon, 26 Jul 2021 12:28:29 +0200 Subject: [PATCH 1/2] sort: initialize the salt when the random setting is on a key as well Additionall, change the type of `salt` from `String` to `Option<[u8; 16]>`. --- src/uu/sort/src/sort.rs | 32 ++++++++++++++------------------ tests/by-util/test_sort.rs | 37 ++++++++++++------------------------- 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 4362863d5..06bcc7ff8 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -29,7 +29,6 @@ use custom_str_cmp::custom_str_cmp; use ext_sort::ext_sort; use fnv::FnvHasher; use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings}; -use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; use std::cmp::Ordering; @@ -183,7 +182,7 @@ pub struct GlobalSettings { unique: bool, check: bool, check_silent: bool, - salt: String, + salt: Option<[u8; 16]>, selectors: Vec, separator: Option, threads: String, @@ -266,7 +265,7 @@ impl Default for GlobalSettings { unique: false, check: false, check_silent: false, - salt: String::new(), + salt: None, selectors: vec![], separator: None, threads: String::new(), @@ -1006,7 +1005,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } else if matches.is_present(options::modes::RANDOM) || matches.value_of(options::modes::SORT) == Some("random") { - settings.salt = get_rand_string(); + settings.salt = Some(get_rand_string()); SortMode::Random } else { SortMode::Default @@ -1086,9 +1085,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if let Some(values) = matches.values_of(options::KEY) { for value in values { - settings - .selectors - .push(FieldSelector::parse(value, &settings)); + let selector = FieldSelector::parse(value, &settings); + if selector.settings.mode == SortMode::Random && settings.salt.is_none() { + settings.salt = Some(get_rand_string()); + } + settings.selectors.push(selector); } } @@ -1397,7 +1398,7 @@ fn compare_by<'a>( let settings = &selector.settings; let cmp: Ordering = match settings.mode { - SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt), + SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt.unwrap()), SortMode::Numeric => { let a_num_info = &a_line_data.num_infos [a.index * global_settings.precomputed.num_infos_per_line + num_info_index]; @@ -1546,12 +1547,8 @@ fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult) a.partial_cmp(b).unwrap() } -fn get_rand_string() -> String { - thread_rng() - .sample_iter(&Alphanumeric) - .take(16) - .map(char::from) - .collect::() +fn get_rand_string() -> [u8; 16] { + thread_rng().sample(rand::distributions::Standard) } fn get_hash(t: &T) -> u64 { @@ -1560,10 +1557,9 @@ fn get_hash(t: &T) -> u64 { s.finish() } -fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering { - let da = get_hash(&[a, salt].concat()); - let db = get_hash(&[b, salt].concat()); - +fn random_shuffle(a: &str, b: &str, salt: &[u8]) -> Ordering { + let da = get_hash(&(a, salt)); + let db = get_hash(&(b, salt)); da.cmp(&db) } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 5f44ce35f..16437d526 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -220,32 +220,19 @@ fn test_random_shuffle_contains_all_lines() { #[test] fn test_random_shuffle_two_runs_not_the_same() { - // check to verify that two random shuffles are not equal; this has the - // potential to fail in the very unlikely event that the random order is the same - // as the starting order, or if both random sorts end up having the same order. - const FILE: &str = "default_unsorted_ints.expected"; - let (at, _ucmd) = at_and_ucmd!(); - let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); - let expected = at.read(FILE); - let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); + for arg in &["-R", "-k1,1R"] { + // check to verify that two random shuffles are not equal; this has the + // potential to fail in the very unlikely event that the random order is the same + // as the starting order, or if both random sorts end up having the same order. + const FILE: &str = "default_unsorted_ints.expected"; + let (at, _ucmd) = at_and_ucmd!(); + let result = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str(); + let expected = at.read(FILE); + let unexpected = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str(); - assert_ne!(result, expected); - assert_ne!(result, unexpected); -} - -#[test] -fn test_random_shuffle_contains_two_runs_not_the_same() { - // check to verify that two random shuffles are not equal; this has the - // potential to fail in the unlikely event that random order is the same - // as the starting order, or if both random sorts end up having the same order. - const FILE: &str = "default_unsorted_ints.expected"; - let (at, _ucmd) = at_and_ucmd!(); - let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); - let expected = at.read(FILE); - let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str(); - - assert_ne!(result, expected); - assert_ne!(result, unexpected); + assert_ne!(result, expected); + assert_ne!(result, unexpected); + } } #[test] From 3564dc57924acd958a77085836dd34f9fed0fd76 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Mon, 26 Jul 2021 16:27:24 +0200 Subject: [PATCH 2/2] sort: compare strings before comparing hashes Since lines that compare equal should be sorted together, we need to first compare the lines (taking settings into account). Only if they do not compare equal we should compare the hashes. --- src/uu/sort/src/sort.rs | 17 ++++++++++++++++- tests/by-util/test_sort.rs | 10 ++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 06bcc7ff8..aa4d483c0 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1398,7 +1398,22 @@ fn compare_by<'a>( let settings = &selector.settings; let cmp: Ordering = match settings.mode { - SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt.unwrap()), + SortMode::Random => { + // check if the two strings are equal + if custom_str_cmp( + a_str, + b_str, + settings.ignore_non_printing, + settings.dictionary_order, + settings.ignore_case, + ) == Ordering::Equal + { + Ordering::Equal + } else { + // Only if they are not equal compare by the hash + random_shuffle(a_str, b_str, &global_settings.salt.unwrap()) + } + } SortMode::Numeric => { let a_num_info = &a_line_data.num_infos [a.index * global_settings.precomputed.num_infos_per_line + num_info_index]; diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 16437d526..8573ce03f 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -235,6 +235,16 @@ fn test_random_shuffle_two_runs_not_the_same() { } } +#[test] +fn test_random_ignore_case() { + let input = "ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n"; + new_ucmd!() + .args(&["-fR"]) + .pipe_in(input) + .succeeds() + .stdout_is(input); +} + #[test] fn test_numeric_floats_and_ints() { test_helper(