mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-02 22:17:45 +00:00
Merge pull request #2521 from miDeb/sort/rand
sort: improve compatibility of --random-sort
This commit is contained in:
commit
103a9d52ff
2 changed files with 48 additions and 40 deletions
|
@ -29,7 +29,6 @@ use custom_str_cmp::custom_str_cmp;
|
||||||
use ext_sort::ext_sort;
|
use ext_sort::ext_sort;
|
||||||
use fnv::FnvHasher;
|
use fnv::FnvHasher;
|
||||||
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
|
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
|
||||||
use rand::distributions::Alphanumeric;
|
|
||||||
use rand::{thread_rng, Rng};
|
use rand::{thread_rng, Rng};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
|
@ -183,7 +182,7 @@ pub struct GlobalSettings {
|
||||||
unique: bool,
|
unique: bool,
|
||||||
check: bool,
|
check: bool,
|
||||||
check_silent: bool,
|
check_silent: bool,
|
||||||
salt: String,
|
salt: Option<[u8; 16]>,
|
||||||
selectors: Vec<FieldSelector>,
|
selectors: Vec<FieldSelector>,
|
||||||
separator: Option<char>,
|
separator: Option<char>,
|
||||||
threads: String,
|
threads: String,
|
||||||
|
@ -266,7 +265,7 @@ impl Default for GlobalSettings {
|
||||||
unique: false,
|
unique: false,
|
||||||
check: false,
|
check: false,
|
||||||
check_silent: false,
|
check_silent: false,
|
||||||
salt: String::new(),
|
salt: None,
|
||||||
selectors: vec![],
|
selectors: vec![],
|
||||||
separator: None,
|
separator: None,
|
||||||
threads: String::new(),
|
threads: String::new(),
|
||||||
|
@ -1006,7 +1005,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
} else if matches.is_present(options::modes::RANDOM)
|
} else if matches.is_present(options::modes::RANDOM)
|
||||||
|| matches.value_of(options::modes::SORT) == Some("random")
|
|| matches.value_of(options::modes::SORT) == Some("random")
|
||||||
{
|
{
|
||||||
settings.salt = get_rand_string();
|
settings.salt = Some(get_rand_string());
|
||||||
SortMode::Random
|
SortMode::Random
|
||||||
} else {
|
} else {
|
||||||
SortMode::Default
|
SortMode::Default
|
||||||
|
@ -1086,9 +1085,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
if let Some(values) = matches.values_of(options::KEY) {
|
if let Some(values) = matches.values_of(options::KEY) {
|
||||||
for value in values {
|
for value in values {
|
||||||
settings
|
let selector = FieldSelector::parse(value, &settings);
|
||||||
.selectors
|
if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
|
||||||
.push(FieldSelector::parse(value, &settings));
|
settings.salt = Some(get_rand_string());
|
||||||
|
}
|
||||||
|
settings.selectors.push(selector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1397,7 +1398,22 @@ fn compare_by<'a>(
|
||||||
let settings = &selector.settings;
|
let settings = &selector.settings;
|
||||||
|
|
||||||
let cmp: Ordering = match settings.mode {
|
let cmp: Ordering = match settings.mode {
|
||||||
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
|
SortMode::Random => {
|
||||||
|
// check if the two strings are equal
|
||||||
|
if custom_str_cmp(
|
||||||
|
a_str,
|
||||||
|
b_str,
|
||||||
|
settings.ignore_non_printing,
|
||||||
|
settings.dictionary_order,
|
||||||
|
settings.ignore_case,
|
||||||
|
) == Ordering::Equal
|
||||||
|
{
|
||||||
|
Ordering::Equal
|
||||||
|
} else {
|
||||||
|
// Only if they are not equal compare by the hash
|
||||||
|
random_shuffle(a_str, b_str, &global_settings.salt.unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
SortMode::Numeric => {
|
SortMode::Numeric => {
|
||||||
let a_num_info = &a_line_data.num_infos
|
let a_num_info = &a_line_data.num_infos
|
||||||
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
|
||||||
|
@ -1546,12 +1562,8 @@ fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult)
|
||||||
a.partial_cmp(b).unwrap()
|
a.partial_cmp(b).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_rand_string() -> String {
|
fn get_rand_string() -> [u8; 16] {
|
||||||
thread_rng()
|
thread_rng().sample(rand::distributions::Standard)
|
||||||
.sample_iter(&Alphanumeric)
|
|
||||||
.take(16)
|
|
||||||
.map(char::from)
|
|
||||||
.collect::<String>()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_hash<T: Hash>(t: &T) -> u64 {
|
fn get_hash<T: Hash>(t: &T) -> u64 {
|
||||||
|
@ -1560,10 +1572,9 @@ fn get_hash<T: Hash>(t: &T) -> u64 {
|
||||||
s.finish()
|
s.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering {
|
fn random_shuffle(a: &str, b: &str, salt: &[u8]) -> Ordering {
|
||||||
let da = get_hash(&[a, salt].concat());
|
let da = get_hash(&(a, salt));
|
||||||
let db = get_hash(&[b, salt].concat());
|
let db = get_hash(&(b, salt));
|
||||||
|
|
||||||
da.cmp(&db)
|
da.cmp(&db)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -220,32 +220,29 @@ fn test_random_shuffle_contains_all_lines() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_random_shuffle_two_runs_not_the_same() {
|
fn test_random_shuffle_two_runs_not_the_same() {
|
||||||
// check to verify that two random shuffles are not equal; this has the
|
for arg in &["-R", "-k1,1R"] {
|
||||||
// potential to fail in the very unlikely event that the random order is the same
|
// check to verify that two random shuffles are not equal; this has the
|
||||||
// as the starting order, or if both random sorts end up having the same order.
|
// potential to fail in the very unlikely event that the random order is the same
|
||||||
const FILE: &str = "default_unsorted_ints.expected";
|
// as the starting order, or if both random sorts end up having the same order.
|
||||||
let (at, _ucmd) = at_and_ucmd!();
|
const FILE: &str = "default_unsorted_ints.expected";
|
||||||
let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
|
let (at, _ucmd) = at_and_ucmd!();
|
||||||
let expected = at.read(FILE);
|
let result = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str();
|
||||||
let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
|
let expected = at.read(FILE);
|
||||||
|
let unexpected = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str();
|
||||||
|
|
||||||
assert_ne!(result, expected);
|
assert_ne!(result, expected);
|
||||||
assert_ne!(result, unexpected);
|
assert_ne!(result, unexpected);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_random_shuffle_contains_two_runs_not_the_same() {
|
fn test_random_ignore_case() {
|
||||||
// check to verify that two random shuffles are not equal; this has the
|
let input = "ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n";
|
||||||
// potential to fail in the unlikely event that random order is the same
|
new_ucmd!()
|
||||||
// as the starting order, or if both random sorts end up having the same order.
|
.args(&["-fR"])
|
||||||
const FILE: &str = "default_unsorted_ints.expected";
|
.pipe_in(input)
|
||||||
let (at, _ucmd) = at_and_ucmd!();
|
.succeeds()
|
||||||
let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
|
.stdout_is(input);
|
||||||
let expected = at.read(FILE);
|
|
||||||
let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
|
|
||||||
|
|
||||||
assert_ne!(result, expected);
|
|
||||||
assert_ne!(result, unexpected);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue