diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 8340778b0..4d166e21b 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -48,8 +48,8 @@ struct Settings { stable: bool, unique: bool, check: bool, - ignore_case: bool, compare_fns: Vec Ordering>, + transform_fns: Vec String>, } impl Default for Settings { @@ -62,8 +62,8 @@ impl Default for Settings { stable: false, unique: false, check: false, - ignore_case: false, compare_fns: Vec::new(), + transform_fns: Vec::new(), } } } @@ -148,6 +148,11 @@ pub fn uumain(args: Vec) -> i32 { let mut settings: Settings = Default::default(); let mut opts = getopts::Options::new(); + opts.optflag( + "d", + "dictionary-order", + "consider only blanks and alphanumeric characters", + ); opts.optflag( "f", "ignore-case", @@ -236,7 +241,13 @@ With no FILE, or when FILE is -, read standard input.", settings.stable = matches.opt_present("stable"); settings.unique = matches.opt_present("unique"); settings.check = matches.opt_present("check"); - settings.ignore_case = matches.opt_present("ignore-case"); + + if matches.opt_present("dictionary-order") { + settings.transform_fns.push(remove_nondictionary_chars); + } + if matches.opt_present("ignore-case") { + settings.transform_fns.push(|s| s.to_uppercase()); + } let mut files = matches.free; if files.is_empty() { @@ -343,17 +354,25 @@ fn exec_check_file(lines: Lines>>, settings: &Settings) } } +fn transform(line: &str, settings: &Settings) -> String { + let mut transformed = line.to_string(); + for transform_fn in &settings.transform_fns { + transformed = transform_fn(&transformed); + } + + transformed +} + fn sort_by(lines: &mut Vec, settings: &Settings) { lines.sort_by(|a, b| compare_by(a, b, &settings)) } fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering { - // Convert to uppercase if necessary - let (a_upper, b_upper): (String, String); - let (a, b) = if settings.ignore_case { - a_upper = a.to_uppercase(); - b_upper = b.to_uppercase(); - (&*a_upper, &*b_upper) + let (a_transformed, b_transformed): (String, String); + let (a, b) = if !settings.transform_fns.is_empty() { + a_transformed = transform(&a, &settings); + b_transformed = transform(&b, &settings); + (a_transformed.as_str(), b_transformed.as_str()) } else { (a, b) }; @@ -504,6 +523,15 @@ fn version_compare(a: &str, b: &str) -> Ordering { } } +fn remove_nondictionary_chars(s: &str) -> String { + // Using 'is_ascii_whitespace()' instead of 'is_whitespace()', because it + // uses only symbols compatible with UNIX sort (space, tab, newline). + // 'is_whitespace()' uses more symbols as whitespaces (e.g. vertical tab). + s.chars() + .filter(|c| c.is_alphanumeric() || c.is_ascii_whitespace()) + .collect::() +} + fn print_sorted>(iter: T, outfile: &Option) where S: std::fmt::Display, diff --git a/tests/fixtures/sort/dictionary_order.expected b/tests/fixtures/sort/dictionary_order.expected new file mode 100644 index 000000000..cfce86aec --- /dev/null +++ b/tests/fixtures/sort/dictionary_order.expected @@ -0,0 +1,3 @@ +bbb +./bbc +bbd diff --git a/tests/fixtures/sort/dictionary_order.txt b/tests/fixtures/sort/dictionary_order.txt new file mode 100644 index 000000000..cd4525ec7 --- /dev/null +++ b/tests/fixtures/sort/dictionary_order.txt @@ -0,0 +1,3 @@ +./bbc +bbd +bbb \ No newline at end of file diff --git a/tests/test_sort.rs b/tests/test_sort.rs index e7085787a..d40a8fe95 100644 --- a/tests/test_sort.rs +++ b/tests/test_sort.rs @@ -65,6 +65,11 @@ fn test_ignore_case() { test_helper("ignore_case", "-f"); } +#[test] +fn test_dictionary_order() { + test_helper("dictionary_order", "-d"); +} + #[test] fn test_multiple_files() { new_ucmd!()