diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index fd728c41d..560d6b438 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -2,7 +2,7 @@ Most of the time when sorting is spent comparing lines. The comparison functions however differ based on which arguments are passed to `sort`, therefore it is important to always benchmark multiple scenarios. -This is an overwiew over what was benchmarked, and if you make changes to `sort`, you are encouraged to check +This is an overview over what was benchmarked, and if you make changes to `sort`, you are encouraged to check how performance was affected for the workloads listed below. Feel free to add other workloads to the list that we should improve / make sure not to regress. diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 5fdaf32cf..a3b79e5d7 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -108,7 +108,7 @@ const POSITIVE: char = '+'; // available memory into consideration, instead of relying on this constant only. static DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB -#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)] enum SortMode { Numeric, HumanNumeric, @@ -118,6 +118,21 @@ enum SortMode { Random, Default, } + +impl SortMode { + fn get_short_name(&self) -> Option { + match self { + SortMode::Numeric => Some('n'), + SortMode::HumanNumeric => Some('h'), + SortMode::GeneralNumeric => Some('g'), + SortMode::Month => Some('M'), + SortMode::Version => Some('V'), + SortMode::Random => Some('R'), + SortMode::Default => None, + } + } +} + #[derive(Clone)] pub struct GlobalSettings { mode: SortMode, @@ -211,7 +226,7 @@ impl Default for GlobalSettings { } } } -#[derive(Clone)] +#[derive(Clone, PartialEq, Debug)] struct KeySettings { mode: SortMode, ignore_blanks: bool, @@ -221,6 +236,60 @@ struct KeySettings { reverse: bool, } +impl KeySettings { + /// Checks if the supplied combination of `mode`, `ignore_non_printing` and `dictionary_order` is allowed. + fn check_compatibility( + mode: SortMode, + ignore_non_printing: bool, + dictionary_order: bool, + ) -> Result<(), String> { + if matches!( + mode, + SortMode::Numeric | SortMode::HumanNumeric | SortMode::GeneralNumeric | SortMode::Month + ) { + if dictionary_order { + return Err(format!( + "options '-{}{}' are incompatible", + 'd', + mode.get_short_name().unwrap() + )); + } else if ignore_non_printing { + return Err(format!( + "options '-{}{}' are incompatible", + 'i', + mode.get_short_name().unwrap() + )); + } + } + Ok(()) + } + + fn set_sort_mode(&mut self, mode: SortMode) -> Result<(), String> { + if self.mode != SortMode::Default { + return Err(format!( + "options '-{}{}' are incompatible", + self.mode.get_short_name().unwrap(), + mode.get_short_name().unwrap() + )); + } + Self::check_compatibility(mode, self.ignore_non_printing, self.dictionary_order)?; + self.mode = mode; + Ok(()) + } + + fn set_dictionary_order(&mut self) -> Result<(), String> { + Self::check_compatibility(self.mode, self.ignore_non_printing, true)?; + self.dictionary_order = true; + Ok(()) + } + + fn set_ignore_non_printing(&mut self) -> Result<(), String> { + Self::check_compatibility(self.mode, true, self.dictionary_order)?; + self.ignore_non_printing = true; + Ok(()) + } +} + impl From<&GlobalSettings> for KeySettings { fn from(settings: &GlobalSettings) -> Self { Self { @@ -234,6 +303,12 @@ impl From<&GlobalSettings> for KeySettings { } } +impl Default for KeySettings { + fn default() -> Self { + Self::from(&GlobalSettings::default()) + } +} + #[derive(Clone, Debug)] enum NumCache { AsF64(GeneralF64ParseResult), @@ -412,14 +487,18 @@ impl<'a> Line<'a> { } } } - if !(settings.mode == SortMode::Random - || settings.stable - || settings.unique - || !(settings.dictionary_order + if settings.mode != SortMode::Random + && !settings.stable + && !settings.unique + && (settings.dictionary_order || settings.ignore_blanks || settings.ignore_case || settings.ignore_non_printing - || settings.mode != SortMode::Default)) + || settings.mode != SortMode::Default + || settings + .selectors + .last() + .map_or(true, |selector| selector != &Default::default())) { // A last resort comparator is in use, underline the whole line. if self.line.is_empty() { @@ -483,7 +562,7 @@ fn tokenize_with_separator(line: &str, separator: char) -> Vec { tokens } -#[derive(Clone)] +#[derive(Clone, PartialEq, Debug)] struct KeyPosition { /// 1-indexed, 0 is invalid. field: usize, @@ -493,87 +572,45 @@ struct KeyPosition { } impl KeyPosition { - fn parse(key: &str, default_char_index: usize, settings: &mut KeySettings) -> Self { + fn new(key: &str, default_char_index: usize, ignore_blanks: bool) -> Result { let mut field_and_char = key.split('.'); - let mut field = field_and_char + + let field = field_and_char .next() - .unwrap_or_else(|| crash!(1, "invalid key `{}`", key)); - let mut char = field_and_char.next(); - - // If there is a char index, we expect options to appear after it. Otherwise we expect them after the field index. - let value_with_options = char.as_mut().unwrap_or(&mut field); - - let mut ignore_blanks = settings.ignore_blanks; - if let Some(options_start) = value_with_options.chars().position(char::is_alphabetic) { - for option in value_with_options[options_start..].chars() { - // valid options: MbdfghinRrV - match option { - 'M' => settings.mode = SortMode::Month, - 'b' => ignore_blanks = true, - 'd' => settings.dictionary_order = true, - 'f' => settings.ignore_case = true, - 'g' => settings.mode = SortMode::GeneralNumeric, - 'h' => settings.mode = SortMode::HumanNumeric, - 'i' => settings.ignore_non_printing = true, - 'n' => settings.mode = SortMode::Numeric, - 'R' => settings.mode = SortMode::Random, - 'r' => settings.reverse = true, - 'V' => settings.mode = SortMode::Version, - c => crash!(1, "invalid option for key: `{}`", c), - } - // All numeric sorts and month sort conflict with dictionary_order and ignore_non_printing. - // Instad of reporting an error, let them overwrite each other. - - // FIXME: This should only override if the overridden flag is a global flag. - // If conflicting flags are attached to the key, GNU sort crashes and we should probably too. - match option { - 'h' | 'n' | 'g' | 'M' => { - settings.dictionary_order = false; - settings.ignore_non_printing = false; - } - 'd' | 'i' => { - settings.mode = match settings.mode { - SortMode::Numeric - | SortMode::HumanNumeric - | SortMode::GeneralNumeric - | SortMode::Month => SortMode::Default, - // Only SortMode::Default and SortMode::Version work with dictionary_order and ignore_non_printing - m @ SortMode::Default - | m @ SortMode::Version - | m @ SortMode::Random => m, - } - } - _ => {} - } - } - // Strip away option characters from the original value so we can parse it later - *value_with_options = &value_with_options[..options_start]; - } + .ok_or_else(|| format!("invalid key `{}`", key))?; + let char = field_and_char.next(); let field = field .parse() - .unwrap_or_else(|e| crash!(1, "failed to parse field index for key `{}`: {}", key, e)); + .map_err(|e| format!("failed to parse field index `{}`: {}", field, e))?; if field == 0 { - crash!(1, "field index was 0"); + return Err("field index can not be 0".to_string()); } - let char = char.map_or(default_char_index, |char| { - char.parse().unwrap_or_else(|e| { - crash!( - 1, - "failed to parse character index for key `{}`: {}", - key, - e - ) - }) - }); - Self { + + let char = char.map_or(Ok(default_char_index), |char| { + char.parse() + .map_err(|e| format!("failed to parse character index `{}`: {}", char, e)) + })?; + + Ok(Self { field, char, ignore_blanks, + }) + } +} + +impl Default for KeyPosition { + fn default() -> Self { + KeyPosition { + field: 1, + char: 1, + ignore_blanks: false, } } } -#[derive(Clone)] + +#[derive(Clone, PartialEq, Debug)] struct FieldSelector { from: KeyPosition, to: Option, @@ -583,20 +620,120 @@ struct FieldSelector { is_default_selection: bool, } -impl FieldSelector { - fn new(from: KeyPosition, to: Option, settings: KeySettings) -> Self { +impl Default for FieldSelector { + fn default() -> Self { Self { - is_default_selection: from.field == 1 - && from.char == 1 - && to.is_none() - && !matches!( - settings.mode, - SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric - ), - needs_tokens: from.field != 1 || from.char == 0 || to.is_some(), - from, - to, - settings, + from: Default::default(), + to: None, + settings: Default::default(), + needs_tokens: false, + is_default_selection: true, + } + } +} + +impl FieldSelector { + /// Splits this position into the actual position and the attached options. + fn split_key_options(position: &str) -> (&str, &str) { + if let Some((options_start, _)) = position.char_indices().find(|(_, c)| c.is_alphabetic()) { + position.split_at(options_start) + } else { + (position, "") + } + } + + fn parse(key: &str, global_settings: &GlobalSettings) -> Self { + let mut from_to = key.split(','); + let (from, from_options) = Self::split_key_options(from_to.next().unwrap()); + let to = from_to.next().map(|to| Self::split_key_options(to)); + let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, ""))); + crash_if_err!( + 2, + if options_are_empty { + // Inherit the global settings if there are no options attached to this key. + (|| { + // This would be ideal for a try block, I think. In the meantime this closure allows + // to use the `?` operator here. + Self::new( + KeyPosition::new(from, 1, global_settings.ignore_blanks)?, + to.map(|(to, _)| KeyPosition::new(to, 0, global_settings.ignore_blanks)) + .transpose()?, + KeySettings::from(global_settings), + ) + })() + } else { + // Do not inherit from `global_settings`, as there are options attached to this key. + Self::parse_with_options((from, from_options), to) + } + .map_err(|e| format!("failed to parse key `{}`: {}", key, e)) + ) + } + + fn parse_with_options( + (from, from_options): (&str, &str), + to: Option<(&str, &str)>, + ) -> Result { + /// Applies `options` to `key_settings`, returning if the 'b'-flag (ignore blanks) was present. + fn parse_key_settings( + options: &str, + key_settings: &mut KeySettings, + ) -> Result { + let mut ignore_blanks = false; + for option in options.chars() { + match option { + 'M' => key_settings.set_sort_mode(SortMode::Month)?, + 'b' => ignore_blanks = true, + 'd' => key_settings.set_dictionary_order()?, + 'f' => key_settings.ignore_case = true, + 'g' => key_settings.set_sort_mode(SortMode::GeneralNumeric)?, + 'h' => key_settings.set_sort_mode(SortMode::HumanNumeric)?, + 'i' => key_settings.set_ignore_non_printing()?, + 'n' => key_settings.set_sort_mode(SortMode::Numeric)?, + 'R' => key_settings.set_sort_mode(SortMode::Random)?, + 'r' => key_settings.reverse = true, + 'V' => key_settings.set_sort_mode(SortMode::Version)?, + c => return Err(format!("invalid option: `{}`", c)), + } + } + Ok(ignore_blanks) + } + + let mut key_settings = KeySettings::default(); + let from = parse_key_settings(from_options, &mut key_settings) + .map(|ignore_blanks| KeyPosition::new(from, 1, ignore_blanks))??; + let to = if let Some((to, to_options)) = to { + Some( + parse_key_settings(to_options, &mut key_settings) + .map(|ignore_blanks| KeyPosition::new(to, 0, ignore_blanks))??, + ) + } else { + None + }; + Self::new(from, to, key_settings) + } + + fn new( + from: KeyPosition, + to: Option, + settings: KeySettings, + ) -> Result { + if from.char == 0 { + Err("invalid character index 0 for the start position of a field".to_string()) + } else { + Ok(Self { + is_default_selection: from.field == 1 + && from.char == 1 + && to.is_none() + && !matches!( + settings.mode, + SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric + ) + && !from.ignore_blanks, + needs_tokens: from.field != 1 || from.char == 0 || to.is_some(), + from, + to, + settings, + }) } } @@ -900,7 +1037,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(OPT_SEPARATOR) .help("custom separator for -k") .takes_value(true)) - .arg(Arg::with_name(OPT_ZERO_TERMINATED) + .arg( + Arg::with_name(OPT_ZERO_TERMINATED) .short("z") .long(OPT_ZERO_TERMINATED) .help("line delimiter is NUL, not newline"), @@ -1053,43 +1191,27 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if matches.is_present(OPT_KEY) { for key in &matches.args[OPT_KEY].vals { - let key = key.to_string_lossy(); - let mut from_to = key.split(','); - let mut key_settings = KeySettings::from(&settings); - let from = KeyPosition::parse( - from_to - .next() - .unwrap_or_else(|| crash!(1, "invalid key `{}`", key)), - 1, - &mut key_settings, - ); - if from.char == 0 { - crash!( - 1, - "invalid character index 0 in `{}` for the start position of a field", - key - ) - } - let to = from_to - .next() - .map(|to| KeyPosition::parse(to, 0, &mut key_settings)); - let field_selector = FieldSelector::new(from, to, key_settings); - settings.selectors.push(field_selector); + settings + .selectors + .push(FieldSelector::parse(&key.to_string_lossy(), &settings)); } } - if !settings.stable || !matches.is_present(OPT_KEY) { + if !matches.is_present(OPT_KEY) { // add a default selector matching the whole line let key_settings = KeySettings::from(&settings); - settings.selectors.push(FieldSelector::new( - KeyPosition { - field: 1, - char: 1, - ignore_blanks: key_settings.ignore_blanks, - }, - None, - key_settings, - )); + settings.selectors.push( + FieldSelector::new( + KeyPosition { + field: 1, + char: 1, + ignore_blanks: key_settings.ignore_blanks, + }, + None, + key_settings, + ) + .unwrap(), + ); } exec(&files, &settings) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 6a2350749..588ce86bd 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -471,7 +471,7 @@ fn test_keys_invalid_field() { new_ucmd!() .args(&["-k", "1."]) .fails() - .stderr_only("sort: failed to parse character index for key `1.`: cannot parse integer from empty string"); + .stderr_only("sort: failed to parse key `1.`: failed to parse character index ``: cannot parse integer from empty string"); } #[test] @@ -479,7 +479,7 @@ fn test_keys_invalid_field_option() { new_ucmd!() .args(&["-k", "1.1x"]) .fails() - .stderr_only("sort: invalid option for key: `x`"); + .stderr_only("sort: failed to parse key `1.1x`: invalid option: `x`"); } #[test] @@ -487,7 +487,7 @@ fn test_keys_invalid_field_zero() { new_ucmd!() .args(&["-k", "0.1"]) .fails() - .stderr_only("sort: field index was 0"); + .stderr_only("sort: failed to parse key `0.1`: field index can not be 0"); } #[test] @@ -495,7 +495,7 @@ fn test_keys_invalid_char_zero() { new_ucmd!() .args(&["-k", "1.0"]) .fails() - .stderr_only("sort: invalid character index 0 in `1.0` for the start position of a field"); + .stderr_only("sort: failed to parse key `1.0`: invalid character index 0 for the start position of a field"); } #[test] @@ -586,6 +586,47 @@ fn test_keys_negative_size_match() { test_helper("keys_negative_size", &["-k 3,1"]); } +#[test] +fn test_keys_ignore_flag() { + test_helper("keys_ignore_flag", &["-k 1n -b"]) +} + +#[test] +fn test_doesnt_inherit_key_settings() { + let input = " 1 +2 + 10 +"; + new_ucmd!() + .args(&["-k", "1b", "-n"]) + .pipe_in(input) + .succeeds() + .stdout_only( + " 1 + 10 +2 +", + ); +} + +#[test] +fn test_inherits_key_settings() { + let input = " 1 +2 + 10 +"; + new_ucmd!() + .args(&["-k", "1", "-n"]) + .pipe_in(input) + .succeeds() + .stdout_only( + " 1 +2 + 10 +", + ); +} + #[test] fn test_zero_terminated() { test_helper("zero-terminated", &["-z"]); @@ -707,10 +748,9 @@ fn test_dictionary_and_nonprinting_conflicts() { .succeeds(); } for conflicting_arg in &conflicting_args { - // FIXME: this should ideally fail. new_ucmd!() .args(&["-k", &format!("1{},1{}", restricted_arg, conflicting_arg)]) - .succeeds(); + .fails(); } } } @@ -737,3 +777,8 @@ fn test_nonexistent_file() { "sort: cannot read: \"nonexistent.txt\": The system cannot find the file specified. (os error 2)", ); } + +#[test] +fn test_blanks() { + test_helper("blanks", &["-b", "--ignore-blanks"]); +} diff --git a/tests/fixtures/sort/blanks.expected b/tests/fixtures/sort/blanks.expected new file mode 100644 index 000000000..3469e434c --- /dev/null +++ b/tests/fixtures/sort/blanks.expected @@ -0,0 +1,5 @@ + a +b + x + x + z diff --git a/tests/fixtures/sort/blanks.expected.debug b/tests/fixtures/sort/blanks.expected.debug new file mode 100644 index 000000000..28c8fc960 --- /dev/null +++ b/tests/fixtures/sort/blanks.expected.debug @@ -0,0 +1,15 @@ + a + _ +___ +b +_ +_ + x + _ +__________ + x + _ +___ + z + _ +__ diff --git a/tests/fixtures/sort/blanks.txt b/tests/fixtures/sort/blanks.txt new file mode 100644 index 000000000..eb2f3c94e --- /dev/null +++ b/tests/fixtures/sort/blanks.txt @@ -0,0 +1,5 @@ +b + a + z + x + x diff --git a/tests/fixtures/sort/keys_ignore_flag.expected b/tests/fixtures/sort/keys_ignore_flag.expected new file mode 100644 index 000000000..964d04663 --- /dev/null +++ b/tests/fixtures/sort/keys_ignore_flag.expected @@ -0,0 +1,2 @@ + 1a +1A diff --git a/tests/fixtures/sort/keys_ignore_flag.expected.debug b/tests/fixtures/sort/keys_ignore_flag.expected.debug new file mode 100644 index 000000000..6d0da711f --- /dev/null +++ b/tests/fixtures/sort/keys_ignore_flag.expected.debug @@ -0,0 +1,6 @@ + 1a + _ +___ +1A +_ +__ diff --git a/tests/fixtures/sort/keys_ignore_flag.txt b/tests/fixtures/sort/keys_ignore_flag.txt new file mode 100644 index 000000000..964d04663 --- /dev/null +++ b/tests/fixtures/sort/keys_ignore_flag.txt @@ -0,0 +1,2 @@ + 1a +1A