diff --git a/Cargo.lock b/Cargo.lock index 2362342d4..13441d4fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1445,12 +1445,6 @@ dependencies = [ "maybe-uninit", ] -[[package]] -name = "smallvec" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" - [[package]] name = "strsim" version = "0.8.0" @@ -1912,7 +1906,7 @@ dependencies = [ "quickcheck", "rand 0.7.3", "rand_chacha", - "smallvec 0.6.14", + "smallvec", "uucore", "uucore_procs", ] @@ -2392,7 +2386,6 @@ dependencies = [ "rand 0.7.3", "rayon", "semver", - "smallvec 1.6.1", "tempdir", "unicode-width", "uucore", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 3784ccbb0..5221f1f4e 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -21,7 +21,6 @@ clap = "2.33" fnv = "1.0.7" itertools = "0.10.0" semver = "0.9.0" -smallvec = "1.6.1" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 730be0039..776f71058 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -29,7 +29,6 @@ use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; use semver::Version; -use smallvec::SmallVec; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::env; @@ -232,7 +231,6 @@ impl SelectionRange { enum NumCache { AsF64(GeneralF64ParseResult), WithInfo(NumInfo), - None, } impl NumCache { @@ -253,7 +251,7 @@ impl NumCache { #[derive(Clone)] struct Selection { range: SelectionRange, - num_cache: NumCache, + num_cache: Option>, } impl Selection { @@ -267,15 +265,17 @@ type Field = Range; #[derive(Clone)] pub struct Line { - line: String, + line: Box, // The common case is not to specify fields. Let's make this fast. - selections: SmallVec<[Selection; 1]>, + first_selection: Selection, + other_selections: Box<[Selection]>, } impl Line { + /// Estimate the number of bytes that this Line is occupying pub fn estimate_size(&self) -> usize { - self.line.capacity() - + self.selections.capacity() * std::mem::size_of::() + self.line.len() + + self.other_selections.len() * std::mem::size_of::() + std::mem::size_of::() } @@ -291,35 +291,22 @@ impl Line { None }; - let selections: SmallVec<[Selection; 1]> = settings - .selectors - .iter() - .map(|selector| { - let mut range = - SelectionRange::new(selector.get_selection(&line, fields.as_deref())); - let num_cache = if selector.settings.mode == SortMode::Numeric - || selector.settings.mode == SortMode::HumanNumeric - { - let (info, num_range) = NumInfo::parse( - range.get_str(&line), - NumInfoParseSettings { - accept_si_units: selector.settings.mode == SortMode::HumanNumeric, - thousands_separator: Some(THOUSANDS_SEP), - decimal_pt: Some(DECIMAL_PT), - }, - ); - range.shorten(num_range); - NumCache::WithInfo(info) - } else if selector.settings.mode == SortMode::GeneralNumeric { - let str = range.get_str(&line); - NumCache::AsF64(general_f64_parse(&str[get_leading_gen(str)])) - } else { - NumCache::None - }; - Selection { range, num_cache } - }) + let mut selectors = settings.selectors.iter(); + + let first_selection = selectors + .next() + .unwrap() + .get_selection(&line, fields.as_deref()); + + let other_selections: Vec = selectors + .map(|selector| selector.get_selection(&line, fields.as_deref())) .collect(); - Self { line, selections } + + Self { + line: line.into_boxed_str(), + first_selection, + other_selections: other_selections.into_boxed_slice(), + } } /// Writes indicators for the selections this line matched. The original line content is NOT expected @@ -338,7 +325,7 @@ impl Line { let fields = tokenize(&self.line, settings.separator); for selector in settings.selectors.iter() { - let mut selection = selector.get_selection(&self.line, Some(&fields)); + let mut selection = selector.get_range(&self.line, Some(&fields)); match selector.settings.mode { SortMode::Numeric | SortMode::HumanNumeric => { // find out which range is used for numeric comparisons @@ -595,9 +582,41 @@ impl FieldSelector { self.from.field != 1 || self.from.char == 0 || self.to.is_some() } - /// Look up the slice that corresponds to this selector for the given line. - /// If needs_fields returned false, fields may be None. - fn get_selection<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { + /// Get the selection that corresponds to this selector for the line. + /// If needs_fields returned false, tokens may be None. + fn get_selection(&self, line: &str, tokens: Option<&[Field]>) -> Selection { + let mut range = SelectionRange::new(self.get_range(&line, tokens)); + let num_cache = if self.settings.mode == SortMode::Numeric + || self.settings.mode == SortMode::HumanNumeric + { + // Parse NumInfo for this number. + let (info, num_range) = NumInfo::parse( + range.get_str(&line), + NumInfoParseSettings { + accept_si_units: self.settings.mode == SortMode::HumanNumeric, + thousands_separator: Some(THOUSANDS_SEP), + decimal_pt: Some(DECIMAL_PT), + }, + ); + // Shorten the range to what we need to pass to numeric_str_cmp later. + range.shorten(num_range); + Some(Box::new(NumCache::WithInfo(info))) + } else if self.settings.mode == SortMode::GeneralNumeric { + // Parse this number as f64, as this is the requirement for general numeric sorting. + let str = range.get_str(&line); + Some(Box::new(NumCache::AsF64(general_f64_parse( + &str[get_leading_gen(str)], + )))) + } else { + // This is not a numeric sort, so we don't need a NumCache. + None + }; + Selection { range, num_cache } + } + + /// Look up the range in the line that corresponds to this selector. + /// If needs_fields returned false, tokens may be None. + fn get_range<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { enum Resolution { // The start index of the resolved character, inclusive StartOfChar(usize), @@ -1238,8 +1257,11 @@ fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering { for (idx, selector) in global_settings.selectors.iter().enumerate() { - let a_selection = &a.selections[idx]; - let b_selection = &b.selections[idx]; + let (a_selection, b_selection) = if idx == 0 { + (&a.first_selection, &b.first_selection) + } else { + (&a.other_selections[idx - 1], &b.other_selections[idx - 1]) + }; let a_str = a_selection.get_str(a); let b_str = b_selection.get_str(b); let settings = &selector.settings; @@ -1249,12 +1271,12 @@ fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering } else { match settings.mode { SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( - (a_str, a_selection.num_cache.as_num_info()), - (b_str, b_selection.num_cache.as_num_info()), + (a_str, a_selection.num_cache.as_ref().unwrap().as_num_info()), + (b_str, b_selection.num_cache.as_ref().unwrap().as_num_info()), ), SortMode::GeneralNumeric => general_numeric_compare( - a_selection.num_cache.as_f64(), - b_selection.num_cache.as_f64(), + a_selection.num_cache.as_ref().unwrap().as_f64(), + b_selection.num_cache.as_ref().unwrap().as_f64(), ), SortMode::Month => month_compare(a_str, b_str), SortMode::Version => version_compare(a_str, b_str), @@ -1340,7 +1362,8 @@ enum GeneralF64ParseResult { Infinity, } -/// Parse the beginning string into an f64, returning -inf instead of NaN on errors. +/// Parse the beginning string into a GeneralF64ParseResult. +/// Using a GeneralF64ParseResult instead of f64 is necessary to correctly order floats. #[inline(always)] fn general_f64_parse(a: &str) -> GeneralF64ParseResult { // The actual behavior here relies on Rust's implementation of parsing floating points. @@ -1593,4 +1616,16 @@ mod tests { let line = "..a..a"; assert_eq!(tokenize(line, Some('a')), vec![0..2, 3..5]); } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_line_size() { + // We should make sure to not regress the size of the Line struct because + // it is unconditional overhead for every line we sort. + assert_eq!(std::mem::size_of::(), 56); + // These are the fields of Line: + assert_eq!(std::mem::size_of::>(), 16); + assert_eq!(std::mem::size_of::(), 24); + assert_eq!(std::mem::size_of::>(), 16); + } }