1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

sort: don't store slices for general numeric sort

General numeric sort works by comparing pre-parsed floating point
numbers. That means that we don't have to store the &str the float was
parsed from.

As a result, memory usage was slightly reduced for general numeric sort.
This commit is contained in:
Michael Debertol 2021-06-24 18:33:33 +02:00
parent 4a956f38b9
commit 8bebfbb3e6

View file

@ -227,11 +227,8 @@ impl GlobalSettings {
/// afterwards. /// afterwards.
fn init_precomputed(&mut self) { fn init_precomputed(&mut self) {
self.precomputed.needs_tokens = self.selectors.iter().any(|s| s.needs_tokens); self.precomputed.needs_tokens = self.selectors.iter().any(|s| s.needs_tokens);
self.precomputed.selections_per_line = self self.precomputed.selections_per_line =
.selectors self.selectors.iter().filter(|s| s.needs_selection).count();
.iter()
.filter(|s| !s.is_default_selection)
.count();
self.precomputed.num_infos_per_line = self self.precomputed.num_infos_per_line = self
.selectors .selectors
.iter() .iter()
@ -362,10 +359,10 @@ impl Default for KeySettings {
Self::from(&GlobalSettings::default()) Self::from(&GlobalSettings::default())
} }
} }
enum NumCache { enum Selection<'a> {
AsF64(GeneralF64ParseResult), AsF64(GeneralF64ParseResult),
WithInfo(NumInfo), WithNumInfo(&'a str, NumInfo),
None, Str(&'a str),
} }
type Field = Range<usize>; type Field = Range<usize>;
@ -392,17 +389,22 @@ impl<'a> Line<'a> {
if settings.precomputed.needs_tokens { if settings.precomputed.needs_tokens {
tokenize(line, settings.separator, token_buffer); tokenize(line, settings.separator, token_buffer);
} }
for (selection, num_cache) in settings for (selector, selection) in settings
.selectors .selectors
.iter() .iter()
.filter(|selector| !selector.is_default_selection) .map(|selector| (selector, selector.get_selection(line, token_buffer)))
.map(|selector| selector.get_selection(line, token_buffer))
{ {
line_data.selections.push(selection); match selection {
match num_cache { Selection::AsF64(parsed_float) => line_data.parsed_floats.push(parsed_float),
NumCache::AsF64(parsed_float) => line_data.parsed_floats.push(parsed_float), Selection::WithNumInfo(str, num_info) => {
NumCache::WithInfo(num_info) => line_data.num_infos.push(num_info), line_data.num_infos.push(num_info);
NumCache::None => (), line_data.selections.push(str);
}
Selection::Str(str) => {
if selector.needs_selection {
line_data.selections.push(str)
}
}
} }
} }
Self { line, index } Self { line, index }
@ -667,8 +669,10 @@ struct FieldSelector {
to: Option<KeyPosition>, to: Option<KeyPosition>,
settings: KeySettings, settings: KeySettings,
needs_tokens: bool, needs_tokens: bool,
// Whether the selection for each line is going to be the whole line with no NumCache // Whether this selector operates on a sub-slice of a line.
is_default_selection: bool, // Selections are therefore not needed when this selector matches the whole line
// or the sort mode is general-numeric.
needs_selection: bool,
} }
impl Default for FieldSelector { impl Default for FieldSelector {
@ -678,7 +682,7 @@ impl Default for FieldSelector {
to: None, to: None,
settings: Default::default(), settings: Default::default(),
needs_tokens: false, needs_tokens: false,
is_default_selection: true, needs_selection: false,
} }
} }
} }
@ -774,14 +778,12 @@ impl FieldSelector {
Err("invalid character index 0 for the start position of a field".to_string()) Err("invalid character index 0 for the start position of a field".to_string())
} else { } else {
Ok(Self { Ok(Self {
is_default_selection: from.field == 1 needs_selection: (from.field != 1
&& from.char == 1 || from.char != 1
&& to.is_none() || to.is_some()
&& !matches!( || matches!(settings.mode, SortMode::Numeric | SortMode::HumanNumeric)
settings.mode, || from.ignore_blanks)
SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric && !matches!(settings.mode, SortMode::GeneralNumeric),
)
&& !from.ignore_blanks,
needs_tokens: from.field != 1 || from.char == 0 || to.is_some(), needs_tokens: from.field != 1 || from.char == 0 || to.is_some(),
from, from,
to, to,
@ -792,7 +794,7 @@ impl FieldSelector {
/// Get the selection that corresponds to this selector for the line. /// Get the selection that corresponds to this selector for the line.
/// If needs_fields returned false, tokens may be empty. /// If needs_fields returned false, tokens may be empty.
fn get_selection<'a>(&self, line: &'a str, tokens: &[Field]) -> (&'a str, NumCache) { fn get_selection<'a>(&self, line: &'a str, tokens: &[Field]) -> Selection<'a> {
// `get_range` expects `None` when we don't need tokens and would get confused by an empty vector. // `get_range` expects `None` when we don't need tokens and would get confused by an empty vector.
let tokens = if self.needs_tokens { let tokens = if self.needs_tokens {
Some(tokens) Some(tokens)
@ -800,9 +802,7 @@ impl FieldSelector {
None None
}; };
let mut range = &line[self.get_range(line, tokens)]; let mut range = &line[self.get_range(line, tokens)];
let num_cache = if self.settings.mode == SortMode::Numeric if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric {
|| self.settings.mode == SortMode::HumanNumeric
{
// Parse NumInfo for this number. // Parse NumInfo for this number.
let (info, num_range) = NumInfo::parse( let (info, num_range) = NumInfo::parse(
range, range,
@ -813,15 +813,14 @@ impl FieldSelector {
); );
// Shorten the range to what we need to pass to numeric_str_cmp later. // Shorten the range to what we need to pass to numeric_str_cmp later.
range = &range[num_range]; range = &range[num_range];
NumCache::WithInfo(info) Selection::WithNumInfo(range, info)
} else if self.settings.mode == SortMode::GeneralNumeric { } else if self.settings.mode == SortMode::GeneralNumeric {
// Parse this number as f64, as this is the requirement for general numeric sorting. // Parse this number as f64, as this is the requirement for general numeric sorting.
NumCache::AsF64(general_f64_parse(&range[get_leading_gen(range)])) Selection::AsF64(general_f64_parse(&range[get_leading_gen(range)]))
} else { } else {
// This is not a numeric sort, so we don't need a NumCache. // This is not a numeric sort, so we don't need a NumCache.
NumCache::None Selection::Str(range)
}; }
(range, num_cache)
} }
/// Look up the range in the line that corresponds to this selector. /// Look up the range in the line that corresponds to this selector.
@ -1366,7 +1365,7 @@ fn compare_by<'a>(
let mut num_info_index = 0; let mut num_info_index = 0;
let mut parsed_float_index = 0; let mut parsed_float_index = 0;
for selector in &global_settings.selectors { for selector in &global_settings.selectors {
let (a_str, b_str) = if selector.is_default_selection { let (a_str, b_str) = if !selector.needs_selection {
// We can select the whole line. // We can select the whole line.
(a.line, b.line) (a.line, b.line)
} else { } else {