mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
sort: Make use of ExtendedBigDecimal in -g sorting
This provides better precision than f64, which we need. Fixed #8031.
This commit is contained in:
parent
040532a027
commit
8426c1480c
5 changed files with 46 additions and 26 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3287,6 +3287,7 @@ dependencies = [
|
|||
name = "uu_sort"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bigdecimal",
|
||||
"binary-heap-plus",
|
||||
"clap",
|
||||
"compare",
|
||||
|
|
1
fuzz/Cargo.lock
generated
1
fuzz/Cargo.lock
generated
|
@ -1345,6 +1345,7 @@ dependencies = [
|
|||
name = "uu_sort"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bigdecimal",
|
||||
"binary-heap-plus",
|
||||
"clap",
|
||||
"compare",
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# spell-checker:ignore bigdecimal
|
||||
|
||||
[package]
|
||||
name = "uu_sort"
|
||||
description = "sort ~ (uutils) sort input lines"
|
||||
|
@ -18,6 +20,7 @@ workspace = true
|
|||
path = "src/sort.rs"
|
||||
|
||||
[dependencies]
|
||||
bigdecimal = { workspace = true }
|
||||
binary-heap-plus = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
compare = { workspace = true }
|
||||
|
|
|
@ -17,7 +17,9 @@ use memchr::memchr_iter;
|
|||
use self_cell::self_cell;
|
||||
use uucore::error::{UResult, USimpleError};
|
||||
|
||||
use crate::{GeneralF64ParseResult, GlobalSettings, Line, SortError, numeric_str_cmp::NumInfo};
|
||||
use crate::{
|
||||
GeneralBigDecimalParseResult, GlobalSettings, Line, SortError, numeric_str_cmp::NumInfo,
|
||||
};
|
||||
|
||||
self_cell!(
|
||||
/// The chunk that is passed around between threads.
|
||||
|
@ -41,7 +43,7 @@ pub struct ChunkContents<'a> {
|
|||
pub struct LineData<'a> {
|
||||
pub selections: Vec<&'a str>,
|
||||
pub num_infos: Vec<NumInfo>,
|
||||
pub parsed_floats: Vec<GeneralF64ParseResult>,
|
||||
pub parsed_floats: Vec<GeneralBigDecimalParseResult>,
|
||||
pub line_num_floats: Vec<Option<f64>>,
|
||||
}
|
||||
|
||||
|
@ -100,7 +102,7 @@ pub struct RecycledChunk {
|
|||
lines: Vec<Line<'static>>,
|
||||
selections: Vec<&'static str>,
|
||||
num_infos: Vec<NumInfo>,
|
||||
parsed_floats: Vec<GeneralF64ParseResult>,
|
||||
parsed_floats: Vec<GeneralBigDecimalParseResult>,
|
||||
line_num_floats: Vec<Option<f64>>,
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
|
||||
// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
|
||||
|
||||
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim
|
||||
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal
|
||||
|
||||
mod check;
|
||||
mod chunks;
|
||||
|
@ -17,6 +17,7 @@ mod merge;
|
|||
mod numeric_str_cmp;
|
||||
mod tmp_dir;
|
||||
|
||||
use bigdecimal::BigDecimal;
|
||||
use chunks::LineData;
|
||||
use clap::builder::ValueParser;
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
|
@ -44,7 +45,9 @@ use unicode_width::UnicodeWidthStr;
|
|||
use uucore::display::Quotable;
|
||||
use uucore::error::{FromIo, strip_errno};
|
||||
use uucore::error::{UError, UResult, USimpleError, UUsageError, set_exit_code};
|
||||
use uucore::extendedbigdecimal::ExtendedBigDecimal;
|
||||
use uucore::line_ending::LineEnding;
|
||||
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
|
||||
use uucore::parser::parse_size::{ParseSizeError, Parser};
|
||||
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
|
||||
use uucore::version_cmp::version_cmp;
|
||||
|
@ -450,7 +453,7 @@ impl Default for KeySettings {
|
|||
}
|
||||
}
|
||||
enum Selection<'a> {
|
||||
AsF64(GeneralF64ParseResult),
|
||||
AsBigDecimal(GeneralBigDecimalParseResult),
|
||||
WithNumInfo(&'a str, NumInfo),
|
||||
Str(&'a str),
|
||||
}
|
||||
|
@ -492,7 +495,7 @@ impl<'a> Line<'a> {
|
|||
.map(|selector| (selector, selector.get_selection(line, token_buffer)))
|
||||
{
|
||||
match selection {
|
||||
Selection::AsF64(parsed_float) => line_data.parsed_floats.push(parsed_float),
|
||||
Selection::AsBigDecimal(parsed_float) => line_data.parsed_floats.push(parsed_float),
|
||||
Selection::WithNumInfo(str, num_info) => {
|
||||
line_data.num_infos.push(num_info);
|
||||
line_data.selections.push(str);
|
||||
|
@ -904,8 +907,8 @@ impl FieldSelector {
|
|||
range = &range[num_range];
|
||||
Selection::WithNumInfo(range, info)
|
||||
} else if self.settings.mode == SortMode::GeneralNumeric {
|
||||
// Parse this number as f64, as this is the requirement for general numeric sorting.
|
||||
Selection::AsF64(general_f64_parse(&range[get_leading_gen(range)]))
|
||||
// Parse this number as BigDecimal, as this is the requirement for general numeric sorting.
|
||||
Selection::AsBigDecimal(general_bd_parse(&range[get_leading_gen(range)]))
|
||||
} else {
|
||||
// This is not a numeric sort, so we don't need a NumCache.
|
||||
Selection::Str(range)
|
||||
|
@ -1791,35 +1794,45 @@ fn get_leading_gen(input: &str) -> Range<usize> {
|
|||
leading_whitespace_len..input.len()
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
|
||||
pub enum GeneralF64ParseResult {
|
||||
#[derive(Clone, PartialEq, PartialOrd, Debug)]
|
||||
pub enum GeneralBigDecimalParseResult {
|
||||
Invalid,
|
||||
NaN,
|
||||
NegInfinity,
|
||||
Number(f64),
|
||||
Nan,
|
||||
MinusInfinity,
|
||||
Number(BigDecimal),
|
||||
Infinity,
|
||||
}
|
||||
|
||||
/// Parse the beginning string into a GeneralF64ParseResult.
|
||||
/// Using a GeneralF64ParseResult instead of f64 is necessary to correctly order floats.
|
||||
/// Parse the beginning string into a GeneralBigDecimalParseResult.
|
||||
/// Using a GeneralBigDecimalParseResult instead of ExtendedBigDecimal is necessary to correctly order floats.
|
||||
#[inline(always)]
|
||||
fn general_f64_parse(a: &str) -> GeneralF64ParseResult {
|
||||
// The actual behavior here relies on Rust's implementation of parsing floating points.
|
||||
// For example "nan", "inf" (ignoring the case) and "infinity" are only parsed to floats starting from 1.53.
|
||||
// TODO: Once our minimum supported Rust version is 1.53 or above, we should add tests for those cases.
|
||||
match a.parse::<f64>() {
|
||||
Ok(a) if a.is_nan() => GeneralF64ParseResult::NaN,
|
||||
Ok(a) if a == f64::NEG_INFINITY => GeneralF64ParseResult::NegInfinity,
|
||||
Ok(a) if a == f64::INFINITY => GeneralF64ParseResult::Infinity,
|
||||
Ok(a) => GeneralF64ParseResult::Number(a),
|
||||
Err(_) => GeneralF64ParseResult::Invalid,
|
||||
fn general_bd_parse(a: &str) -> GeneralBigDecimalParseResult {
|
||||
// Parse digits, and fold in recoverable errors
|
||||
let ebd = match ExtendedBigDecimal::extended_parse(a) {
|
||||
Err(ExtendedParserError::NotNumeric) => return GeneralBigDecimalParseResult::Invalid,
|
||||
Err(ExtendedParserError::PartialMatch(ebd, _))
|
||||
| Err(ExtendedParserError::Overflow(ebd))
|
||||
| Err(ExtendedParserError::Underflow(ebd))
|
||||
| Ok(ebd) => ebd,
|
||||
};
|
||||
|
||||
match ebd {
|
||||
ExtendedBigDecimal::BigDecimal(bd) => GeneralBigDecimalParseResult::Number(bd),
|
||||
ExtendedBigDecimal::Infinity => GeneralBigDecimalParseResult::Infinity,
|
||||
ExtendedBigDecimal::MinusInfinity => GeneralBigDecimalParseResult::MinusInfinity,
|
||||
// Minus zero and zero are equal
|
||||
ExtendedBigDecimal::MinusZero => GeneralBigDecimalParseResult::Number(0.into()),
|
||||
ExtendedBigDecimal::Nan | ExtendedBigDecimal::MinusNan => GeneralBigDecimalParseResult::Nan,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compares two floats, with errors and non-numerics assumed to be -inf.
|
||||
/// Stops coercing at the first non-numeric char.
|
||||
/// We explicitly need to convert to f64 in this case.
|
||||
fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult) -> Ordering {
|
||||
fn general_numeric_compare(
|
||||
a: &GeneralBigDecimalParseResult,
|
||||
b: &GeneralBigDecimalParseResult,
|
||||
) -> Ordering {
|
||||
a.partial_cmp(b).unwrap()
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue