mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2026-01-21 12:41:13 +00:00
enhanced numeric sort and new human readable sort
This commit is contained in:
parent
314a254d1f
commit
f31192d46a
6 changed files with 308 additions and 40 deletions
106
src/sort/sort.rs
106
src/sort/sort.rs
|
|
@ -21,7 +21,6 @@ use std::cmp::Ordering;
|
|||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Read, stdin, Write};
|
||||
use std::path::Path;
|
||||
use std::str::Chars;
|
||||
|
||||
#[path = "../common/util.rs"]
|
||||
#[macro_use]
|
||||
|
|
@ -37,6 +36,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
let mut opts = getopts::Options::new();
|
||||
|
||||
opts.optflag("n", "numeric-sort", "compare according to string numerical value");
|
||||
opts.optflag("H", "human-readable-sort", "compare according to human readable sizes, eg 1M > 100k");
|
||||
opts.optflag("r", "reverse", "reverse the output");
|
||||
opts.optflag("h", "help", "display this help and exit");
|
||||
opts.optflag("", "version", "output version information and exit");
|
||||
|
|
@ -66,6 +66,7 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
|
|||
}
|
||||
|
||||
let numeric = matches.opt_present("numeric-sort");
|
||||
let human_readable = matches.opt_present("human-readable-sort");
|
||||
let reverse = matches.opt_present("reverse");
|
||||
|
||||
let mut files = matches.free;
|
||||
|
|
@ -74,12 +75,12 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
|
|||
files.push("-".to_string());
|
||||
}
|
||||
|
||||
exec(files, numeric, reverse);
|
||||
exec(files, numeric, human_readable, reverse);
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
|
||||
fn exec(files: Vec<String>, numeric: bool, human_readable: bool, reverse: bool) {
|
||||
for path in files.iter() {
|
||||
let (reader, _) = match open(path) {
|
||||
Some(x) => x,
|
||||
|
|
@ -99,7 +100,9 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
|
|||
}
|
||||
|
||||
if numeric {
|
||||
lines.sort_by(frac_compare);
|
||||
lines.sort_by(numeric_compare);
|
||||
} else if human_readable {
|
||||
lines.sort_by(human_readable_size_compare);
|
||||
} else {
|
||||
lines.sort();
|
||||
}
|
||||
|
|
@ -113,44 +116,73 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
|
|||
}
|
||||
}
|
||||
|
||||
fn skip_zeros(mut char_a: char, char_iter: &mut Chars, ret: Ordering) -> Ordering {
|
||||
char_a = match char_iter.next() { None => 0 as char, Some(t) => t };
|
||||
while char_a == '0' {
|
||||
char_a = match char_iter.next() { None => return Ordering::Equal, Some(t) => t };
|
||||
/// Parse the first whitespace-delimited token of the string as an f64, returning -inf when it does not parse.
|
||||
fn permissive_f64_parse(a: &String) -> f64{
|
||||
//Maybe should be split on non-digit, but then 10e100 won't parse properly.
|
||||
//On the flip side, this will give NEG_INFINITY for "1,234", which might be OK
|
||||
//because there's no way to handle both CSV and thousands separators without a new flag.
|
||||
//GNU sort treats "1,234" as "1" in numeric, so maybe it's fine.
|
||||
let sa: &str = a.split_whitespace().next().unwrap();
|
||||
match sa.parse::<f64>() {
|
||||
Ok(a) => a,
|
||||
Err(_) => std::f64::NEG_INFINITY
|
||||
}
|
||||
if char_a.is_digit(10) { ret } else { Ordering::Equal }
|
||||
}
|
||||
|
||||
/// Compares two decimal fractions as strings (n < 1)
|
||||
/// This requires the strings to start with a decimal, otherwise it's treated as 0
|
||||
fn frac_compare(a: &String, b: &String) -> Ordering {
|
||||
let a_chars = &mut a.chars();
|
||||
let b_chars = &mut b.chars();
|
||||
/// Compares two floating point numbers, with errors being assumed to be -inf.
|
||||
/// Stops coercing at the first whitespace char, so 1e2 will parse as 100 but
|
||||
/// 1,000 will parse as -inf.
|
||||
fn numeric_compare(a: &String, b: &String) -> Ordering {
|
||||
let fa = permissive_f64_parse(a);
|
||||
let fb = permissive_f64_parse(b);
|
||||
//f64::cmp isn't implemented because NaN messes with it
|
||||
//but we sidestep that with permissive_f64_parse so just fake it
|
||||
if fa > fb {
|
||||
return Ordering::Greater;
|
||||
}
|
||||
else if fa < fb {
|
||||
return Ordering::Less;
|
||||
}
|
||||
else {
|
||||
return Ordering::Equal;
|
||||
}
|
||||
}
|
||||
|
||||
let mut char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
|
||||
let mut char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
|
||||
/// Converts a human readable size such as `12`, `100k` or `3G` into an f64.
///
/// The leading run of numeric characters is parsed as the magnitude, and the
/// single character that follows it selects a decimal multiplier:
/// `k`/`K` = 1e3, `M` = 1e6, `G` = 1e9, `T` = 1e12, `P` = 1e15. Any other
/// character (or no character at all) leaves the magnitude unscaled.
///
/// If the leading run is empty or does not parse, the magnitude is `-1`, so
/// such lines sort before every valid non-negative size.
fn human_readable_convert(a: &String) -> f64 {
    // Byte offset of the first non-numeric char; the whole string if none.
    let split_at = a.find(|c: char| !c.is_numeric()).unwrap_or(a.len());
    let (digits, rest) = a.split_at(split_at);

    let magnitude = digits.parse::<f64>().unwrap_or(-1f64);

    // Lowercase `k` is accepted alongside `K`: the option's own help text
    // uses "100k" as its example, and it was previously treated as no suffix.
    let multiplier: f64 = match rest.chars().next() {
        Some('k') | Some('K') => 1E3,
        Some('M') => 1E6,
        Some('G') => 1E9,
        Some('T') => 1E12,
        Some('P') => 1E15,
        _ => 1f64,
    };

    magnitude * multiplier
}
|
||||
|
||||
/// Compare two strings as if they are human readable sizes.
|
||||
/// AKA 1M > 100k
|
||||
fn human_readable_size_compare(a: &String, b: &String) -> Ordering {
|
||||
let fa = human_readable_convert(a);
|
||||
let fb = human_readable_convert(b);
|
||||
if fa > fb {
|
||||
return Ordering::Greater;
|
||||
}
|
||||
else if fa < fb {
|
||||
return Ordering::Less;
|
||||
}
|
||||
else {
|
||||
return Ordering::Equal;
|
||||
}
|
||||
|
||||
if char_a == DECIMAL_PT && char_b == DECIMAL_PT {
|
||||
while char_a == char_b {
|
||||
char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
|
||||
char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
|
||||
// hit the end at the same time, they are equal
|
||||
if !char_a.is_digit(10) {
|
||||
return Ordering::Equal;
|
||||
}
|
||||
}
|
||||
if char_a.is_digit(10) && char_b.is_digit(10) {
|
||||
(char_a as isize).cmp(&(char_b as isize))
|
||||
} else if char_a.is_digit(10) {
|
||||
skip_zeros(char_a, a_chars, Ordering::Greater)
|
||||
} else if char_b.is_digit(10) {
|
||||
skip_zeros(char_b, b_chars, Ordering::Less)
|
||||
} else { Ordering::Equal }
|
||||
} else if char_a == DECIMAL_PT {
|
||||
skip_zeros(char_a, a_chars, Ordering::Greater)
|
||||
} else if char_b == DECIMAL_PT {
|
||||
skip_zeros(char_b, b_chars, Ordering::Less)
|
||||
} else { Ordering::Equal }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue