1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2026-01-21 12:41:13 +00:00

enhanced numeric sort and new human readable sort

This commit is contained in:
Christian Zinck 2015-08-26 23:51:52 -04:00
parent 314a254d1f
commit f31192d46a
6 changed files with 308 additions and 40 deletions

View file

@ -21,7 +21,6 @@ use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, stdin, Write};
use std::path::Path;
use std::str::Chars;
#[path = "../common/util.rs"]
#[macro_use]
@ -37,6 +36,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
let mut opts = getopts::Options::new();
opts.optflag("n", "numeric-sort", "compare according to string numerical value");
opts.optflag("H", "human-readable-sort", "compare according to human readable sizes, eg 1M > 100k");
opts.optflag("r", "reverse", "reverse the output");
opts.optflag("h", "help", "display this help and exit");
opts.optflag("", "version", "output version information and exit");
@ -66,6 +66,7 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
}
let numeric = matches.opt_present("numeric-sort");
let human_readable = matches.opt_present("human-readable-sort");
let reverse = matches.opt_present("reverse");
let mut files = matches.free;
@ -74,12 +75,12 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
files.push("-".to_string());
}
exec(files, numeric, reverse);
exec(files, numeric, human_readable, reverse);
0
}
fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
fn exec(files: Vec<String>, numeric: bool, human_readable: bool, reverse: bool) {
for path in files.iter() {
let (reader, _) = match open(path) {
Some(x) => x,
@ -99,7 +100,9 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
}
if numeric {
lines.sort_by(frac_compare);
lines.sort_by(numeric_compare);
} else if human_readable {
lines.sort_by(human_readable_size_compare);
} else {
lines.sort();
}
@ -113,44 +116,73 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
}
}
fn skip_zeros(mut char_a: char, char_iter: &mut Chars, ret: Ordering) -> Ordering {
char_a = match char_iter.next() { None => 0 as char, Some(t) => t };
while char_a == '0' {
char_a = match char_iter.next() { None => return Ordering::Equal, Some(t) => t };
/// Parse the first whitespace-delimited token of `a` as an `f64`.
///
/// Returns negative infinity when the line has no token at all (empty or
/// whitespace-only), when the token is not a valid float, or when the token
/// parses to NaN. The result is therefore never NaN, so it can always be
/// ordered against any other result of this function.
fn permissive_f64_parse(a: &String) -> f64 {
    // Maybe should be split on non-digit, but then 10e100 won't parse properly.
    // On the flip side, this will give NEG_INFINITY for "1,234", which might be OK
    // because there's no way to handle both CSV and thousands separators without a new flag.
    // GNU sort treats "1,234" as "1" in numeric, so maybe it's fine.
    let token = match a.split_whitespace().next() {
        Some(t) => t,
        // Blank lines carry no token; rank them together with unparsable lines
        // instead of panicking.
        None => return std::f64::NEG_INFINITY,
    };
    match token.parse::<f64>() {
        // The literal text "NaN" parses successfully, but NaN is unordered and
        // would break the sort comparator, so it is folded in with the errors.
        Ok(value) if !value.is_nan() => value,
        _ => std::f64::NEG_INFINITY,
    }
}
/// Compares two decimal fractions as strings (n < 1)
/// This requires the strings to start with a decimal, otherwise it's treated as 0
fn frac_compare(a: &String, b: &String) -> Ordering {
let a_chars = &mut a.chars();
let b_chars = &mut b.chars();
/// Orders two lines by the floating point value of their first
/// whitespace-delimited token, as produced by `permissive_f64_parse`.
/// Parse errors are assumed to be -inf, so "1e2" counts as 100 while
/// "1,000" counts as -inf.
fn numeric_compare(a: &String, b: &String) -> Ordering {
    let (lhs, rhs) = (permissive_f64_parse(a), permissive_f64_parse(b));
    // f64 has no total `cmp` because of NaN; `partial_cmp` yields `None` only
    // for an unordered pair, and such a pair is reported as equal.
    lhs.partial_cmp(&rhs).unwrap_or(Ordering::Equal)
}
let mut char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
let mut char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
/// Convert a human readable size such as "10K" or "3G" into a plain number.
///
/// The value is the run of leading ASCII digits, scaled by the decimal
/// multiplier of the single character that immediately follows them:
/// `k`/`K` = 1e3, `M` = 1e6, `G` = 1e9, `T` = 1e12, `P` = 1e15, `E` = 1e18,
/// `Z` = 1e21, `Y` = 1e24. Any other following character (or none) means a
/// multiplier of 1.
///
/// Returns `-1` when the string does not start with a digit. The sentinel is
/// returned unscaled, so every unparsable line ranks the same regardless of
/// which letter it happens to begin with.
fn human_readable_convert(a: &String) -> f64 {
    // `is_digit(10)` accepts only ASCII 0-9, so each counted char is exactly
    // one byte and the count is a valid byte offset for slicing below.
    let digit_len = a.chars().take_while(|c| c.is_digit(10)).count();
    let magnitude: f64 = match a[..digit_len].parse() {
        Ok(n) => n,
        // No leading digits: bail out before the suffix can scale the sentinel.
        Err(_) => return -1f64,
    };
    let multiplier = match a[digit_len..].chars().next() {
        Some('k') | Some('K') => 1E3,
        Some('M') => 1E6,
        Some('G') => 1E9,
        Some('T') => 1E12,
        Some('P') => 1E15,
        Some('E') => 1E18,
        Some('Z') => 1E21,
        Some('Y') => 1E24,
        _ => 1f64,
    };
    magnitude * multiplier
}
/// Compare two strings as if they are human readable sizes.
/// AKA 1M > 100k
fn human_readable_size_compare(a: &String, b: &String) -> Ordering {
let fa = human_readable_convert(a);
let fb = human_readable_convert(b);
if fa > fb {
return Ordering::Greater;
}
else if fa < fb {
return Ordering::Less;
}
else {
return Ordering::Equal;
}
if char_a == DECIMAL_PT && char_b == DECIMAL_PT {
while char_a == char_b {
char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
// hit the end at the same time, they are equal
if !char_a.is_digit(10) {
return Ordering::Equal;
}
}
if char_a.is_digit(10) && char_b.is_digit(10) {
(char_a as isize).cmp(&(char_b as isize))
} else if char_a.is_digit(10) {
skip_zeros(char_a, a_chars, Ordering::Greater)
} else if char_b.is_digit(10) {
skip_zeros(char_b, b_chars, Ordering::Less)
} else { Ordering::Equal }
} else if char_a == DECIMAL_PT {
skip_zeros(char_a, a_chars, Ordering::Greater)
} else if char_b == DECIMAL_PT {
skip_zeros(char_b, b_chars, Ordering::Less)
} else { Ordering::Equal }
}
#[inline(always)]