1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-03 14:37:45 +00:00

enhanced numeric sort and new human readable sort

This commit is contained in:
Christian Zinck 2015-08-26 23:51:52 -04:00
parent 314a254d1f
commit f31192d46a
6 changed files with 308 additions and 40 deletions

View file

@ -21,7 +21,6 @@ use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, stdin, Write};
use std::path::Path;
use std::str::Chars;
#[path = "../common/util.rs"]
#[macro_use]
@ -37,6 +36,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
let mut opts = getopts::Options::new();
opts.optflag("n", "numeric-sort", "compare according to string numerical value");
opts.optflag("H", "human-readable-sort", "compare according to human readable sizes, eg 1M > 100k");
opts.optflag("r", "reverse", "reverse the output");
opts.optflag("h", "help", "display this help and exit");
opts.optflag("", "version", "output version information and exit");
@ -66,6 +66,7 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
}
let numeric = matches.opt_present("numeric-sort");
let human_readable = matches.opt_present("human-readable-sort");
let reverse = matches.opt_present("reverse");
let mut files = matches.free;
@ -74,12 +75,12 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
files.push("-".to_string());
}
exec(files, numeric, reverse);
exec(files, numeric, human_readable, reverse);
0
}
fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
fn exec(files: Vec<String>, numeric: bool, human_readable: bool, reverse: bool) {
for path in files.iter() {
let (reader, _) = match open(path) {
Some(x) => x,
@ -99,7 +100,9 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
}
if numeric {
lines.sort_by(frac_compare);
lines.sort_by(numeric_compare);
} else if human_readable {
lines.sort_by(human_readable_size_compare);
} else {
lines.sort();
}
@ -113,44 +116,73 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
}
}
fn skip_zeros(mut char_a: char, char_iter: &mut Chars, ret: Ordering) -> Ordering {
char_a = match char_iter.next() { None => 0 as char, Some(t) => t };
while char_a == '0' {
char_a = match char_iter.next() { None => return Ordering::Equal, Some(t) => t };
/// Parse the first whitespace-delimited token of `a` as an f64, returning -inf
/// (instead of an error) when that token is not a valid float.
///
/// NOTE(review): `split_whitespace().next().unwrap()` panics when `a` is empty or
/// holds only whitespace, so blank input lines are not handled here.
/// NOTE(review): `str::parse::<f64>` accepts the literal "NaN", so a NaN can still
/// be returned on the `Ok` path; confirm whether callers rely on never seeing NaN.
fn permissive_f64_parse(a: &String) -> f64{
//Maybe should be split on non-digit, but then 10e100 won't parse properly.
//On the flip side, this will give NEG_INFINITY for "1,234", which might be OK
//because there's no way to handle both CSV and thousands separators without a new flag.
//GNU sort treats "1,234" as "1" in numeric, so maybe it's fine.
// Only the first token is considered; anything after the first run of whitespace is ignored.
let sa: &str = a.split_whitespace().next().unwrap();
match sa.parse::<f64>() {
Ok(a) => a,
// Unparseable tokens become -inf.
Err(_) => std::f64::NEG_INFINITY
}
// NOTE(review): `char_a` and `ret` are not defined in this function; the next line
// looks like a removed-line remnant of the old `skip_zeros` left by the diff view — confirm.
if char_a.is_digit(10) { ret } else { Ordering::Equal }
}
/// Compares two decimal fractions as strings (n < 1)
/// This requires the strings to start with a decimal, otherwise it's treated as 0
fn frac_compare(a: &String, b: &String) -> Ordering {
let a_chars = &mut a.chars();
let b_chars = &mut b.chars();
/// Compares two lines by the floating point value that `permissive_f64_parse`
/// extracts from each, with errors being assumed to be -inf.
/// Parsing stops at the first whitespace char, so 1e2 will compare as 100 but
/// 1,000 will compare as -inf.
///
/// Returns `Ordering::Less`, `Ordering::Greater` or `Ordering::Equal` for use
/// as a `sort_by` comparator.
fn numeric_compare(a: &String, b: &String) -> Ordering {
    // `str::parse::<f64>` accepts the literal "NaN", so the parse helper can still
    // hand back NaN. NaN is neither less nor greater than any value, which would make
    // such a line "equal" to every other line and break the consistent ordering that
    // `sort_by` needs. Treat it like every other non-number: -inf.
    let nan_to_neg_inf = |x: f64| if x.is_nan() { std::f64::NEG_INFINITY } else { x };
    let fa = nan_to_neg_inf(permissive_f64_parse(a));
    let fb = nan_to_neg_inf(permissive_f64_parse(b));
    // f64 has no `cmp` because of NaN; with NaN ruled out above `partial_cmp`
    // always yields `Some`, so the fallback is not expected to be taken.
    fa.partial_cmp(&fb).unwrap_or(Ordering::Equal)
}
let mut char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
let mut char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
/// Converts a human readable size such as `981K` or `20P` into a plain magnitude.
///
/// The leading run of ASCII decimal digits is parsed as the number and the single
/// character that follows it selects a power-of-1000 multiplier:
/// `k`/`K`, `M`, `G`, `T`, `P`, `E`, `Z`, `Y`. Any other character (or none)
/// means a multiplier of 1.
///
/// If the string does not start with a digit the number is taken to be `-1`,
/// so such lines sort before every valid non-negative size.
fn human_readable_convert(a: &String) -> f64 {
    // Only ASCII digits are counted: `char::is_numeric` would also accept Unicode
    // numerics (e.g. fractions or superscripts) that `parse::<f64>` rejects, turning
    // an otherwise valid leading number into the -1 fallback.
    // ASCII digits are one byte each, so the char count is also the byte offset.
    let digits_len = a.chars().take_while(|c| c.is_digit(10)).count();
    let magnitude = a[..digits_len].parse::<f64>().unwrap_or(-1f64);
    // The first character after the digits is the unit suffix, if any.
    let multiplier = match a[digits_len..].chars().next() {
        // Lowercase `k` is accepted too, matching the `1M > 100k` example in the help text.
        Some('k') | Some('K') => 1E3,
        Some('M') => 1E6,
        Some('G') => 1E9,
        Some('T') => 1E12,
        Some('P') => 1E15,
        Some('E') => 1E18,
        Some('Z') => 1E21,
        Some('Y') => 1E24,
        _ => 1f64,
    };
    magnitude * multiplier
}
/// Compare two strings as if they are human readable sizes,
/// e.g. `1M` sorts after `100K`.
///
/// Both arguments are converted with `human_readable_convert` and the resulting
/// f64 values are compared; values that are neither greater nor less than each
/// other yield `Ordering::Equal`.
fn human_readable_size_compare(a: &String, b: &String) -> Ordering {
let fa = human_readable_convert(a);
let fb = human_readable_convert(b);
if fa > fb {
return Ordering::Greater;
}
else if fa < fb {
return Ordering::Less;
}
else {
return Ordering::Equal;
}
// NOTE(review): every branch above returns, and the statements below use names
// (`char_a`, `char_b`, `a_chars`, `b_chars`, `DECIMAL_PT`, `skip_zeros`) that are not
// defined in this function. They look like removed lines of the old `frac_compare`
// left behind by the diff view — confirm before relying on them.
if char_a == DECIMAL_PT && char_b == DECIMAL_PT {
while char_a == char_b {
char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
// hit the end at the same time, they are equal
if !char_a.is_digit(10) {
return Ordering::Equal;
}
}
if char_a.is_digit(10) && char_b.is_digit(10) {
(char_a as isize).cmp(&(char_b as isize))
} else if char_a.is_digit(10) {
skip_zeros(char_a, a_chars, Ordering::Greater)
} else if char_b.is_digit(10) {
skip_zeros(char_b, b_chars, Ordering::Less)
} else { Ordering::Equal }
} else if char_a == DECIMAL_PT {
skip_zeros(char_a, a_chars, Ordering::Greater)
} else if char_b == DECIMAL_PT {
skip_zeros(char_b, b_chars, Ordering::Less)
} else { Ordering::Equal }
}
#[inline(always)]

11
test/fixtures/sort/human1.ans vendored Normal file
View file

@ -0,0 +1,11 @@
844K
981K
11M
13M
14M
16M
18M
19M
20M
981T
20P

11
test/fixtures/sort/human1.txt vendored Normal file
View file

@ -0,0 +1,11 @@
14M
20M
20P
11M
981T
16M
18M
19M
844K
981K
13M

100
test/fixtures/sort/numeric6.ans vendored Normal file
View file

@ -0,0 +1,100 @@
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

100
test/fixtures/sort/numeric6.txt vendored Normal file
View file

@ -0,0 +1,100 @@
33
16
35
56
72
37
21
49
70
48
90
83
44
79
10
20
4
26
27
63
29
47
51
85
88
46
30
61
93
81
78
53
87
18
98
38
13
39
23
71
5
100
96
8
24
14
28
15
25
43
36
67
75
66
31
57
34
80
40
86
17
55
9
1
62
12
74
58
69
76
11
73
68
59
41
45
52
97
82
6
7
77
42
84
95
94
89
19
64
2
22
50
60
32
92
3
99
65
54
91

View file

@ -30,15 +30,29 @@ fn numeric5() {
numeric_helper(5);
}
/// Runs `numeric_helper` for fixture number 6.
#[test]
fn numeric6() {
    let fixture_number = 6;
    numeric_helper(fixture_number);
}
/// Runs `test_helper` on the `human1` fixture name with the `-H` argument.
#[test]
fn human1() {
    let fixture = "human1".to_string();
    let flag = "-H".to_string();
    test_helper(&fixture, &flag);
}
/// Runs `test_helper` for the fixture named `numeric<test_num>` with the `-n` argument.
fn numeric_helper(test_num: isize) {
    let fixture = format!("numeric{}", test_num);
    let flag = String::from("-n");
    test_helper(&fixture, &flag);
}
fn test_helper(file_name: &String, args: &String) {
let mut cmd = Command::new(PROGNAME);
cmd.arg("-n");
let po = match cmd.arg(format!("{}{}{}", "numeric", test_num, ".txt")).output() {
cmd.arg(args);
let po = match cmd.arg(format!("{}{}", file_name, ".txt")).output() {
Ok(p) => p,
Err(err) => panic!("{}", err)
};
let filename = format!("{}{}{}", "numeric", test_num, ".ans");
let filename = format!("{}{}", file_name, ".ans");
let mut f = File::open(Path::new(&filename)).unwrap_or_else(|err| {
panic!("{}", err)
});