mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-03 06:27:45 +00:00
enhanced numeric sort and new human readable sort
This commit is contained in:
parent
314a254d1f
commit
f31192d46a
6 changed files with 308 additions and 40 deletions
106
src/sort/sort.rs
106
src/sort/sort.rs
|
@ -21,7 +21,6 @@ use std::cmp::Ordering;
|
|||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Read, stdin, Write};
|
||||
use std::path::Path;
|
||||
use std::str::Chars;
|
||||
|
||||
#[path = "../common/util.rs"]
|
||||
#[macro_use]
|
||||
|
@ -37,6 +36,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
let mut opts = getopts::Options::new();
|
||||
|
||||
opts.optflag("n", "numeric-sort", "compare according to string numerical value");
|
||||
opts.optflag("H", "human-readable-sort", "compare according to human readable sizes, eg 1M > 100k");
|
||||
opts.optflag("r", "reverse", "reverse the output");
|
||||
opts.optflag("h", "help", "display this help and exit");
|
||||
opts.optflag("", "version", "output version information and exit");
|
||||
|
@ -66,6 +66,7 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
|
|||
}
|
||||
|
||||
let numeric = matches.opt_present("numeric-sort");
|
||||
let human_readable = matches.opt_present("human-readable-sort");
|
||||
let reverse = matches.opt_present("reverse");
|
||||
|
||||
let mut files = matches.free;
|
||||
|
@ -74,12 +75,12 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION);
|
|||
files.push("-".to_string());
|
||||
}
|
||||
|
||||
exec(files, numeric, reverse);
|
||||
exec(files, numeric, human_readable, reverse);
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
|
||||
fn exec(files: Vec<String>, numeric: bool, human_readable: bool, reverse: bool) {
|
||||
for path in files.iter() {
|
||||
let (reader, _) = match open(path) {
|
||||
Some(x) => x,
|
||||
|
@ -99,7 +100,9 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
|
|||
}
|
||||
|
||||
if numeric {
|
||||
lines.sort_by(frac_compare);
|
||||
lines.sort_by(numeric_compare);
|
||||
} else if human_readable {
|
||||
lines.sort_by(human_readable_size_compare);
|
||||
} else {
|
||||
lines.sort();
|
||||
}
|
||||
|
@ -113,44 +116,73 @@ fn exec(files: Vec<String>, numeric: bool, reverse: bool) {
|
|||
}
|
||||
}
|
||||
|
||||
fn skip_zeros(mut char_a: char, char_iter: &mut Chars, ret: Ordering) -> Ordering {
|
||||
char_a = match char_iter.next() { None => 0 as char, Some(t) => t };
|
||||
while char_a == '0' {
|
||||
char_a = match char_iter.next() { None => return Ordering::Equal, Some(t) => t };
|
||||
/// Parse the first whitespace-delimited token of `a` as an f64.
///
/// Returns -inf (never NaN, never a panic) when the line is empty or
/// whitespace-only, when the token is not a valid float, or when the token
/// parses to NaN (e.g. the literal text "NaN"). Keeping NaN out of the result
/// lets callers compare the values with plain `<` / `>`.
fn permissive_f64_parse(a: &String) -> f64 {
    //Maybe should be split on non-digit, but then 10e100 won't parse properly.
    //On the flip side, this will give NEG_INFINITY for "1,234", which might be OK
    //because there's no way to handle both CSV and thousands separators without a new flag.
    //GNU sort treats "1,234" as "1" in numeric, so maybe it's fine.
    let token = match a.split_whitespace().next() {
        Some(t) => t,
        // Blank line: there is nothing to parse, so sort it first.
        None => return std::f64::NEG_INFINITY,
    };
    match token.parse::<f64>() {
        // "NaN" is accepted by the f64 parser; fold it into the error value.
        Ok(v) if !v.is_nan() => v,
        _ => std::f64::NEG_INFINITY,
    }
}
|
||||
if char_a.is_digit(10) { ret } else { Ordering::Equal }
|
||||
}
|
||||
|
||||
/// Compares two decimal fractions as strings (n < 1)
|
||||
/// This requires the strings to start with a decimal, otherwise it's treated as 0
|
||||
fn frac_compare(a: &String, b: &String) -> Ordering {
|
||||
let a_chars = &mut a.chars();
|
||||
let b_chars = &mut b.chars();
|
||||
/// Compares two floating point numbers, with errors being assumned to be -inf.
|
||||
/// Stops coercing at the first whitespace char, so 1e2 will parse as 100 but
|
||||
/// 1,000 will parse as -inf.
|
||||
fn numeric_compare(a: &String, b: &String) -> Ordering {
|
||||
let fa = permissive_f64_parse(a);
|
||||
let fb = permissive_f64_parse(b);
|
||||
//f64::cmp isn't implemented because NaN messes with it
|
||||
//but we sidestep that with permissive_f64_parse so just fake it
|
||||
if fa > fb {
|
||||
return Ordering::Greater;
|
||||
}
|
||||
else if fa < fb {
|
||||
return Ordering::Less;
|
||||
}
|
||||
else {
|
||||
return Ordering::Equal;
|
||||
}
|
||||
}
|
||||
|
||||
let mut char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
|
||||
let mut char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
|
||||
/// Converts a human readable size such as `844K`, `100k` or `1.5M` into a
/// plain number so sizes can be compared numerically.
///
/// The leading run of ASCII digits, with at most one decimal point, is the
/// magnitude. The character immediately after it selects a decimal
/// multiplier: `k`/`K` = 1e3, `M` = 1e6, `G` = 1e9, `T` = 1e12, `P` = 1e15.
/// Any other character, or no character at all, means a multiplier of 1.
///
/// When the line does not start with a parsable number the magnitude is -1,
/// so such lines yield a negative value (-1 times the multiplier).
fn human_readable_convert(a: &String) -> f64 {
    // Count the characters of the numeric prefix. Every accepted character is
    // ASCII, so the count is also the byte offset where the suffix starts.
    let mut seen_point = false;
    let num_len = a.chars()
        .take_while(|&c| {
            if c == '.' && !seen_point {
                seen_point = true;
                true
            } else {
                c.is_digit(10)
            }
        })
        .count();
    let (num_str, rest) = a.split_at(num_len);

    let magnitude = match num_str.parse::<f64>() {
        Ok(v) => v,
        Err(_) => -1f64,
    };
    let multiplier: f64 = match rest.chars().next() {
        // Lowercase `k` is accepted as well: the help text itself uses "100k".
        Some('k') | Some('K') => 1E3,
        Some('M') => 1E6,
        Some('G') => 1E9,
        Some('T') => 1E12,
        Some('P') => 1E15,
        _ => 1f64,
    };
    magnitude * multiplier
}
|
||||
|
||||
/// Compare two strings as if they are human readable sizes.
|
||||
/// AKA 1M > 100k
|
||||
fn human_readable_size_compare(a: &String, b: &String) -> Ordering {
|
||||
let fa = human_readable_convert(a);
|
||||
let fb = human_readable_convert(b);
|
||||
if fa > fb {
|
||||
return Ordering::Greater;
|
||||
}
|
||||
else if fa < fb {
|
||||
return Ordering::Less;
|
||||
}
|
||||
else {
|
||||
return Ordering::Equal;
|
||||
}
|
||||
|
||||
if char_a == DECIMAL_PT && char_b == DECIMAL_PT {
|
||||
while char_a == char_b {
|
||||
char_a = match a_chars.next() { None => 0 as char, Some(t) => t };
|
||||
char_b = match b_chars.next() { None => 0 as char, Some(t) => t };
|
||||
// hit the end at the same time, they are equal
|
||||
if !char_a.is_digit(10) {
|
||||
return Ordering::Equal;
|
||||
}
|
||||
}
|
||||
if char_a.is_digit(10) && char_b.is_digit(10) {
|
||||
(char_a as isize).cmp(&(char_b as isize))
|
||||
} else if char_a.is_digit(10) {
|
||||
skip_zeros(char_a, a_chars, Ordering::Greater)
|
||||
} else if char_b.is_digit(10) {
|
||||
skip_zeros(char_b, b_chars, Ordering::Less)
|
||||
} else { Ordering::Equal }
|
||||
} else if char_a == DECIMAL_PT {
|
||||
skip_zeros(char_a, a_chars, Ordering::Greater)
|
||||
} else if char_b == DECIMAL_PT {
|
||||
skip_zeros(char_b, b_chars, Ordering::Less)
|
||||
} else { Ordering::Equal }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
|
11
test/fixtures/sort/human1.ans
vendored
Normal file
11
test/fixtures/sort/human1.ans
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
844K
|
||||
981K
|
||||
11M
|
||||
13M
|
||||
14M
|
||||
16M
|
||||
18M
|
||||
19M
|
||||
20M
|
||||
981T
|
||||
20P
|
11
test/fixtures/sort/human1.txt
vendored
Normal file
11
test/fixtures/sort/human1.txt
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
14M
|
||||
20M
|
||||
20P
|
||||
11M
|
||||
981T
|
||||
16M
|
||||
18M
|
||||
19M
|
||||
844K
|
||||
981K
|
||||
13M
|
100
test/fixtures/sort/numeric6.ans
vendored
Normal file
100
test/fixtures/sort/numeric6.ans
vendored
Normal file
|
@ -0,0 +1,100 @@
|
|||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12
|
||||
13
|
||||
14
|
||||
15
|
||||
16
|
||||
17
|
||||
18
|
||||
19
|
||||
20
|
||||
21
|
||||
22
|
||||
23
|
||||
24
|
||||
25
|
||||
26
|
||||
27
|
||||
28
|
||||
29
|
||||
30
|
||||
31
|
||||
32
|
||||
33
|
||||
34
|
||||
35
|
||||
36
|
||||
37
|
||||
38
|
||||
39
|
||||
40
|
||||
41
|
||||
42
|
||||
43
|
||||
44
|
||||
45
|
||||
46
|
||||
47
|
||||
48
|
||||
49
|
||||
50
|
||||
51
|
||||
52
|
||||
53
|
||||
54
|
||||
55
|
||||
56
|
||||
57
|
||||
58
|
||||
59
|
||||
60
|
||||
61
|
||||
62
|
||||
63
|
||||
64
|
||||
65
|
||||
66
|
||||
67
|
||||
68
|
||||
69
|
||||
70
|
||||
71
|
||||
72
|
||||
73
|
||||
74
|
||||
75
|
||||
76
|
||||
77
|
||||
78
|
||||
79
|
||||
80
|
||||
81
|
||||
82
|
||||
83
|
||||
84
|
||||
85
|
||||
86
|
||||
87
|
||||
88
|
||||
89
|
||||
90
|
||||
91
|
||||
92
|
||||
93
|
||||
94
|
||||
95
|
||||
96
|
||||
97
|
||||
98
|
||||
99
|
||||
100
|
100
test/fixtures/sort/numeric6.txt
vendored
Normal file
100
test/fixtures/sort/numeric6.txt
vendored
Normal file
|
@ -0,0 +1,100 @@
|
|||
33
|
||||
16
|
||||
35
|
||||
56
|
||||
72
|
||||
37
|
||||
21
|
||||
49
|
||||
70
|
||||
48
|
||||
90
|
||||
83
|
||||
44
|
||||
79
|
||||
10
|
||||
20
|
||||
4
|
||||
26
|
||||
27
|
||||
63
|
||||
29
|
||||
47
|
||||
51
|
||||
85
|
||||
88
|
||||
46
|
||||
30
|
||||
61
|
||||
93
|
||||
81
|
||||
78
|
||||
53
|
||||
87
|
||||
18
|
||||
98
|
||||
38
|
||||
13
|
||||
39
|
||||
23
|
||||
71
|
||||
5
|
||||
100
|
||||
96
|
||||
8
|
||||
24
|
||||
14
|
||||
28
|
||||
15
|
||||
25
|
||||
43
|
||||
36
|
||||
67
|
||||
75
|
||||
66
|
||||
31
|
||||
57
|
||||
34
|
||||
80
|
||||
40
|
||||
86
|
||||
17
|
||||
55
|
||||
9
|
||||
1
|
||||
62
|
||||
12
|
||||
74
|
||||
58
|
||||
69
|
||||
76
|
||||
11
|
||||
73
|
||||
68
|
||||
59
|
||||
41
|
||||
45
|
||||
52
|
||||
97
|
||||
82
|
||||
6
|
||||
7
|
||||
77
|
||||
42
|
||||
84
|
||||
95
|
||||
94
|
||||
89
|
||||
19
|
||||
64
|
||||
2
|
||||
22
|
||||
50
|
||||
60
|
||||
32
|
||||
92
|
||||
3
|
||||
99
|
||||
65
|
||||
54
|
||||
91
|
20
test/sort.rs
20
test/sort.rs
|
@ -30,15 +30,29 @@ fn numeric5() {
|
|||
numeric_helper(5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn numeric6() {
|
||||
numeric_helper(6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn human1() {
|
||||
test_helper(&String::from("human1"), &String::from("-H"));
|
||||
}
|
||||
|
||||
fn numeric_helper(test_num: isize) {
|
||||
test_helper(&format!("numeric{}", test_num), &String::from("-n"))
|
||||
}
|
||||
|
||||
fn test_helper(file_name: &String, args: &String) {
|
||||
let mut cmd = Command::new(PROGNAME);
|
||||
cmd.arg("-n");
|
||||
let po = match cmd.arg(format!("{}{}{}", "numeric", test_num, ".txt")).output() {
|
||||
cmd.arg(args);
|
||||
let po = match cmd.arg(format!("{}{}", file_name, ".txt")).output() {
|
||||
Ok(p) => p,
|
||||
Err(err) => panic!("{}", err)
|
||||
};
|
||||
|
||||
let filename = format!("{}{}{}", "numeric", test_num, ".ans");
|
||||
let filename = format!("{}{}", file_name, ".ans");
|
||||
let mut f = File::open(Path::new(&filename)).unwrap_or_else(|err| {
|
||||
panic!("{}", err)
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue