diff --git a/src/sort/sort.rs b/src/sort/sort.rs index f2856a643..19595f378 100644 --- a/src/sort/sort.rs +++ b/src/sort/sort.rs @@ -21,7 +21,6 @@ use std::cmp::Ordering; use std::fs::File; use std::io::{BufRead, BufReader, Read, stdin, Write}; use std::path::Path; -use std::str::Chars; #[path = "../common/util.rs"] #[macro_use] @@ -37,6 +36,7 @@ pub fn uumain(args: Vec) -> i32 { let mut opts = getopts::Options::new(); opts.optflag("n", "numeric-sort", "compare according to string numerical value"); + opts.optflag("H", "human-readable-sort", "compare according to human readable sizes, eg 1M > 100k"); opts.optflag("r", "reverse", "reverse the output"); opts.optflag("h", "help", "display this help and exit"); opts.optflag("", "version", "output version information and exit"); @@ -66,6 +66,7 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION); } let numeric = matches.opt_present("numeric-sort"); + let human_readable = matches.opt_present("human-readable-sort"); let reverse = matches.opt_present("reverse"); let mut files = matches.free; @@ -74,12 +75,12 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION); files.push("-".to_string()); } - exec(files, numeric, reverse); + exec(files, numeric, human_readable, reverse); 0 } -fn exec(files: Vec, numeric: bool, reverse: bool) { +fn exec(files: Vec, numeric: bool, human_readable: bool, reverse: bool) { for path in files.iter() { let (reader, _) = match open(path) { Some(x) => x, @@ -99,7 +100,9 @@ fn exec(files: Vec, numeric: bool, reverse: bool) { } if numeric { - lines.sort_by(frac_compare); + lines.sort_by(numeric_compare); + } else if human_readable { + lines.sort_by(human_readable_size_compare); } else { lines.sort(); } @@ -113,44 +116,73 @@ fn exec(files: Vec, numeric: bool, reverse: bool) { } } -fn skip_zeros(mut char_a: char, char_iter: &mut Chars, ret: Ordering) -> Ordering { - char_a = match char_iter.next() { None => 0 as char, Some(t) => t }; - while char_a == '0' { - 
/// Parses the first whitespace-delimited token of `a` as an `f64`.
///
/// Returns `-inf` when the line is empty or blank, when the token is not a
/// valid float, or when the token parses to NaN. Never returning NaN lets the
/// callers compare the results without special cases.
///
/// Splitting on whitespace (rather than on the first non-digit) keeps inputs
/// such as `1e2` parseable as 100. The flip side is that `1,234` yields
/// `-inf`, because the whole token is handed to the float parser.
fn permissive_f64_parse(a: &String) -> f64 {
    // A blank line has no token at all; treat it like any other parse error
    // instead of panicking on it.
    let token = match a.split_whitespace().next() {
        Some(t) => t,
        None => return std::f64::NEG_INFINITY,
    };
    match token.parse::<f64>() {
        // "NaN" is accepted by the float parser, so it has to be filtered
        // out explicitly to keep the "never NaN" promise.
        Ok(value) if !value.is_nan() => value,
        _ => std::f64::NEG_INFINITY,
    }
}

/// Compares two lines by the floating point number at their start.
///
/// Lines that do not start with a parseable number are treated as `-inf`
/// (see `permissive_f64_parse`), so they sort before every finite value and
/// compare equal to each other.
fn numeric_compare(a: &String, b: &String) -> Ordering {
    let fa = permissive_f64_parse(a);
    let fb = permissive_f64_parse(b);
    // f64 has no total order because of NaN, but permissive_f64_parse never
    // returns NaN, so the `Equal` fallback is only a formality.
    fa.partial_cmp(&fb).unwrap_or(Ordering::Equal)
}

/// Converts a human readable size such as `844K` or `20P` into a plain number.
///
/// Leading and trailing whitespace is ignored. The value is the run of ASCII
/// digits at the start of the line, scaled by the single character that
/// follows it: `k`/`K` = 1e3, `M` = 1e6, `G` = 1e9, `T` = 1e12, `P` = 1e15,
/// `E` = 1e18, `Z` = 1e21, `Y` = 1e24. Any other character, or none, leaves
/// the value unscaled.
///
/// Returns `-1` when the line does not start with a digit. The sentinel is
/// returned unscaled, so every such line compares equal to the others and
/// sorts before every valid size.
///
/// Scanning stops at the first non-digit, so a fractional part is not
/// interpreted: `1.5K` is read as `1` followed by the unknown suffix `.`.
fn human_readable_convert(a: &String) -> f64 {
    let text = a.trim();
    // ASCII digits are one byte each, so the number of matching chars is also
    // a valid byte offset into `text`.
    let digit_len = text.chars().take_while(|c| c.is_digit(10)).count();
    let magnitude = match text[..digit_len].parse::<f64>() {
        Ok(value) => value,
        // Only reachable when there are no leading digits at all.
        Err(_) => return -1f64,
    };
    let multiplier = match text[digit_len..].chars().next() {
        Some('k') | Some('K') => 1E3,
        Some('M') => 1E6,
        Some('G') => 1E9,
        Some('T') => 1E12,
        Some('P') => 1E15,
        Some('E') => 1E18,
        Some('Z') => 1E21,
        Some('Y') => 1E24,
        _ => 1f64,
    };
    magnitude * multiplier
}
+/// AKA 1M > 100k +fn human_readable_size_compare(a: &String, b: &String) -> Ordering { + let fa = human_readable_convert(a); + let fb = human_readable_convert(b); + if fa > fb { + return Ordering::Greater; + } + else if fa < fb { + return Ordering::Less; + } + else { + return Ordering::Equal; + } - if char_a == DECIMAL_PT && char_b == DECIMAL_PT { - while char_a == char_b { - char_a = match a_chars.next() { None => 0 as char, Some(t) => t }; - char_b = match b_chars.next() { None => 0 as char, Some(t) => t }; - // hit the end at the same time, they are equal - if !char_a.is_digit(10) { - return Ordering::Equal; - } - } - if char_a.is_digit(10) && char_b.is_digit(10) { - (char_a as isize).cmp(&(char_b as isize)) - } else if char_a.is_digit(10) { - skip_zeros(char_a, a_chars, Ordering::Greater) - } else if char_b.is_digit(10) { - skip_zeros(char_b, b_chars, Ordering::Less) - } else { Ordering::Equal } - } else if char_a == DECIMAL_PT { - skip_zeros(char_a, a_chars, Ordering::Greater) - } else if char_b == DECIMAL_PT { - skip_zeros(char_b, b_chars, Ordering::Less) - } else { Ordering::Equal } } #[inline(always)] diff --git a/test/fixtures/sort/human1.ans b/test/fixtures/sort/human1.ans new file mode 100644 index 000000000..74fad9fdf --- /dev/null +++ b/test/fixtures/sort/human1.ans @@ -0,0 +1,11 @@ +844K +981K +11M +13M +14M +16M +18M +19M +20M +981T +20P diff --git a/test/fixtures/sort/human1.txt b/test/fixtures/sort/human1.txt new file mode 100644 index 000000000..803666dbe --- /dev/null +++ b/test/fixtures/sort/human1.txt @@ -0,0 +1,11 @@ +14M +20M +20P +11M +981T +16M +18M +19M +844K +981K +13M diff --git a/test/fixtures/sort/numeric6.ans b/test/fixtures/sort/numeric6.ans new file mode 100644 index 000000000..190423f88 --- /dev/null +++ b/test/fixtures/sort/numeric6.ans @@ -0,0 +1,100 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 
+47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 diff --git a/test/fixtures/sort/numeric6.txt b/test/fixtures/sort/numeric6.txt new file mode 100644 index 000000000..d3c2bb861 --- /dev/null +++ b/test/fixtures/sort/numeric6.txt @@ -0,0 +1,100 @@ +33 +16 +35 +56 +72 +37 +21 +49 +70 +48 +90 +83 +44 +79 +10 +20 +4 +26 +27 +63 +29 +47 +51 +85 +88 +46 +30 +61 +93 +81 +78 +53 +87 +18 +98 +38 +13 +39 +23 +71 +5 +100 +96 +8 +24 +14 +28 +15 +25 +43 +36 +67 +75 +66 +31 +57 +34 +80 +40 +86 +17 +55 +9 +1 +62 +12 +74 +58 +69 +76 +11 +73 +68 +59 +41 +45 +52 +97 +82 +6 +7 +77 +42 +84 +95 +94 +89 +19 +64 +2 +22 +50 +60 +32 +92 +3 +99 +65 +54 +91 diff --git a/test/sort.rs b/test/sort.rs index 9ba2b8e6a..aa630210c 100644 --- a/test/sort.rs +++ b/test/sort.rs @@ -30,15 +30,29 @@ fn numeric5() { numeric_helper(5); } +#[test] +fn numeric6() { + numeric_helper(6); +} + +#[test] +fn human1() { + test_helper(&String::from("human1"), &String::from("-H")); +} + fn numeric_helper(test_num: isize) { + test_helper(&format!("numeric{}", test_num), &String::from("-n")) +} + +fn test_helper(file_name: &String, args: &String) { let mut cmd = Command::new(PROGNAME); - cmd.arg("-n"); - let po = match cmd.arg(format!("{}{}{}", "numeric", test_num, ".txt")).output() { + cmd.arg(args); + let po = match cmd.arg(format!("{}{}", file_name, ".txt")).output() { Ok(p) => p, Err(err) => panic!("{}", err) }; - let filename = format!("{}{}{}", "numeric", test_num, ".ans"); + let filename = format!("{}{}", file_name, ".ans"); let mut f = File::open(Path::new(&filename)).unwrap_or_else(|err| { panic!("{}", err) });