diff --git a/src/sort/Cargo.toml b/src/sort/Cargo.toml index 545206f57..f8de86672 100644 --- a/src/sort/Cargo.toml +++ b/src/sort/Cargo.toml @@ -11,6 +11,7 @@ path = "sort.rs" getopts = "*" libc = "*" semver = "*" +itertools = "*" uucore = { path="../uucore" } [[bin]] diff --git a/src/sort/sort.rs b/src/sort/sort.rs index 39c3bff9b..47d7d4943 100644 --- a/src/sort/sort.rs +++ b/src/sort/sort.rs @@ -17,13 +17,18 @@ extern crate semver; #[macro_use] extern crate uucore; +#[macro_use] +extern crate itertools; use std::cmp::Ordering; +use std::collections::BinaryHeap; use std::fs::File; -use std::io::{BufRead, BufReader, BufWriter, Read, stdin, stdout, Write}; +use std::io::{BufRead, BufReader, BufWriter, Lines, Read, stdin, stdout, Write}; +use std::mem::replace; use std::path::Path; use uucore::fs::is_stdin_interactive; use semver::Version; +use itertools::Itertools; // for Iterator::dedup() static NAME: &'static str = "sort"; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -41,22 +46,105 @@ enum SortMode { struct Settings { mode: SortMode, + merge: bool, reverse: bool, outfile: Option, stable: bool, unique: bool, check: bool, + compare_fns: Vec Ordering>, } impl Default for Settings { fn default() -> Settings { Settings { mode: SortMode::Default, + merge: false, reverse: false, outfile: None, stable: false, unique: false, check: false, + compare_fns: Vec::new(), + } + } +} + +struct MergeableFile<'a> { + lines: Lines>>, + current_line: String, + settings: &'a Settings, +} + +// BinaryHeap depends on `Ord`. Note that we want to pop smallest items +// from the heap first, and BinaryHeap.pop() returns the largest, so we +// trick it into the right order by calling reverse() here. +impl<'a> Ord for MergeableFile<'a> { + fn cmp(&self, other: &MergeableFile) -> Ordering { + compare_by(&self.current_line, &other.current_line, &self.settings).reverse() + } +} + +impl<'a> PartialOrd for MergeableFile<'a> { + fn partial_cmp(&self, other: &MergeableFile) -> Option { + Some(self.cmp(other)) + } +} + +impl<'a> PartialEq for MergeableFile<'a> { + fn eq(&self, other: &MergeableFile) -> bool { + Ordering::Equal == compare_by(&self.current_line, &other.current_line, &self.settings) + } +} + +impl<'a> Eq for MergeableFile<'a> {} + +struct FileMerger<'a> { + heap: BinaryHeap>, + settings: &'a Settings, +} + +impl<'a> FileMerger<'a> { + fn new(settings: &'a Settings) -> FileMerger<'a> { + FileMerger { + heap: BinaryHeap::new(), + settings: settings, + } + } + fn push_file(&mut self, mut lines: Lines>>){ + match lines.next() { + Some(Ok(next_line)) => { + let mergeable_file = MergeableFile { + lines: lines, + current_line: next_line, + settings: &self.settings, + }; + self.heap.push(mergeable_file); + } + _ => {} + } + } +} + +impl<'a> Iterator for FileMerger<'a> { + type Item = String; + fn next(&mut self) -> Option { + match self.heap.pop() { + Some(mut current) => { + match current.lines.next() { + Some(Ok(next_line)) => { + let ret = replace(&mut current.current_line, next_line); + self.heap.push(current); + Some(ret) + }, + _ => { + // Don't put it back in the heap (it's empty/erroring) + // but its first line is still valid. + Some(current.current_line) + }, + } + }, + None => None, } } } @@ -71,6 +159,7 @@ pub fn uumain(args: Vec) -> i32 { opts.optflag("r", "reverse", "reverse the output"); opts.optflag("h", "help", "display this help and exit"); opts.optflag("", "version", "output version information and exit"); + opts.optflag("m", "merge", "merge already sorted files; do not sort"); opts.optopt("o", "output", "write output to FILENAME instead of stdout", "FILENAME"); opts.optflag("s", "stable", "stabilize sort by disabling last-resort comparison"); opts.optflag("u", "unique", "output only the first of an equal run"); @@ -113,6 +202,7 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION); SortMode::Default }; + settings.merge = matches.opt_present("merge"); settings.reverse = matches.opt_present("reverse"); settings.outfile = matches.opt_str("output"); settings.stable = matches.opt_present("stable"); @@ -124,35 +214,12 @@ With no FILE, or when FILE is -, read standard input.", NAME, VERSION); /* if no file, default to stdin */ files.push("-".to_owned()); } + else if settings.check && files.len() != 1 { + crash!(1, "sort: extra operand `{}' not allowed with -c", files[1]) - exec(files, &settings) -} - -fn exec(files: Vec, settings: &Settings) -> i32 { - let mut lines = Vec::new(); - for path in &files { - let (reader, _) = match open(path) { - Some(x) => x, - None => continue, - }; - - let buf_reader = BufReader::new(reader); - - for line in buf_reader.lines() { - match line { - Ok(n) => { - lines.push(n); - }, - _ => break - } - } } - let original_lines = lines.to_vec(); - - let mut compare_fns = Vec::new(); - - compare_fns.push(match settings.mode { + settings.compare_fns.push(match settings.mode { SortMode::Numeric => numeric_compare, SortMode::HumanNumeric => human_numeric_size_compare, SortMode::Month => month_compare, @@ -163,48 +230,123 @@ fn exec(files: Vec, settings: &Settings) -> i32 { if !settings.stable { match settings.mode { SortMode::Default => {} - _ => compare_fns.push(String::cmp) + _ => settings.compare_fns.push(String::cmp) } } - sort_by(&mut lines, compare_fns); + exec(files, &settings) +} - if settings.unique { - lines.dedup() - } +fn exec(files: Vec, settings: &Settings) -> i32 { + let mut lines = Vec::new(); + let mut file_merger = FileMerger::new(&settings); - if settings.reverse { - lines.reverse() - } + for path in &files { + let (reader, _) = match open(path) { + Some(x) => x, + None => continue, + }; - if settings.check { - for (i, line) in lines.iter().enumerate() { - if line != &original_lines[i] { - println!("sort: disorder in line {}", i); - return 1; + let buf_reader = BufReader::new(reader); + + if settings.merge { + file_merger.push_file(buf_reader.lines()); + } + else if settings.check { + return exec_check_file(buf_reader.lines(), &settings) + } + else { + for line in buf_reader.lines() { + if let Ok(n) = line { + lines.push(n); + } + else { + break; + } } } } + + sort_by(&mut lines, &settings); + + if settings.merge { + if settings.unique { + print_sorted(file_merger.dedup(), &settings.outfile) + } + else { + print_sorted(file_merger, &settings.outfile) + } + } else { - print_sorted(lines.iter(), &settings.outfile) + if settings.unique { + print_sorted(lines.iter().dedup(), &settings.outfile) + } + else { + print_sorted(lines.iter(), &settings.outfile) + } } 0 } -fn sort_by(lines: &mut Vec, compare_fns: Vec) - where F: Fn( &String, &String ) -> Ordering -{ +fn exec_check_file(lines: Lines>>, settings: &Settings) -> i32 { + // errors yields the line before each disorder, + // plus the last line (quirk of .coalesce()) + let unwrapped_lines = lines.filter_map(|maybe_line| { + if let Ok(line) = maybe_line { + Some(line) + } + else { + None + } + }); + let mut errors = unwrapped_lines.enumerate().coalesce( + |(last_i, last_line), (i, line)| { + if compare_by(&last_line, &line, &settings) == Ordering::Greater { + Err(((last_i, last_line), (i, line))) + } + else { + Ok((i, line)) + } + }); + if let Some((first_error_index, _line)) = errors.next() { + // Check for a second "error", as .coalesce() always returns the last + // line, no matter what our merging function does. + if let Some(_last_line_or_next_error) = errors.next() { + println!("sort: disorder in line {}", first_error_index); + return 1; + } + else { + // first "error" was actually the last line. + return 0; + } + } + else { + // unwrapped_lines was empty. Empty files are defined to be sorted. + return 0; + } +} + +fn sort_by(lines: &mut Vec, settings: &Settings) { lines.sort_by(|a, b| { - for compare_fn in &compare_fns { - let cmp = compare_fn(a, b); - if cmp != Ordering::Equal { + compare_by(a, b, &settings) + }) +} + +fn compare_by(a: &String, b: &String, settings: &Settings) -> Ordering { + for compare_fn in &settings.compare_fns { + let cmp = compare_fn(a, b); + if cmp != Ordering::Equal { + if settings.reverse { + return cmp.reverse(); + } + else { return cmp; } } - return Ordering::Equal; - }) + } + return Ordering::Equal; } /// Parse the beginning string into an f64, returning -inf instead of NaN on errors. diff --git a/tests/fixtures/sort/merge_ints_interleaved.expected b/tests/fixtures/sort/merge_ints_interleaved.expected new file mode 100644 index 000000000..071939893 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_interleaved.expected @@ -0,0 +1,9 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/fixtures/sort/merge_ints_interleaved_1.txt b/tests/fixtures/sort/merge_ints_interleaved_1.txt new file mode 100644 index 000000000..6e181b92b --- /dev/null +++ b/tests/fixtures/sort/merge_ints_interleaved_1.txt @@ -0,0 +1,3 @@ +1 +4 +7 diff --git a/tests/fixtures/sort/merge_ints_interleaved_2.txt b/tests/fixtures/sort/merge_ints_interleaved_2.txt new file mode 100644 index 000000000..62ffd8a69 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_interleaved_2.txt @@ -0,0 +1,3 @@ +2 +5 +8 diff --git a/tests/fixtures/sort/merge_ints_interleaved_3.txt b/tests/fixtures/sort/merge_ints_interleaved_3.txt new file mode 100644 index 000000000..1e3ac1e93 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_interleaved_3.txt @@ -0,0 +1,3 @@ +3 +6 +9 diff --git a/tests/fixtures/sort/merge_ints_reversed.expected b/tests/fixtures/sort/merge_ints_reversed.expected new file mode 100644 index 000000000..abb8f7739 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_reversed.expected @@ -0,0 +1,9 @@ +9 +8 +7 +6 +5 +4 +3 +2 +1 diff --git a/tests/fixtures/sort/merge_ints_reversed_1.txt b/tests/fixtures/sort/merge_ints_reversed_1.txt new file mode 100644 index 000000000..8313069f4 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_reversed_1.txt @@ -0,0 +1,3 @@ +7 +4 +1 diff --git a/tests/fixtures/sort/merge_ints_reversed_2.txt b/tests/fixtures/sort/merge_ints_reversed_2.txt new file mode 100644 index 000000000..c0416aa97 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_reversed_2.txt @@ -0,0 +1,3 @@ +8 +5 +2 diff --git a/tests/fixtures/sort/merge_ints_reversed_3.txt b/tests/fixtures/sort/merge_ints_reversed_3.txt new file mode 100644 index 000000000..bd33aa425 --- /dev/null +++ b/tests/fixtures/sort/merge_ints_reversed_3.txt @@ -0,0 +1,3 @@ +9 +6 +3 diff --git a/tests/test_sort.rs b/tests/test_sort.rs index dbbdb92f5..18180d4a4 100644 --- a/tests/test_sort.rs +++ b/tests/test_sort.rs @@ -67,7 +67,42 @@ fn test_multiple_files() { .arg("-n") .arg("multiple_files1.txt") .arg("multiple_files2.txt") - .succeeds().stdout_is_fixture("multiple_files.expected"); + .succeeds().stdout_only_fixture("multiple_files.expected"); +} + +#[test] +fn test_merge_interleaved() { + new_ucmd() + .arg("-m") + .arg("merge_ints_interleaved_1.txt") + .arg("merge_ints_interleaved_2.txt") + .arg("merge_ints_interleaved_3.txt") + .succeeds().stdout_only_fixture("merge_ints_interleaved.expected"); +} + +#[test] +fn test_merge_unique() { + new_ucmd() + .arg("-m") + .arg("--unique") + .arg("merge_ints_interleaved_1.txt") + .arg("merge_ints_interleaved_2.txt") + .arg("merge_ints_interleaved_3.txt") + .arg("merge_ints_interleaved_3.txt") + .arg("merge_ints_interleaved_2.txt") + .arg("merge_ints_interleaved_1.txt") + .succeeds().stdout_only_fixture("merge_ints_interleaved.expected"); +} + +#[test] +fn test_merge_reversed() { + new_ucmd() + .arg("-m") + .arg("--reverse") + .arg("merge_ints_reversed_1.txt") + .arg("merge_ints_reversed_2.txt") + .arg("merge_ints_reversed_3.txt") + .succeeds().stdout_only_fixture("merge_ints_reversed.expected"); } #[test]