From 642633fe3b37dd8d047788805e2eb1a9ead27c67 Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Sun, 11 Feb 2018 18:23:12 +0300 Subject: [PATCH] join: check line order --- src/join/join.rs | 116 ++++++++++++++++++++++++++++++++++++++------- tests/test_join.rs | 8 ++++ 2 files changed, 106 insertions(+), 18 deletions(-) diff --git a/src/join/join.rs b/src/join/join.rs index ab116e4a6..30acc5d09 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -36,6 +36,13 @@ enum Sep { Whitespaces, } +#[derive(Copy, Clone, PartialEq)] +enum CheckOrder { + Default, + Disabled, + Enabled, +} + struct Settings { key1: usize, key2: usize, @@ -45,6 +52,7 @@ struct Settings { autoformat: bool, format: Vec, empty: String, + check_order: CheckOrder, } impl Default for Settings { @@ -58,6 +66,7 @@ impl Default for Settings { autoformat: false, format: vec![], empty: String::new(), + check_order: CheckOrder::Default, } } } @@ -121,6 +130,23 @@ impl<'a> Repr<'a> { } } +/// Input processing parameters. +struct Input { + separator: Sep, + ignore_case: bool, + check_order: CheckOrder, +} + +impl Input { + fn new(separator: Sep, ignore_case: bool, check_order: CheckOrder) -> Input { + Input { + separator, + ignore_case, + check_order, + } + } +} + enum Spec { Key, Field(FileNum, usize), @@ -179,17 +205,20 @@ impl Line { struct State<'a> { key: usize, + file_name: &'a str, file_num: FileNum, print_unpaired: bool, lines: Lines>, seq: Vec, max_fields: usize, + line_num: usize, + has_failed: bool, } impl<'a> State<'a> { fn new( file_num: FileNum, - name: &str, + name: &'a str, stdin: &'a Stdin, key: usize, print_unpaired: FileNum, @@ -205,11 +234,14 @@ impl<'a> State<'a> { State { key: key, + file_name: name, file_num: file_num, print_unpaired: print_unpaired == file_num, lines: f.lines(), seq: Vec::new(), max_fields: usize::max_value(), + line_num: 0, + has_failed: false, } } @@ -222,12 +254,12 @@ impl<'a> State<'a> { } /// Skip the current unpaired line. - fn skip_line(&mut self, read_sep: Sep, repr: &Repr) { + fn skip_line(&mut self, input: &Input, repr: &Repr) { if self.print_unpaired { self.print_unpaired_line(&self.seq[0], repr); } - match self.read_line(read_sep) { + match self.next_line(input) { Some(line) => self.seq[0] = line, None => self.seq.clear(), } @@ -235,12 +267,12 @@ impl<'a> State<'a> { /// Keep reading line sequence until the key does not change, return /// the first line whose key differs. - fn extend(&mut self, read_sep: Sep, ignore_case: bool) -> Option { - while let Some(line) = self.read_line(read_sep) { + fn extend(&mut self, input: &Input) -> Option { + while let Some(line) = self.next_line(input) { let diff = compare( self.seq[0].get_field(self.key), line.get_field(self.key), - ignore_case, + input.ignore_case, ); if diff == Ordering::Equal { @@ -308,21 +340,48 @@ impl<'a> State<'a> { } } - fn finalize(&mut self, read_sep: Sep, repr: &Repr) { + fn finalize(&mut self, input: &Input, repr: &Repr) { if self.has_line() && self.print_unpaired { self.print_unpaired_line(&self.seq[0], repr); - while let Some(line) = self.read_line(read_sep) { + while let Some(line) = self.next_line(input) { self.print_unpaired_line(&line, repr); } } } fn read_line(&mut self, sep: Sep) -> Option { - match self.lines.next() { - Some(value) => Some(Line::new(crash_if_err!(1, value), sep)), - None => None, + let value = self.lines.next()?; + self.line_num += 1; + Some(Line::new(crash_if_err!(1, value), sep)) + } + + /// Prepare the next line. + fn next_line(&mut self, input: &Input) -> Option { + let line = self.read_line(input.separator)?; + + if input.check_order == CheckOrder::Disabled { + return Some(line); } + + let diff = compare( + self.seq[self.seq.len() - 1].get_field(self.key), + line.get_field(self.key), + input.ignore_case, + ); + + if diff == Ordering::Greater { + eprintln!("{}:{}: is not sorted", self.file_name, self.line_num); + + // This is fatal if the check is enabled. + if input.check_order == CheckOrder::Enabled { + exit!(1); + } + + self.has_failed = true; + } + + Some(line) } fn print_unpaired_line(&self, line: &Line, repr: &Repr) { @@ -395,6 +454,13 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) .takes_value(true) .value_name("FIELD") .help("join on this FIELD of file 2")) + .arg(Arg::with_name("check-order") + .long("check-order") + .help("check that the input is correctly sorted, \ + even if all input lines are pairable")) + .arg(Arg::with_name("nocheck-order") + .long("nocheck-order") + .help("do not check that the input is correctly sorted")) .arg(Arg::with_name("file1") .required(true) .value_name("FILE1") @@ -445,6 +511,14 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) settings.empty = empty.to_string(); } + if matches.is_present("nocheck-order") { + settings.check_order = CheckOrder::Disabled; + } + + if matches.is_present("check-order") { + settings.check_order = CheckOrder::Enabled; + } + let file1 = matches.value_of("file1").unwrap(); let file2 = matches.value_of("file2").unwrap(); @@ -474,6 +548,12 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { settings.print_unpaired, ); + let input = Input::new( + settings.separator, + settings.ignore_case, + settings.check_order, + ); + let repr = Repr::new( match settings.separator { Sep::Char(sep) => sep, @@ -491,14 +571,14 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { match diff { Ordering::Less => { - state1.skip_line(settings.separator, &repr); + state1.skip_line(&input, &repr); } Ordering::Greater => { - state2.skip_line(settings.separator, &repr); + state2.skip_line(&input, &repr); } Ordering::Equal => { - let next_line1 = state1.extend(settings.separator, settings.ignore_case); - let next_line2 = state2.extend(settings.separator, settings.ignore_case); + let next_line1 = state1.extend(&input); + let next_line2 = state2.extend(&input); state1.combine(&state2, &repr); @@ -508,10 +588,10 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { } } - state1.finalize(settings.separator, &repr); - state2.finalize(settings.separator, &repr); + state1.finalize(&input, &repr); + state2.finalize(&input, &repr); - 0 + (state1.has_failed || state2.has_failed) as i32 } /// Check that keys for both files and for a particular file are not diff --git a/tests/test_join.rs b/tests/test_join.rs index c932955c7..2ceb7c97d 100644 --- a/tests/test_join.rs +++ b/tests/test_join.rs @@ -199,3 +199,11 @@ fn missing_format_fields() { .arg("x") .succeeds().stdout_only_fixture("missing_format_fields.expected"); } + +#[test] +fn wrong_line_order() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_4.txt") + .fails().stderr_is("fields_4.txt:5: is not sorted"); +}