1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge pull request #1140 from kupospelov/master

join: check line order
This commit is contained in:
Alex Lyon 2018-02-12 17:25:38 -08:00 committed by GitHub
commit d4c3c94848
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 18 deletions

View file

@ -36,6 +36,13 @@ enum Sep {
Whitespaces, Whitespaces,
} }
/// How strictly the sort order of the input lines is verified.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum CheckOrder {
    /// Neither `--check-order` nor `--nocheck-order` was given: an
    /// out-of-order line is reported and recorded as a failure, but
    /// processing continues.
    Default,
    /// `--nocheck-order` was given: lines are never compared for order.
    Disabled,
    /// `--check-order` was given: the first out-of-order line is reported
    /// and the program exits immediately.
    Enabled,
}
struct Settings { struct Settings {
key1: usize, key1: usize,
key2: usize, key2: usize,
@ -45,6 +52,7 @@ struct Settings {
autoformat: bool, autoformat: bool,
format: Vec<Spec>, format: Vec<Spec>,
empty: String, empty: String,
check_order: CheckOrder,
} }
impl Default for Settings { impl Default for Settings {
@ -58,6 +66,7 @@ impl Default for Settings {
autoformat: false, autoformat: false,
format: vec![], format: vec![],
empty: String::new(), empty: String::new(),
check_order: CheckOrder::Default,
} }
} }
} }
@ -121,6 +130,23 @@ impl<'a> Repr<'a> {
} }
} }
/// Input processing parameters.
///
/// Groups the settings that the line-reading methods of `State` consult,
/// so they can be handed over as a single reference.
struct Input {
/// Separator passed on to `read_line` when the next line is read.
separator: Sep,
/// Passed to `compare` whenever two join keys are compared.
ignore_case: bool,
/// Decides whether `next_line` verifies line order and whether a
/// violation is fatal.
check_order: CheckOrder,
}
impl Input {
fn new(separator: Sep, ignore_case: bool, check_order: CheckOrder) -> Input {
Input {
separator,
ignore_case,
check_order,
}
}
}
enum Spec { enum Spec {
Key, Key,
Field(FileNum, usize), Field(FileNum, usize),
@ -179,17 +205,20 @@ impl Line {
struct State<'a> { struct State<'a> {
key: usize, key: usize,
file_name: &'a str,
file_num: FileNum, file_num: FileNum,
print_unpaired: bool, print_unpaired: bool,
lines: Lines<Box<BufRead + 'a>>, lines: Lines<Box<BufRead + 'a>>,
seq: Vec<Line>, seq: Vec<Line>,
max_fields: usize, max_fields: usize,
line_num: usize,
has_failed: bool,
} }
impl<'a> State<'a> { impl<'a> State<'a> {
fn new( fn new(
file_num: FileNum, file_num: FileNum,
name: &str, name: &'a str,
stdin: &'a Stdin, stdin: &'a Stdin,
key: usize, key: usize,
print_unpaired: FileNum, print_unpaired: FileNum,
@ -205,11 +234,14 @@ impl<'a> State<'a> {
State { State {
key: key, key: key,
file_name: name,
file_num: file_num, file_num: file_num,
print_unpaired: print_unpaired == file_num, print_unpaired: print_unpaired == file_num,
lines: f.lines(), lines: f.lines(),
seq: Vec::new(), seq: Vec::new(),
max_fields: usize::max_value(), max_fields: usize::max_value(),
line_num: 0,
has_failed: false,
} }
} }
@ -222,12 +254,12 @@ impl<'a> State<'a> {
} }
/// Skip the current unpaired line. /// Skip the current unpaired line.
fn skip_line(&mut self, read_sep: Sep, repr: &Repr) { fn skip_line(&mut self, input: &Input, repr: &Repr) {
if self.print_unpaired { if self.print_unpaired {
self.print_unpaired_line(&self.seq[0], repr); self.print_unpaired_line(&self.seq[0], repr);
} }
match self.read_line(read_sep) { match self.next_line(input) {
Some(line) => self.seq[0] = line, Some(line) => self.seq[0] = line,
None => self.seq.clear(), None => self.seq.clear(),
} }
@ -235,12 +267,12 @@ impl<'a> State<'a> {
/// Keep reading line sequence until the key does not change, return /// Keep reading line sequence until the key does not change, return
/// the first line whose key differs. /// the first line whose key differs.
fn extend(&mut self, read_sep: Sep, ignore_case: bool) -> Option<Line> { fn extend(&mut self, input: &Input) -> Option<Line> {
while let Some(line) = self.read_line(read_sep) { while let Some(line) = self.next_line(input) {
let diff = compare( let diff = compare(
self.seq[0].get_field(self.key), self.seq[0].get_field(self.key),
line.get_field(self.key), line.get_field(self.key),
ignore_case, input.ignore_case,
); );
if diff == Ordering::Equal { if diff == Ordering::Equal {
@ -308,21 +340,48 @@ impl<'a> State<'a> {
} }
} }
fn finalize(&mut self, read_sep: Sep, repr: &Repr) { fn finalize(&mut self, input: &Input, repr: &Repr) {
if self.has_line() && self.print_unpaired { if self.has_line() && self.print_unpaired {
self.print_unpaired_line(&self.seq[0], repr); self.print_unpaired_line(&self.seq[0], repr);
while let Some(line) = self.read_line(read_sep) { while let Some(line) = self.next_line(input) {
self.print_unpaired_line(&line, repr); self.print_unpaired_line(&line, repr);
} }
} }
} }
fn read_line(&mut self, sep: Sep) -> Option<Line> { fn read_line(&mut self, sep: Sep) -> Option<Line> {
match self.lines.next() { let value = self.lines.next()?;
Some(value) => Some(Line::new(crash_if_err!(1, value), sep)), self.line_num += 1;
None => None, Some(Line::new(crash_if_err!(1, value), sep))
} }
/// Prepare the next line.
fn next_line(&mut self, input: &Input) -> Option<Line> {
let line = self.read_line(input.separator)?;
if input.check_order == CheckOrder::Disabled {
return Some(line);
}
let diff = compare(
self.seq[self.seq.len() - 1].get_field(self.key),
line.get_field(self.key),
input.ignore_case,
);
if diff == Ordering::Greater {
eprintln!("{}:{}: is not sorted", self.file_name, self.line_num);
// This is fatal if the check is enabled.
if input.check_order == CheckOrder::Enabled {
exit!(1);
}
self.has_failed = true;
}
Some(line)
} }
fn print_unpaired_line(&self, line: &Line, repr: &Repr) { fn print_unpaired_line(&self, line: &Line, repr: &Repr) {
@ -395,6 +454,13 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
.takes_value(true) .takes_value(true)
.value_name("FIELD") .value_name("FIELD")
.help("join on this FIELD of file 2")) .help("join on this FIELD of file 2"))
.arg(Arg::with_name("check-order")
.long("check-order")
.help("check that the input is correctly sorted, \
even if all input lines are pairable"))
.arg(Arg::with_name("nocheck-order")
.long("nocheck-order")
.help("do not check that the input is correctly sorted"))
.arg(Arg::with_name("file1") .arg(Arg::with_name("file1")
.required(true) .required(true)
.value_name("FILE1") .value_name("FILE1")
@ -445,6 +511,14 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
settings.empty = empty.to_string(); settings.empty = empty.to_string();
} }
if matches.is_present("nocheck-order") {
settings.check_order = CheckOrder::Disabled;
}
if matches.is_present("check-order") {
settings.check_order = CheckOrder::Enabled;
}
let file1 = matches.value_of("file1").unwrap(); let file1 = matches.value_of("file1").unwrap();
let file2 = matches.value_of("file2").unwrap(); let file2 = matches.value_of("file2").unwrap();
@ -474,6 +548,12 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
settings.print_unpaired, settings.print_unpaired,
); );
let input = Input::new(
settings.separator,
settings.ignore_case,
settings.check_order,
);
let repr = Repr::new( let repr = Repr::new(
match settings.separator { match settings.separator {
Sep::Char(sep) => sep, Sep::Char(sep) => sep,
@ -491,14 +571,14 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
match diff { match diff {
Ordering::Less => { Ordering::Less => {
state1.skip_line(settings.separator, &repr); state1.skip_line(&input, &repr);
} }
Ordering::Greater => { Ordering::Greater => {
state2.skip_line(settings.separator, &repr); state2.skip_line(&input, &repr);
} }
Ordering::Equal => { Ordering::Equal => {
let next_line1 = state1.extend(settings.separator, settings.ignore_case); let next_line1 = state1.extend(&input);
let next_line2 = state2.extend(settings.separator, settings.ignore_case); let next_line2 = state2.extend(&input);
state1.combine(&state2, &repr); state1.combine(&state2, &repr);
@ -508,10 +588,10 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
} }
} }
state1.finalize(settings.separator, &repr); state1.finalize(&input, &repr);
state2.finalize(settings.separator, &repr); state2.finalize(&input, &repr);
0 (state1.has_failed || state2.has_failed) as i32
} }
/// Check that keys for both files and for a particular file are not /// Check that keys for both files and for a particular file are not

View file

@ -199,3 +199,11 @@ fn missing_format_fields() {
.arg("x") .arg("x")
.succeeds().stdout_only_fixture("missing_format_fields.expected"); .succeeds().stdout_only_fixture("missing_format_fields.expected");
} }
/// Joining an input whose lines are out of key order must fail and name
/// the offending file and line number on stderr.
#[test]
fn wrong_line_order() {
new_ucmd!()
.arg("fields_2.txt")
.arg("fields_4.txt")
// No --check-order / --nocheck-order flag is passed, so this exercises
// the default mode; the expected text follows the
// "<file>:<line>: is not sorted" format.
.fails().stderr_is("fields_4.txt:5: is not sorted");
}