From 12c5c951fb0498b6ad015297da6affc31919d0a8 Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Sat, 6 Jan 2018 22:49:07 +0300 Subject: [PATCH] join: implement the -o option --- src/join/join.rs | 252 ++++++++++++++---- tests/fixtures/join/autoformat.expected | 5 + tests/fixtures/join/different_lengths.txt | 5 + .../join/unpaired_lines_format.expected | 6 + tests/test_join.rs | 49 ++++ 5 files changed, 259 insertions(+), 58 deletions(-) create mode 100644 tests/fixtures/join/autoformat.expected create mode 100644 tests/fixtures/join/different_lengths.txt create mode 100644 tests/fixtures/join/unpaired_lines_format.expected diff --git a/src/join/join.rs b/src/join/join.rs index 24b5f8e92..37dedeb8b 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -16,13 +16,13 @@ extern crate uucore; use std::fs::File; use std::io::{BufRead, BufReader, Lines, Stdin, stdin}; -use std::cmp::Ordering; +use std::cmp::{min, Ordering}; use clap::{App, Arg}; static NAME: &'static str = "join"; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); -#[derive(PartialEq)] +#[derive(Copy, Clone, PartialEq)] enum FileNum { None, File1, @@ -42,6 +42,8 @@ struct Settings { print_unpaired: FileNum, ignore_case: bool, separator: Sep, + autoformat: bool, + format: Vec, } impl Default for Settings { @@ -52,10 +54,87 @@ impl Default for Settings { print_unpaired: FileNum::None, ignore_case: false, separator: Sep::Whitespaces, + autoformat: false, + format: vec![], } } } +/// Output representation. +struct Repr<'a> { + separator: char, + format: &'a [Spec], +} + +impl<'a> Repr<'a> { + fn new(separator: char, format: &'a [Spec]) -> Repr { + Repr { separator, format } + } + + fn uses_format(&self) -> bool { + !self.format.is_empty() + } + + /// Print each field except the one at the index. + fn print_fields(&self, line: &Line, index: usize, max_fields: usize) { + for i in 0..min(max_fields, line.fields.len()) { + if i != index { + print!("{}{}", self.separator, line.fields[i]); + } + } + } + + /// Print each field or the empty filler if the field is not set. + fn print_format(&self, f: F) + where + F: Fn(&Spec) -> Option<&'a str>, + { + for i in 0..self.format.len() { + if i > 0 { + print!("{}", self.separator); + } + + let field = match f(&self.format[i]) { + Some(value) => value, + None => "", + }; + + print!("{}", field); + } + } +} + +enum Spec { + Key, + Field(FileNum, usize), +} + +impl Spec { + fn parse(format: &str) -> Spec { + let mut chars = format.chars(); + + let file_num = match chars.next() { + Some('0') => { + // Must be all alone without a field specifier. + if let None = chars.next() { + return Spec::Key; + } + + crash!(1, "invalid field specifier: '{}'", format); + } + Some('1') => FileNum::File1, + Some('2') => FileNum::File2, + _ => crash!(1, "invalid file number in field spec: '{}'", format), + }; + + if let Some('.') = chars.next() { + return Spec::Field(file_num, parse_field_number(chars.as_str())); + } + + crash!(1, "invalid field specifier: '{}'", format); + } +} + struct Line { fields: Vec, } @@ -79,26 +158,25 @@ impl Line { "" } } - - /// Print each field except the one at the index. - fn print_fields(&self, index: usize, separator: char) { - for i in 0..self.fields.len() { - if i != index { - print!("{}{}", separator, self.fields[i]); - } - } - } } struct State<'a> { key: usize, + file_num: FileNum, print_unpaired: bool, lines: Lines>, seq: Vec, + max_fields: usize, } impl<'a> State<'a> { - fn new(name: &str, stdin: &'a Stdin, key: usize, print_unpaired: bool) -> State<'a> { + fn new( + file_num: FileNum, + name: &str, + stdin: &'a Stdin, + key: usize, + print_unpaired: FileNum, + ) -> State<'a> { let f = if name == "-" { Box::new(stdin.lock()) as Box } else { @@ -110,9 +188,11 @@ impl<'a> State<'a> { State { key: key, - print_unpaired: print_unpaired, + file_num: file_num, + print_unpaired: print_unpaired == file_num, lines: f.lines(), seq: Vec::new(), + max_fields: usize::max_value(), } } @@ -125,9 +205,9 @@ impl<'a> State<'a> { } /// Skip the current unpaired line. - fn skip_line(&mut self, read_sep: Sep, write_sep: char) { + fn skip_line(&mut self, read_sep: Sep, repr: &Repr) { if self.print_unpaired { - self.print_unpaired_line(&self.seq[0], write_sep); + self.print_unpaired_line(&self.seq[0], repr); } match self.read_line(read_sep) { @@ -157,14 +237,32 @@ impl<'a> State<'a> { } /// Combine two line sequences. - fn combine(&self, other: &State, write_sep: char) { + fn combine(&self, other: &State, repr: &Repr) { let key = self.seq[0].get_field(self.key); for line1 in &self.seq { for line2 in &other.seq { - print!("{}", key); - line1.print_fields(self.key, write_sep); - line2.print_fields(other.key, write_sep); + if repr.uses_format() { + repr.print_format(|spec| match spec { + &Spec::Key => Some(key), + &Spec::Field(file_num, field_num) => { + if file_num == self.file_num { + return Some(line1.get_field(field_num)); + } + + if file_num == other.file_num { + return Some(line2.get_field(field_num)); + } + + None + } + }); + } else { + print!("{}", key); + repr.print_fields(&line1, self.key, self.max_fields); + repr.print_fields(&line2, other.key, self.max_fields); + } + println!(); } } @@ -183,18 +281,22 @@ impl<'a> State<'a> { !self.seq.is_empty() } - fn initialize(&mut self, read_sep: Sep) { + fn initialize(&mut self, read_sep: Sep, autoformat: bool) { if let Some(line) = self.read_line(read_sep) { + if autoformat { + self.max_fields = line.fields.len(); + } + self.seq.push(line); } } - fn finalize(&mut self, read_sep: Sep, write_sep: char) { + fn finalize(&mut self, read_sep: Sep, repr: &Repr) { if self.has_line() && self.print_unpaired { - self.print_unpaired_line(&self.seq[0], write_sep); + self.print_unpaired_line(&self.seq[0], repr); while let Some(line) = self.read_line(read_sep) { - self.print_unpaired_line(&line, write_sep); + self.print_unpaired_line(&line, repr); } } } @@ -206,9 +308,21 @@ impl<'a> State<'a> { } } - fn print_unpaired_line(&self, line: &Line, sep: char) { - print!("{}", line.get_field(self.key)); - line.print_fields(self.key, sep); + fn print_unpaired_line(&self, line: &Line, repr: &Repr) { + if repr.uses_format() { + repr.print_format(|spec| match spec { + &Spec::Key => Some(line.get_field(self.key)), + &Spec::Field(file_num, field_num) => if file_num == self.file_num { + Some(line.get_field(field_num)) + } else { + None + }, + }); + } else { + print!("{}", line.get_field(self.key)); + repr.print_fields(line, self.key, self.max_fields); + } + println!(); } } @@ -239,6 +353,11 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) .takes_value(true) .value_name("FIELD") .help("equivalent to '-1 FIELD -2 FIELD'")) + .arg(Arg::with_name("o") + .short("o") + .takes_value(true) + .value_name("FORMAT") + .help("obey FORMAT while constructing output line")) .arg(Arg::with_name("t") .short("t") .takes_value(true) @@ -264,9 +383,9 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) .hidden(true)) .get_matches_from(args); - let keys = parse_field_number(matches.value_of("j")); - let key1 = parse_field_number(matches.value_of("1")); - let key2 = parse_field_number(matches.value_of("2")); + let keys = parse_field_number_option(matches.value_of("j")); + let key1 = parse_field_number_option(matches.value_of("1")); + let key2 = parse_field_number_option(matches.value_of("2")); let mut settings: Settings = Default::default(); settings.print_unpaired = match matches.value_of("a") { @@ -274,9 +393,9 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) match value { "1" => FileNum::File1, "2" => FileNum::File2, - value => crash!(1, "invalid file number: {}", value), + value => crash!(1, "invalid file number: '{}'", value), } - } + }, None => FileNum::None, }; settings.ignore_case = matches.is_present("i"); @@ -291,6 +410,17 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) }; } + if let Some(format) = matches.value_of("o") { + if format == "auto" { + settings.autoformat = true; + } else { + settings.format = format + .split(|c| c == ' ' || c == ',' || c == '\t') + .map(Spec::parse) + .collect(); + } + } + let file1 = matches.value_of("file1").unwrap(); let file2 = matches.value_of("file2").unwrap(); @@ -305,42 +435,47 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { let stdin = stdin(); let mut state1 = State::new( + FileNum::File1, &file1, &stdin, settings.key1, - settings.print_unpaired == FileNum::File1, + settings.print_unpaired, ); let mut state2 = State::new( + FileNum::File2, &file2, &stdin, settings.key2, - settings.print_unpaired == FileNum::File2, + settings.print_unpaired, ); - let write_sep = match settings.separator { - Sep::Char(sep) => sep, - _ => ' ', - }; + let repr = Repr::new( + match settings.separator { + Sep::Char(sep) => sep, + _ => ' ', + }, + &settings.format, + ); - state1.initialize(settings.separator); - state2.initialize(settings.separator); + state1.initialize(settings.separator, settings.autoformat); + state2.initialize(settings.separator, settings.autoformat); while state1.has_line() && state2.has_line() { let diff = state1.compare(&state2, settings.ignore_case); match diff { Ordering::Less => { - state1.skip_line(settings.separator, write_sep); + state1.skip_line(settings.separator, &repr); } Ordering::Greater => { - state2.skip_line(settings.separator, write_sep); + state2.skip_line(settings.separator, &repr); } Ordering::Equal => { let next_line1 = state1.extend(settings.separator, settings.ignore_case); let next_line2 = state2.extend(settings.separator, settings.ignore_case); - state1.combine(&state2, write_sep); + state1.combine(&state2, &repr); state1.reset(next_line1); state2.reset(next_line2); @@ -348,44 +483,45 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { } } - state1.finalize(settings.separator, write_sep); - state2.finalize(settings.separator, write_sep); + state1.finalize(settings.separator, &repr); + state2.finalize(settings.separator, &repr); 0 } /// Check that keys for both files and for a particular file are not -/// contradictory and return the zero-based key index. +/// contradictory and return the key index. fn get_field_number(keys: Option, key: Option) -> usize { if let Some(keys) = keys { if let Some(key) = key { if keys != key { - crash!(1, "incompatible join fields {}, {}", keys, key); + // Show zero-based field numbers as one-based. + crash!(1, "incompatible join fields {}, {}", keys + 1, key + 1); } } - return keys - 1; + return keys; } match key { - Some(key) => key - 1, + Some(key) => key, None => 0, } } -/// Parse the specified field string as a natural number and return it. -fn parse_field_number(value: Option<&str>) -> Option { - match value { - Some(value) => { - match value.parse() { - Ok(result) if result > 0 => Some(result), - _ => crash!(1, "invalid field number: '{}'", value), - } - } - None => None, +/// Parse the specified field string as a natural number and return +/// the zero-based field number. +fn parse_field_number(value: &str) -> usize { + match value.parse::() { + Ok(result) if result > 0 => result - 1, + _ => crash!(1, "invalid field number: '{}'", value), } } +fn parse_field_number_option(value: Option<&str>) -> Option { + Some(parse_field_number(value?)) +} + fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering { if ignore_case { field1.to_lowercase().cmp(&field2.to_lowercase()) diff --git a/tests/fixtures/join/autoformat.expected b/tests/fixtures/join/autoformat.expected new file mode 100644 index 000000000..576f91092 --- /dev/null +++ b/tests/fixtures/join/autoformat.expected @@ -0,0 +1,5 @@ +1 a a +2 b b +3 c d +4 d g +5 e i diff --git a/tests/fixtures/join/different_lengths.txt b/tests/fixtures/join/different_lengths.txt new file mode 100644 index 000000000..3d4a53d78 --- /dev/null +++ b/tests/fixtures/join/different_lengths.txt @@ -0,0 +1,5 @@ +1 a +2 b c +3 d e f +4 g h +5 i diff --git a/tests/fixtures/join/unpaired_lines_format.expected b/tests/fixtures/join/unpaired_lines_format.expected new file mode 100644 index 000000000..d1324aa66 --- /dev/null +++ b/tests/fixtures/join/unpaired_lines_format.expected @@ -0,0 +1,6 @@ + f 2 a + g 3 b + h 4 c + i 5 f + j 6 g + k 7 h diff --git a/tests/test_join.rs b/tests/test_join.rs index 560d97d17..adf082f15 100644 --- a/tests/test_join.rs +++ b/tests/test_join.rs @@ -124,3 +124,52 @@ fn multitab_character() { .arg("э") .fails().stderr_is("join: error: multi-character tab э"); } + +#[test] +fn default_format() { + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .arg("-o") + .arg("1.1 2.2") + .succeeds().stdout_only_fixture("default.expected"); + + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .arg("-o") + .arg("0 2.2") + .succeeds().stdout_only_fixture("default.expected"); +} + +#[test] +fn unpaired_lines_format() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_3.txt") + .arg("-a") + .arg("2") + .arg("-o") + .arg("1.2 1.1 2.4 2.3 2.2 0") + .succeeds().stdout_only_fixture("unpaired_lines_format.expected"); +} + +#[test] +fn autoformat() { + new_ucmd!() + .arg("fields_2.txt") + .arg("different_lengths.txt") + .arg("-o") + .arg("auto") + .succeeds().stdout_only_fixture("autoformat.expected"); +} + +#[test] +fn empty_format() { + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .arg("-o") + .arg("") + .fails().stderr_is("join: error: invalid file number in field spec: ''"); +}