mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
join: implement the -o option
This commit is contained in:
parent
49cf7c2a5b
commit
12c5c951fb
5 changed files with 259 additions and 58 deletions
252
src/join/join.rs
252
src/join/join.rs
|
@ -16,13 +16,13 @@ extern crate uucore;
|
|||
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Lines, Stdin, stdin};
|
||||
use std::cmp::Ordering;
|
||||
use std::cmp::{min, Ordering};
|
||||
use clap::{App, Arg};
|
||||
|
||||
static NAME: &'static str = "join";
|
||||
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
#[derive(PartialEq)]
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
enum FileNum {
|
||||
None,
|
||||
File1,
|
||||
|
@ -42,6 +42,8 @@ struct Settings {
|
|||
print_unpaired: FileNum,
|
||||
ignore_case: bool,
|
||||
separator: Sep,
|
||||
autoformat: bool,
|
||||
format: Vec<Spec>,
|
||||
}
|
||||
|
||||
impl Default for Settings {
|
||||
|
@ -52,10 +54,87 @@ impl Default for Settings {
|
|||
print_unpaired: FileNum::None,
|
||||
ignore_case: false,
|
||||
separator: Sep::Whitespaces,
|
||||
autoformat: false,
|
||||
format: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Output representation.
|
||||
struct Repr<'a> {
|
||||
separator: char,
|
||||
format: &'a [Spec],
|
||||
}
|
||||
|
||||
impl<'a> Repr<'a> {
|
||||
fn new(separator: char, format: &'a [Spec]) -> Repr {
|
||||
Repr { separator, format }
|
||||
}
|
||||
|
||||
fn uses_format(&self) -> bool {
|
||||
!self.format.is_empty()
|
||||
}
|
||||
|
||||
/// Print each field except the one at the index.
|
||||
fn print_fields(&self, line: &Line, index: usize, max_fields: usize) {
|
||||
for i in 0..min(max_fields, line.fields.len()) {
|
||||
if i != index {
|
||||
print!("{}{}", self.separator, line.fields[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Print each field or the empty filler if the field is not set.
|
||||
fn print_format<F>(&self, f: F)
|
||||
where
|
||||
F: Fn(&Spec) -> Option<&'a str>,
|
||||
{
|
||||
for i in 0..self.format.len() {
|
||||
if i > 0 {
|
||||
print!("{}", self.separator);
|
||||
}
|
||||
|
||||
let field = match f(&self.format[i]) {
|
||||
Some(value) => value,
|
||||
None => "",
|
||||
};
|
||||
|
||||
print!("{}", field);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum Spec {
|
||||
Key,
|
||||
Field(FileNum, usize),
|
||||
}
|
||||
|
||||
impl Spec {
|
||||
fn parse(format: &str) -> Spec {
|
||||
let mut chars = format.chars();
|
||||
|
||||
let file_num = match chars.next() {
|
||||
Some('0') => {
|
||||
// Must be all alone without a field specifier.
|
||||
if let None = chars.next() {
|
||||
return Spec::Key;
|
||||
}
|
||||
|
||||
crash!(1, "invalid field specifier: '{}'", format);
|
||||
}
|
||||
Some('1') => FileNum::File1,
|
||||
Some('2') => FileNum::File2,
|
||||
_ => crash!(1, "invalid file number in field spec: '{}'", format),
|
||||
};
|
||||
|
||||
if let Some('.') = chars.next() {
|
||||
return Spec::Field(file_num, parse_field_number(chars.as_str()));
|
||||
}
|
||||
|
||||
crash!(1, "invalid field specifier: '{}'", format);
|
||||
}
|
||||
}
|
||||
|
||||
/// A line represented by its fields.
struct Line {
    /// The fields of the line.
    fields: Vec<String>,
}
|
||||
|
@ -79,26 +158,25 @@ impl Line {
|
|||
""
|
||||
}
|
||||
}
|
||||
|
||||
/// Print each field except the one at the index.
|
||||
fn print_fields(&self, index: usize, separator: char) {
|
||||
for i in 0..self.fields.len() {
|
||||
if i != index {
|
||||
print!("{}{}", separator, self.fields[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct State<'a> {
|
||||
key: usize,
|
||||
file_num: FileNum,
|
||||
print_unpaired: bool,
|
||||
lines: Lines<Box<BufRead + 'a>>,
|
||||
seq: Vec<Line>,
|
||||
max_fields: usize,
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
fn new(name: &str, stdin: &'a Stdin, key: usize, print_unpaired: bool) -> State<'a> {
|
||||
fn new(
|
||||
file_num: FileNum,
|
||||
name: &str,
|
||||
stdin: &'a Stdin,
|
||||
key: usize,
|
||||
print_unpaired: FileNum,
|
||||
) -> State<'a> {
|
||||
let f = if name == "-" {
|
||||
Box::new(stdin.lock()) as Box<BufRead>
|
||||
} else {
|
||||
|
@ -110,9 +188,11 @@ impl<'a> State<'a> {
|
|||
|
||||
State {
|
||||
key: key,
|
||||
print_unpaired: print_unpaired,
|
||||
file_num: file_num,
|
||||
print_unpaired: print_unpaired == file_num,
|
||||
lines: f.lines(),
|
||||
seq: Vec::new(),
|
||||
max_fields: usize::max_value(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,9 +205,9 @@ impl<'a> State<'a> {
|
|||
}
|
||||
|
||||
/// Skip the current unpaired line.
|
||||
fn skip_line(&mut self, read_sep: Sep, write_sep: char) {
|
||||
fn skip_line(&mut self, read_sep: Sep, repr: &Repr) {
|
||||
if self.print_unpaired {
|
||||
self.print_unpaired_line(&self.seq[0], write_sep);
|
||||
self.print_unpaired_line(&self.seq[0], repr);
|
||||
}
|
||||
|
||||
match self.read_line(read_sep) {
|
||||
|
@ -157,14 +237,32 @@ impl<'a> State<'a> {
|
|||
}
|
||||
|
||||
/// Combine two line sequences.
|
||||
fn combine(&self, other: &State, write_sep: char) {
|
||||
fn combine(&self, other: &State, repr: &Repr) {
|
||||
let key = self.seq[0].get_field(self.key);
|
||||
|
||||
for line1 in &self.seq {
|
||||
for line2 in &other.seq {
|
||||
print!("{}", key);
|
||||
line1.print_fields(self.key, write_sep);
|
||||
line2.print_fields(other.key, write_sep);
|
||||
if repr.uses_format() {
|
||||
repr.print_format(|spec| match spec {
|
||||
&Spec::Key => Some(key),
|
||||
&Spec::Field(file_num, field_num) => {
|
||||
if file_num == self.file_num {
|
||||
return Some(line1.get_field(field_num));
|
||||
}
|
||||
|
||||
if file_num == other.file_num {
|
||||
return Some(line2.get_field(field_num));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
});
|
||||
} else {
|
||||
print!("{}", key);
|
||||
repr.print_fields(&line1, self.key, self.max_fields);
|
||||
repr.print_fields(&line2, other.key, self.max_fields);
|
||||
}
|
||||
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
@ -183,18 +281,22 @@ impl<'a> State<'a> {
|
|||
!self.seq.is_empty()
|
||||
}
|
||||
|
||||
fn initialize(&mut self, read_sep: Sep) {
|
||||
/// Read the first line of the input, if there is one, and store it.
///
/// In autoformat mode the number of fields of that first line becomes
/// the field limit (`max_fields`) for this file.
fn initialize(&mut self, read_sep: Sep, autoformat: bool) {
    let first = match self.read_line(read_sep) {
        Some(line) => line,
        None => return,
    };

    if autoformat {
        self.max_fields = first.fields.len();
    }

    self.seq.push(first);
}
|
||||
|
||||
fn finalize(&mut self, read_sep: Sep, write_sep: char) {
|
||||
/// Print the remaining lines of this file as unpaired lines.
///
/// Nothing happens unless a line is currently held and unpaired lines
/// are printed for this file. Otherwise the held line is printed first,
/// followed by every line that can still be read.
fn finalize(&mut self, read_sep: Sep, repr: &Repr) {
    if !self.has_line() || !self.print_unpaired {
        return;
    }

    self.print_unpaired_line(&self.seq[0], repr);

    while let Some(line) = self.read_line(read_sep) {
        self.print_unpaired_line(&line, repr);
    }
}
|
||||
|
@ -206,9 +308,21 @@ impl<'a> State<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn print_unpaired_line(&self, line: &Line, sep: char) {
|
||||
print!("{}", line.get_field(self.key));
|
||||
line.print_fields(self.key, sep);
|
||||
fn print_unpaired_line(&self, line: &Line, repr: &Repr) {
|
||||
if repr.uses_format() {
|
||||
repr.print_format(|spec| match spec {
|
||||
&Spec::Key => Some(line.get_field(self.key)),
|
||||
&Spec::Field(file_num, field_num) => if file_num == self.file_num {
|
||||
Some(line.get_field(field_num))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
});
|
||||
} else {
|
||||
print!("{}", line.get_field(self.key));
|
||||
repr.print_fields(line, self.key, self.max_fields);
|
||||
}
|
||||
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
@ -239,6 +353,11 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
|||
.takes_value(true)
|
||||
.value_name("FIELD")
|
||||
.help("equivalent to '-1 FIELD -2 FIELD'"))
|
||||
.arg(Arg::with_name("o")
|
||||
.short("o")
|
||||
.takes_value(true)
|
||||
.value_name("FORMAT")
|
||||
.help("obey FORMAT while constructing output line"))
|
||||
.arg(Arg::with_name("t")
|
||||
.short("t")
|
||||
.takes_value(true)
|
||||
|
@ -264,9 +383,9 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
|||
.hidden(true))
|
||||
.get_matches_from(args);
|
||||
|
||||
let keys = parse_field_number(matches.value_of("j"));
|
||||
let key1 = parse_field_number(matches.value_of("1"));
|
||||
let key2 = parse_field_number(matches.value_of("2"));
|
||||
let keys = parse_field_number_option(matches.value_of("j"));
|
||||
let key1 = parse_field_number_option(matches.value_of("1"));
|
||||
let key2 = parse_field_number_option(matches.value_of("2"));
|
||||
|
||||
let mut settings: Settings = Default::default();
|
||||
settings.print_unpaired = match matches.value_of("a") {
|
||||
|
@ -274,9 +393,9 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
|||
match value {
|
||||
"1" => FileNum::File1,
|
||||
"2" => FileNum::File2,
|
||||
value => crash!(1, "invalid file number: {}", value),
|
||||
value => crash!(1, "invalid file number: '{}'", value),
|
||||
}
|
||||
}
|
||||
},
|
||||
None => FileNum::None,
|
||||
};
|
||||
settings.ignore_case = matches.is_present("i");
|
||||
|
@ -291,6 +410,17 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
|||
};
|
||||
}
|
||||
|
||||
if let Some(format) = matches.value_of("o") {
|
||||
if format == "auto" {
|
||||
settings.autoformat = true;
|
||||
} else {
|
||||
settings.format = format
|
||||
.split(|c| c == ' ' || c == ',' || c == '\t')
|
||||
.map(Spec::parse)
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
|
||||
let file1 = matches.value_of("file1").unwrap();
|
||||
let file2 = matches.value_of("file2").unwrap();
|
||||
|
||||
|
@ -305,42 +435,47 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
|
|||
let stdin = stdin();
|
||||
|
||||
let mut state1 = State::new(
|
||||
FileNum::File1,
|
||||
&file1,
|
||||
&stdin,
|
||||
settings.key1,
|
||||
settings.print_unpaired == FileNum::File1,
|
||||
settings.print_unpaired,
|
||||
);
|
||||
|
||||
let mut state2 = State::new(
|
||||
FileNum::File2,
|
||||
&file2,
|
||||
&stdin,
|
||||
settings.key2,
|
||||
settings.print_unpaired == FileNum::File2,
|
||||
settings.print_unpaired,
|
||||
);
|
||||
|
||||
let write_sep = match settings.separator {
|
||||
Sep::Char(sep) => sep,
|
||||
_ => ' ',
|
||||
};
|
||||
let repr = Repr::new(
|
||||
match settings.separator {
|
||||
Sep::Char(sep) => sep,
|
||||
_ => ' ',
|
||||
},
|
||||
&settings.format,
|
||||
);
|
||||
|
||||
state1.initialize(settings.separator);
|
||||
state2.initialize(settings.separator);
|
||||
state1.initialize(settings.separator, settings.autoformat);
|
||||
state2.initialize(settings.separator, settings.autoformat);
|
||||
|
||||
while state1.has_line() && state2.has_line() {
|
||||
let diff = state1.compare(&state2, settings.ignore_case);
|
||||
|
||||
match diff {
|
||||
Ordering::Less => {
|
||||
state1.skip_line(settings.separator, write_sep);
|
||||
state1.skip_line(settings.separator, &repr);
|
||||
}
|
||||
Ordering::Greater => {
|
||||
state2.skip_line(settings.separator, write_sep);
|
||||
state2.skip_line(settings.separator, &repr);
|
||||
}
|
||||
Ordering::Equal => {
|
||||
let next_line1 = state1.extend(settings.separator, settings.ignore_case);
|
||||
let next_line2 = state2.extend(settings.separator, settings.ignore_case);
|
||||
|
||||
state1.combine(&state2, write_sep);
|
||||
state1.combine(&state2, &repr);
|
||||
|
||||
state1.reset(next_line1);
|
||||
state2.reset(next_line2);
|
||||
|
@ -348,44 +483,45 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
|
|||
}
|
||||
}
|
||||
|
||||
state1.finalize(settings.separator, write_sep);
|
||||
state2.finalize(settings.separator, write_sep);
|
||||
state1.finalize(settings.separator, &repr);
|
||||
state2.finalize(settings.separator, &repr);
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
/// Check that keys for both files and for a particular file are not
|
||||
/// contradictory and return the zero-based key index.
|
||||
/// contradictory and return the key index.
|
||||
fn get_field_number(keys: Option<usize>, key: Option<usize>) -> usize {
|
||||
if let Some(keys) = keys {
|
||||
if let Some(key) = key {
|
||||
if keys != key {
|
||||
crash!(1, "incompatible join fields {}, {}", keys, key);
|
||||
// Show zero-based field numbers as one-based.
|
||||
crash!(1, "incompatible join fields {}, {}", keys + 1, key + 1);
|
||||
}
|
||||
}
|
||||
|
||||
return keys - 1;
|
||||
return keys;
|
||||
}
|
||||
|
||||
match key {
|
||||
Some(key) => key - 1,
|
||||
Some(key) => key,
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the specified field string as a natural number and return it.
|
||||
fn parse_field_number(value: Option<&str>) -> Option<usize> {
|
||||
match value {
|
||||
Some(value) => {
|
||||
match value.parse() {
|
||||
Ok(result) if result > 0 => Some(result),
|
||||
_ => crash!(1, "invalid field number: '{}'", value),
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
/// Parse the specified field string as a natural number and return
|
||||
/// the zero-based field number.
|
||||
fn parse_field_number(value: &str) -> usize {
|
||||
match value.parse::<usize>() {
|
||||
Ok(result) if result > 0 => result - 1,
|
||||
_ => crash!(1, "invalid field number: '{}'", value),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_field_number_option(value: Option<&str>) -> Option<usize> {
|
||||
Some(parse_field_number(value?))
|
||||
}
|
||||
|
||||
fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering {
|
||||
if ignore_case {
|
||||
field1.to_lowercase().cmp(&field2.to_lowercase())
|
||||
|
|
5
tests/fixtures/join/autoformat.expected
vendored
Normal file
5
tests/fixtures/join/autoformat.expected
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
1 a a
|
||||
2 b b
|
||||
3 c d
|
||||
4 d g
|
||||
5 e i
|
5
tests/fixtures/join/different_lengths.txt
vendored
Normal file
5
tests/fixtures/join/different_lengths.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
1 a
|
||||
2 b c
|
||||
3 d e f
|
||||
4 g h
|
||||
5 i
|
6
tests/fixtures/join/unpaired_lines_format.expected
vendored
Normal file
6
tests/fixtures/join/unpaired_lines_format.expected
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
f 2 a
|
||||
g 3 b
|
||||
h 4 c
|
||||
i 5 f
|
||||
j 6 g
|
||||
k 7 h
|
|
@ -124,3 +124,52 @@ fn multitab_character() {
|
|||
.arg("э")
|
||||
.fails().stderr_is("join: error: multi-character tab э");
|
||||
}
|
||||
|
||||
/// Joining `fields_1.txt` and `fields_2.txt` with `-o "1.1 2.2"` and with
/// `-o "0 2.2"` must each succeed and print exactly `default.expected`.
#[test]
fn default_format() {
    for format in ["1.1 2.2", "0 2.2"].iter() {
        new_ucmd!()
            .arg("fields_1.txt")
            .arg("fields_2.txt")
            .arg("-o")
            .arg(*format)
            .succeeds()
            .stdout_only_fixture("default.expected");
    }
}
|
||||
|
||||
/// Combining `-a 2` with an explicit `-o` list must succeed and print
/// exactly `unpaired_lines_format.expected`.
#[test]
fn unpaired_lines_format() {
    new_ucmd!()
        .arg("fields_2.txt")
        .arg("fields_3.txt")
        .arg("-a")
        .arg("2")
        .arg("-o")
        .arg("1.2 1.1 2.4 2.3 2.2 0")
        .succeeds()
        .stdout_only_fixture("unpaired_lines_format.expected");
}
|
||||
|
||||
/// `-o auto` on `fields_2.txt` and `different_lengths.txt` must succeed
/// and print exactly `autoformat.expected`.
#[test]
fn autoformat() {
    new_ucmd!()
        .arg("fields_2.txt")
        .arg("different_lengths.txt")
        .arg("-o")
        .arg("auto")
        .succeeds()
        .stdout_only_fixture("autoformat.expected");
}
|
||||
|
||||
/// An empty `-o` argument must make the command fail with the
/// "invalid file number in field spec" error for the empty specifier.
#[test]
fn empty_format() {
    new_ucmd!()
        .arg("fields_1.txt")
        .arg("fields_2.txt")
        .arg("-o")
        .arg("")
        .fails()
        .stderr_is("join: error: invalid file number in field spec: ''");
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue