mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #1114 from kupospelov/master
join: implement the -t option
This commit is contained in:
commit
be5ce3c252
5 changed files with 110 additions and 45 deletions
111
src/join/join.rs
111
src/join/join.rs
|
@ -29,11 +29,19 @@ enum FileNum {
|
||||||
File2,
|
File2,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
enum Sep {
|
||||||
|
Char(char),
|
||||||
|
Line,
|
||||||
|
Whitespaces,
|
||||||
|
}
|
||||||
|
|
||||||
struct Settings {
|
struct Settings {
|
||||||
key1: usize,
|
key1: usize,
|
||||||
key2: usize,
|
key2: usize,
|
||||||
print_unpaired: FileNum,
|
print_unpaired: FileNum,
|
||||||
ignore_case: bool,
|
ignore_case: bool,
|
||||||
|
separator: Sep,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Settings {
|
impl Default for Settings {
|
||||||
|
@ -43,6 +51,7 @@ impl Default for Settings {
|
||||||
key2: 0,
|
key2: 0,
|
||||||
print_unpaired: FileNum::None,
|
print_unpaired: FileNum::None,
|
||||||
ignore_case: false,
|
ignore_case: false,
|
||||||
|
separator: Sep::Whitespaces,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -52,8 +61,14 @@ struct Line {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Line {
|
impl Line {
|
||||||
fn new(string: String) -> Line {
|
fn new(string: String, separator: Sep) -> Line {
|
||||||
Line { fields: string.split_whitespace().map(String::from).collect() }
|
let fields = match separator {
|
||||||
|
Sep::Whitespaces => string.split_whitespace().map(String::from).collect(),
|
||||||
|
Sep::Char(sep) => string.split(sep).map(String::from).collect(),
|
||||||
|
Sep::Line => vec![string],
|
||||||
|
};
|
||||||
|
|
||||||
|
Line { fields }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get field at index.
|
/// Get field at index.
|
||||||
|
@ -65,14 +80,11 @@ impl Line {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate each field except the one at the index.
|
/// Print each field except the one at the index.
|
||||||
fn foreach_except<F>(&self, index: usize, f: &F)
|
fn print_fields(&self, index: usize, separator: char) {
|
||||||
where
|
for i in 0..self.fields.len() {
|
||||||
F: Fn(&String),
|
|
||||||
{
|
|
||||||
for (i, field) in self.fields.iter().enumerate() {
|
|
||||||
if i != index {
|
if i != index {
|
||||||
f(&field);
|
print!("{}{}", separator, self.fields[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -113,17 +125,12 @@ impl<'a> State<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Skip the current unpaired line.
|
/// Skip the current unpaired line.
|
||||||
fn skip_line(&mut self) {
|
fn skip_line(&mut self, read_sep: Sep, write_sep: char) {
|
||||||
if self.print_unpaired {
|
if self.print_unpaired {
|
||||||
self.print_unpaired_line(&self.seq[0]);
|
self.print_unpaired_line(&self.seq[0], write_sep);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.next_line();
|
match self.read_line(read_sep) {
|
||||||
}
|
|
||||||
|
|
||||||
/// Move to the next line, if any.
|
|
||||||
fn next_line(&mut self) {
|
|
||||||
match self.read_line() {
|
|
||||||
Some(line) => self.seq[0] = line,
|
Some(line) => self.seq[0] = line,
|
||||||
None => self.seq.clear(),
|
None => self.seq.clear(),
|
||||||
}
|
}
|
||||||
|
@ -131,8 +138,8 @@ impl<'a> State<'a> {
|
||||||
|
|
||||||
/// Keep reading line sequence until the key does not change, return
|
/// Keep reading line sequence until the key does not change, return
|
||||||
/// the first line whose key differs.
|
/// the first line whose key differs.
|
||||||
fn extend(&mut self, ignore_case: bool) -> Option<Line> {
|
fn extend(&mut self, read_sep: Sep, ignore_case: bool) -> Option<Line> {
|
||||||
while let Some(line) = self.read_line() {
|
while let Some(line) = self.read_line(read_sep) {
|
||||||
let diff = compare(
|
let diff = compare(
|
||||||
self.seq[0].get_field(self.key),
|
self.seq[0].get_field(self.key),
|
||||||
line.get_field(self.key),
|
line.get_field(self.key),
|
||||||
|
@ -150,14 +157,14 @@ impl<'a> State<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Combine two line sequences.
|
/// Combine two line sequences.
|
||||||
fn combine(&self, other: &State) {
|
fn combine(&self, other: &State, write_sep: char) {
|
||||||
let key = self.seq[0].get_field(self.key);
|
let key = self.seq[0].get_field(self.key);
|
||||||
|
|
||||||
for line1 in &self.seq {
|
for line1 in &self.seq {
|
||||||
for line2 in &other.seq {
|
for line2 in &other.seq {
|
||||||
print!("{}", key);
|
print!("{}", key);
|
||||||
line1.foreach_except(self.key, &print_field);
|
line1.print_fields(self.key, write_sep);
|
||||||
line2.foreach_except(other.key, &print_field);
|
line2.print_fields(other.key, write_sep);
|
||||||
println!();
|
println!();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -176,32 +183,32 @@ impl<'a> State<'a> {
|
||||||
!self.seq.is_empty()
|
!self.seq.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn initialize(&mut self) {
|
fn initialize(&mut self, read_sep: Sep) {
|
||||||
if let Some(line) = self.read_line() {
|
if let Some(line) = self.read_line(read_sep) {
|
||||||
self.seq.push(line);
|
self.seq.push(line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finalize(&mut self) {
|
fn finalize(&mut self, read_sep: Sep, write_sep: char) {
|
||||||
if self.has_line() && self.print_unpaired {
|
if self.has_line() && self.print_unpaired {
|
||||||
self.print_unpaired_line(&self.seq[0]);
|
self.print_unpaired_line(&self.seq[0], write_sep);
|
||||||
|
|
||||||
while let Some(line) = self.read_line() {
|
while let Some(line) = self.read_line(read_sep) {
|
||||||
self.print_unpaired_line(&line);
|
self.print_unpaired_line(&line, write_sep);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_line(&mut self) -> Option<Line> {
|
fn read_line(&mut self, sep: Sep) -> Option<Line> {
|
||||||
match self.lines.next() {
|
match self.lines.next() {
|
||||||
Some(value) => Some(Line::new(crash_if_err!(1, value))),
|
Some(value) => Some(Line::new(crash_if_err!(1, value), sep)),
|
||||||
None => None,
|
None => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_unpaired_line(&self, line: &Line) {
|
fn print_unpaired_line(&self, line: &Line, sep: char) {
|
||||||
print!("{}", line.get_field(self.key));
|
print!("{}", line.get_field(self.key));
|
||||||
line.foreach_except(self.key, &print_field);
|
line.print_fields(self.key, sep);
|
||||||
println!();
|
println!();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -232,6 +239,11 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
.value_name("FIELD")
|
.value_name("FIELD")
|
||||||
.help("equivalent to '-1 FIELD -2 FIELD'"))
|
.help("equivalent to '-1 FIELD -2 FIELD'"))
|
||||||
|
.arg(Arg::with_name("t")
|
||||||
|
.short("t")
|
||||||
|
.takes_value(true)
|
||||||
|
.value_name("CHAR")
|
||||||
|
.help("use CHAR as input and output field separator"))
|
||||||
.arg(Arg::with_name("1")
|
.arg(Arg::with_name("1")
|
||||||
.short("1")
|
.short("1")
|
||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
|
@ -271,6 +283,14 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
||||||
settings.key1 = get_field_number(keys, key1);
|
settings.key1 = get_field_number(keys, key1);
|
||||||
settings.key2 = get_field_number(keys, key2);
|
settings.key2 = get_field_number(keys, key2);
|
||||||
|
|
||||||
|
if let Some(value) = matches.value_of("t") {
|
||||||
|
settings.separator = match value.len() {
|
||||||
|
0 => Sep::Line,
|
||||||
|
1 => Sep::Char(value.chars().nth(0).unwrap()),
|
||||||
|
_ => crash!(1, "multi-character tab {}", value),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
let file1 = matches.value_of("file1").unwrap();
|
let file1 = matches.value_of("file1").unwrap();
|
||||||
let file2 = matches.value_of("file2").unwrap();
|
let file2 = matches.value_of("file2").unwrap();
|
||||||
|
|
||||||
|
@ -298,24 +318,29 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
|
||||||
settings.print_unpaired == FileNum::File2,
|
settings.print_unpaired == FileNum::File2,
|
||||||
);
|
);
|
||||||
|
|
||||||
state1.initialize();
|
let write_sep = match settings.separator {
|
||||||
state2.initialize();
|
Sep::Char(sep) => sep,
|
||||||
|
_ => ' ',
|
||||||
|
};
|
||||||
|
|
||||||
|
state1.initialize(settings.separator);
|
||||||
|
state2.initialize(settings.separator);
|
||||||
|
|
||||||
while state1.has_line() && state2.has_line() {
|
while state1.has_line() && state2.has_line() {
|
||||||
let diff = state1.compare(&state2, settings.ignore_case);
|
let diff = state1.compare(&state2, settings.ignore_case);
|
||||||
|
|
||||||
match diff {
|
match diff {
|
||||||
Ordering::Less => {
|
Ordering::Less => {
|
||||||
state1.skip_line();
|
state1.skip_line(settings.separator, write_sep);
|
||||||
}
|
}
|
||||||
Ordering::Greater => {
|
Ordering::Greater => {
|
||||||
state2.skip_line();
|
state2.skip_line(settings.separator, write_sep);
|
||||||
}
|
}
|
||||||
Ordering::Equal => {
|
Ordering::Equal => {
|
||||||
let next_line1 = state1.extend(settings.ignore_case);
|
let next_line1 = state1.extend(settings.separator, settings.ignore_case);
|
||||||
let next_line2 = state2.extend(settings.ignore_case);
|
let next_line2 = state2.extend(settings.separator, settings.ignore_case);
|
||||||
|
|
||||||
state1.combine(&state2);
|
state1.combine(&state2, write_sep);
|
||||||
|
|
||||||
state1.reset(next_line1);
|
state1.reset(next_line1);
|
||||||
state2.reset(next_line2);
|
state2.reset(next_line2);
|
||||||
|
@ -323,8 +348,8 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
state1.finalize();
|
state1.finalize(settings.separator, write_sep);
|
||||||
state2.finalize();
|
state2.finalize(settings.separator, write_sep);
|
||||||
|
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
@ -368,7 +393,3 @@ fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering {
|
||||||
field1.cmp(field2)
|
field1.cmp(field2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_field(field: &String) {
|
|
||||||
print!("{}{}", ' ', field);
|
|
||||||
}
|
|
||||||
|
|
6
tests/fixtures/join/semicolon_fields_1.txt
vendored
Normal file
6
tests/fixtures/join/semicolon_fields_1.txt
vendored
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
1; a
|
||||||
|
2 ;b
|
||||||
|
3; c
|
||||||
|
4 ;d
|
||||||
|
5; e
|
||||||
|
6 ;f
|
3
tests/fixtures/join/semicolon_fields_2.txt
vendored
Normal file
3
tests/fixtures/join/semicolon_fields_2.txt
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
2 ;x
|
||||||
|
3; y
|
||||||
|
4 ;z
|
4
tests/fixtures/join/semicolon_separated.expected
vendored
Normal file
4
tests/fixtures/join/semicolon_separated.expected
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
2 ;b;x
|
||||||
|
3; c; y
|
||||||
|
4 ;d;z
|
||||||
|
|
|
@ -93,3 +93,34 @@ fn case_insensitive() {
|
||||||
.arg("-i")
|
.arg("-i")
|
||||||
.succeeds().stdout_only_fixture("case_insensitive.expected");
|
.succeeds().stdout_only_fixture("case_insensitive.expected");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn semicolon_separated() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("semicolon_fields_1.txt")
|
||||||
|
.arg("semicolon_fields_2.txt")
|
||||||
|
.arg("-t")
|
||||||
|
.arg(";")
|
||||||
|
.succeeds().stdout_only_fixture("semicolon_separated.expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_line_separated() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-")
|
||||||
|
.arg("fields_2.txt")
|
||||||
|
.arg("-t")
|
||||||
|
.arg("")
|
||||||
|
.pipe_in("1 a\n1 b\n8 h\n")
|
||||||
|
.succeeds().stdout_only("1 a\n8 h");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multitab_character() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("semicolon_fields_1.txt")
|
||||||
|
.arg("semicolon_fields_2.txt")
|
||||||
|
.arg("-t")
|
||||||
|
.arg("э")
|
||||||
|
.fails().stderr_is("join: error: multi-character tab э");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue