mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
Merge pull request #1112 from kupospelov/master
join: implement basic functionality
This commit is contained in:
commit
c27edbb73f
18 changed files with 556 additions and 0 deletions
9
Cargo.lock
generated
9
Cargo.lock
generated
|
@ -35,6 +35,7 @@ dependencies = [
|
|||
"hostname 0.0.1",
|
||||
"id 0.0.1",
|
||||
"install 0.0.1",
|
||||
"join 0.0.1",
|
||||
"kill 0.0.1",
|
||||
"lazy_static 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -588,6 +589,14 @@ dependencies = [
|
|||
"either 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "join"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"getopts 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"uucore 0.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kernel32-sys"
|
||||
version = "0.2.2"
|
||||
|
|
|
@ -69,6 +69,7 @@ fuchsia = [
|
|||
generic = [
|
||||
"cat",
|
||||
"hashsum",
|
||||
"join",
|
||||
"more",
|
||||
"ln",
|
||||
"ls",
|
||||
|
@ -178,6 +179,7 @@ hostid = { optional=true, path="src/hostid" }
|
|||
hostname = { optional=true, path="src/hostname" }
|
||||
id = { optional=true, path="src/id" }
|
||||
install = { optional=true, path="src/install" }
|
||||
join = { optional=true, path="src/join" }
|
||||
kill = { optional=true, path="src/kill" }
|
||||
link = { optional=true, path="src/link" }
|
||||
ln = { optional=true, path="src/ln" }
|
||||
|
|
1
Makefile
1
Makefile
|
@ -61,6 +61,7 @@ PROGS := \
|
|||
fold \
|
||||
hashsum \
|
||||
head \
|
||||
join \
|
||||
link \
|
||||
ln \
|
||||
ls \
|
||||
|
|
17
src/join/Cargo.toml
Normal file
17
src/join/Cargo.toml
Normal file
|
@ -0,0 +1,17 @@
|
|||
[package]
|
||||
name = "join"
|
||||
version = "0.0.1"
|
||||
authors = []
|
||||
build = "../../mkmain.rs"
|
||||
|
||||
[lib]
|
||||
name = "uu_join"
|
||||
path = "join.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = "2.24.1"
|
||||
uucore = { path="../uucore" }
|
||||
|
||||
[[bin]]
|
||||
name = "join"
|
||||
path = "../../uumain.rs"
|
374
src/join/join.rs
Normal file
374
src/join/join.rs
Normal file
|
@ -0,0 +1,374 @@
|
|||
#![crate_name = "uu_join"]
|
||||
|
||||
/*
|
||||
* This file is part of the uutils coreutils package.
|
||||
*
|
||||
* (c) Konstantin Pospelov <kupospelov@gmail.com>
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
extern crate clap;
|
||||
|
||||
#[macro_use]
|
||||
extern crate uucore;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Lines, Stdin, stdin};
|
||||
use std::cmp::Ordering;
|
||||
use clap::{App, Arg};
|
||||
|
||||
static NAME: &'static str = "join";
|
||||
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
/// Selects which input file, if any, has its unpairable lines printed.
///
/// This is a plain field-less enum, so it derives the usual value-type traits
/// (`Clone`, `Copy`, `Debug`, `Eq`) in addition to `PartialEq`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FileNum {
    /// No file selected.
    None,
    /// The first input file.
    File1,
    /// The second input file.
    File2,
}
|
||||
|
||||
struct Settings {
|
||||
key1: usize,
|
||||
key2: usize,
|
||||
print_unpaired: FileNum,
|
||||
ignore_case: bool,
|
||||
}
|
||||
|
||||
impl Default for Settings {
|
||||
fn default() -> Settings {
|
||||
Settings {
|
||||
key1: 0,
|
||||
key2: 0,
|
||||
print_unpaired: FileNum::None,
|
||||
ignore_case: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One input line, split into its whitespace-separated fields.
struct Line {
    /// The fields of the line, in input order.
    fields: Vec<String>,
}

impl Line {
    /// Split `string` on whitespace and keep every resulting field.
    fn new(string: String) -> Line {
        let fields: Vec<String> = string.split_whitespace().map(String::from).collect();
        Line { fields: fields }
    }

    /// Get field at index; a line that is too short yields the empty string.
    fn get_field(&self, index: usize) -> &str {
        self.fields
            .get(index)
            .map(|field| field.as_str())
            .unwrap_or("")
    }

    /// Call `f` on every field except the one at `index`, in input order.
    fn foreach_except<F>(&self, index: usize, f: &F)
    where
        F: Fn(&String),
    {
        let others = self.fields
            .iter()
            .enumerate()
            .filter(|&(position, _)| position != index);

        for (_, field) in others {
            f(field);
        }
    }
}
|
||||
|
||||
struct State<'a> {
|
||||
key: usize,
|
||||
print_unpaired: bool,
|
||||
lines: Lines<Box<BufRead + 'a>>,
|
||||
seq: Vec<Line>,
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
fn new(name: &str, stdin: &'a Stdin, key: usize, print_unpaired: bool) -> State<'a> {
|
||||
let f = if name == "-" {
|
||||
Box::new(stdin.lock()) as Box<BufRead>
|
||||
} else {
|
||||
match File::open(name) {
|
||||
Ok(file) => Box::new(BufReader::new(file)) as Box<BufRead>,
|
||||
Err(err) => crash!(1, "{}: {}", name, err),
|
||||
}
|
||||
};
|
||||
|
||||
State {
|
||||
key: key,
|
||||
print_unpaired: print_unpaired,
|
||||
lines: f.lines(),
|
||||
seq: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare the key fields of the two current lines.
|
||||
fn compare(&self, other: &State, ignore_case: bool) -> Ordering {
|
||||
let key1 = self.seq[0].get_field(self.key);
|
||||
let key2 = other.seq[0].get_field(other.key);
|
||||
|
||||
compare(key1, key2, ignore_case)
|
||||
}
|
||||
|
||||
/// Skip the current unpaired line.
|
||||
fn skip_line(&mut self) {
|
||||
if self.print_unpaired {
|
||||
self.print_unpaired_line(&self.seq[0]);
|
||||
}
|
||||
|
||||
self.next_line();
|
||||
}
|
||||
|
||||
/// Move to the next line, if any.
|
||||
fn next_line(&mut self) {
|
||||
match self.read_line() {
|
||||
Some(line) => self.seq[0] = line,
|
||||
None => self.seq.clear(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Keep reading line sequence until the key does not change, return
|
||||
/// the first line whose key differs.
|
||||
fn extend(&mut self, ignore_case: bool) -> Option<Line> {
|
||||
while let Some(line) = self.read_line() {
|
||||
let diff = compare(
|
||||
self.seq[0].get_field(self.key),
|
||||
line.get_field(self.key),
|
||||
ignore_case,
|
||||
);
|
||||
|
||||
if diff == Ordering::Equal {
|
||||
self.seq.push(line);
|
||||
} else {
|
||||
return Some(line);
|
||||
}
|
||||
}
|
||||
|
||||
return None;
|
||||
}
|
||||
|
||||
/// Combine two line sequences.
|
||||
fn combine(&self, other: &State) {
|
||||
let key = self.seq[0].get_field(self.key);
|
||||
|
||||
for line1 in &self.seq {
|
||||
for line2 in &other.seq {
|
||||
print!("{}", key);
|
||||
line1.foreach_except(self.key, &print_field);
|
||||
line2.foreach_except(other.key, &print_field);
|
||||
println!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset with the next line.
|
||||
fn reset(&mut self, next_line: Option<Line>) {
|
||||
self.seq.clear();
|
||||
|
||||
if let Some(line) = next_line {
|
||||
self.seq.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
fn has_line(&self) -> bool {
|
||||
!self.seq.is_empty()
|
||||
}
|
||||
|
||||
fn initialize(&mut self) {
|
||||
if let Some(line) = self.read_line() {
|
||||
self.seq.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(&mut self) {
|
||||
if self.has_line() && self.print_unpaired {
|
||||
self.print_unpaired_line(&self.seq[0]);
|
||||
|
||||
while let Some(line) = self.read_line() {
|
||||
self.print_unpaired_line(&line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_line(&mut self) -> Option<Line> {
|
||||
match self.lines.next() {
|
||||
Some(value) => Some(Line::new(crash_if_err!(1, value))),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn print_unpaired_line(&self, line: &Line) {
|
||||
print!("{}", line.get_field(self.key));
|
||||
line.foreach_except(self.key, &print_field);
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn uumain(args: Vec<String>) -> i32 {
|
||||
let matches = App::new(NAME)
|
||||
.version(VERSION)
|
||||
.about(
|
||||
"For each pair of input lines with identical join fields, write a line to
|
||||
standard output. The default join field is the first, delimited by blanks.
|
||||
|
||||
When FILE1 or FILE2 (not both) is -, read standard input.")
|
||||
.help_message("display this help and exit")
|
||||
.version_message("display version and exit")
|
||||
.arg(Arg::with_name("a")
|
||||
.short("a")
|
||||
.takes_value(true)
|
||||
.possible_values(&["1", "2"])
|
||||
.value_name("FILENUM")
|
||||
.help("also print unpairable lines from file FILENUM, where
|
||||
FILENUM is 1 or 2, corresponding to FILE1 or FILE2"))
|
||||
.arg(Arg::with_name("i")
|
||||
.short("i")
|
||||
.long("ignore-case")
|
||||
.help("ignore differences in case when comparing fields"))
|
||||
.arg(Arg::with_name("j")
|
||||
.short("j")
|
||||
.takes_value(true)
|
||||
.value_name("FIELD")
|
||||
.help("equivalent to '-1 FIELD -2 FIELD'"))
|
||||
.arg(Arg::with_name("1")
|
||||
.short("1")
|
||||
.takes_value(true)
|
||||
.value_name("FIELD")
|
||||
.help("join on this FIELD of file 1"))
|
||||
.arg(Arg::with_name("2")
|
||||
.short("2")
|
||||
.takes_value(true)
|
||||
.value_name("FIELD")
|
||||
.help("join on this FIELD of file 2"))
|
||||
.arg(Arg::with_name("file1")
|
||||
.required(true)
|
||||
.value_name("FILE1")
|
||||
.hidden(true))
|
||||
.arg(Arg::with_name("file2")
|
||||
.required(true)
|
||||
.value_name("FILE2")
|
||||
.hidden(true))
|
||||
.get_matches_from(args);
|
||||
|
||||
let keys = parse_field_number(matches.value_of("j"));
|
||||
let key1 = parse_field_number(matches.value_of("1"));
|
||||
let key2 = parse_field_number(matches.value_of("2"));
|
||||
|
||||
let mut settings: Settings = Default::default();
|
||||
settings.print_unpaired = match matches.value_of("a") {
|
||||
Some(value) => {
|
||||
match value {
|
||||
"1" => FileNum::File1,
|
||||
"2" => FileNum::File2,
|
||||
value => crash!(1, "invalid file number: {}", value),
|
||||
}
|
||||
}
|
||||
None => FileNum::None,
|
||||
};
|
||||
settings.ignore_case = matches.is_present("i");
|
||||
settings.key1 = get_field_number(keys, key1);
|
||||
settings.key2 = get_field_number(keys, key2);
|
||||
|
||||
let file1 = matches.value_of("file1").unwrap();
|
||||
let file2 = matches.value_of("file2").unwrap();
|
||||
|
||||
if file1 == "-" && file2 == "-" {
|
||||
crash!(1, "both files cannot be standard input");
|
||||
}
|
||||
|
||||
exec(file1, file2, &settings)
|
||||
}
|
||||
|
||||
fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 {
|
||||
let stdin = stdin();
|
||||
|
||||
let mut state1 = State::new(
|
||||
&file1,
|
||||
&stdin,
|
||||
settings.key1,
|
||||
settings.print_unpaired == FileNum::File1,
|
||||
);
|
||||
|
||||
let mut state2 = State::new(
|
||||
&file2,
|
||||
&stdin,
|
||||
settings.key2,
|
||||
settings.print_unpaired == FileNum::File2,
|
||||
);
|
||||
|
||||
state1.initialize();
|
||||
state2.initialize();
|
||||
|
||||
while state1.has_line() && state2.has_line() {
|
||||
let diff = state1.compare(&state2, settings.ignore_case);
|
||||
|
||||
match diff {
|
||||
Ordering::Less => {
|
||||
state1.skip_line();
|
||||
}
|
||||
Ordering::Greater => {
|
||||
state2.skip_line();
|
||||
}
|
||||
Ordering::Equal => {
|
||||
let next_line1 = state1.extend(settings.ignore_case);
|
||||
let next_line2 = state2.extend(settings.ignore_case);
|
||||
|
||||
state1.combine(&state2);
|
||||
|
||||
state1.reset(next_line1);
|
||||
state2.reset(next_line2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
state1.finalize();
|
||||
state2.finalize();
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
/// Check that keys for both files and for a particular file are not
|
||||
/// contradictory and return the zero-based key index.
|
||||
fn get_field_number(keys: Option<usize>, key: Option<usize>) -> usize {
|
||||
if let Some(keys) = keys {
|
||||
if let Some(key) = key {
|
||||
if keys != key {
|
||||
crash!(1, "incompatible join fields {}, {}", keys, key);
|
||||
}
|
||||
}
|
||||
|
||||
return keys - 1;
|
||||
}
|
||||
|
||||
match key {
|
||||
Some(key) => key - 1,
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the specified field string as a natural number and return it.
|
||||
fn parse_field_number(value: Option<&str>) -> Option<usize> {
|
||||
match value {
|
||||
Some(value) => {
|
||||
match value.parse() {
|
||||
Ok(result) if result > 0 => Some(result),
|
||||
_ => crash!(1, "invalid field number: '{}'", value),
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Order two join fields, optionally ignoring differences in case.
///
/// With `ignore_case` set, the lowercased forms of both fields are compared;
/// otherwise the fields are compared as they are.
fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering {
    if !ignore_case {
        return field1.cmp(field2);
    }

    let lowered1 = field1.to_lowercase();
    let lowered2 = field2.to_lowercase();
    lowered1.cmp(&lowered2)
}
|
||||
|
||||
/// Print one field, preceded by the single space that separates output
/// fields.
///
/// The parameter stays `&String` because this function is passed to
/// `Line::foreach_except`, whose callback bound is `Fn(&String)`.
fn print_field(field: &String) {
    print!(" {}", field);
}
|
4
tests/fixtures/join/capitalized.txt
vendored
Normal file
4
tests/fixtures/join/capitalized.txt
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
A 1
|
||||
B 2
|
||||
C 4
|
||||
D 8
|
3
tests/fixtures/join/case_insensitive.expected
vendored
Normal file
3
tests/fixtures/join/case_insensitive.expected
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
A 1 2 f
|
||||
B 2 3 g
|
||||
C 4 4 h
|
5
tests/fixtures/join/default.expected
vendored
Normal file
5
tests/fixtures/join/default.expected
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
1 a
|
||||
2 b
|
||||
3 c
|
||||
5 e
|
||||
8 h
|
6
tests/fixtures/join/different_field.expected
vendored
Normal file
6
tests/fixtures/join/different_field.expected
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
2 b a f
|
||||
3 c b g
|
||||
4 d c h
|
||||
5 e f i
|
||||
6 f g j
|
||||
7 g h k
|
5
tests/fixtures/join/different_fields.expected
vendored
Normal file
5
tests/fixtures/join/different_fields.expected
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
c 3 2 1 cd
|
||||
d 4 3 2 de
|
||||
e 5 5 3 ef
|
||||
f 6 7 4 fg
|
||||
g 7 11 5 gh
|
0
tests/fixtures/join/empty.txt
vendored
Normal file
0
tests/fixtures/join/empty.txt
vendored
Normal file
5
tests/fixtures/join/fields_1.txt
vendored
Normal file
5
tests/fixtures/join/fields_1.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
1
|
||||
2
|
||||
3
|
||||
5
|
||||
8
|
9
tests/fixtures/join/fields_2.txt
vendored
Normal file
9
tests/fixtures/join/fields_2.txt
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
1 a
|
||||
2 b
|
||||
3 c
|
||||
4 d
|
||||
5 e
|
||||
6 f
|
||||
7 g
|
||||
8 h
|
||||
9 i
|
6
tests/fixtures/join/fields_3.txt
vendored
Normal file
6
tests/fixtures/join/fields_3.txt
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
a 2 f
|
||||
b 3 g
|
||||
c 4 h
|
||||
f 5 i
|
||||
g 6 j
|
||||
h 7 k
|
5
tests/fixtures/join/fields_4.txt
vendored
Normal file
5
tests/fixtures/join/fields_4.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
2 c 1 cd
|
||||
3 d 2 de
|
||||
5 e 3 ef
|
||||
7 f 4 fg
|
||||
11 g 5 gh
|
9
tests/fixtures/join/unpaired_lines.expected
vendored
Normal file
9
tests/fixtures/join/unpaired_lines.expected
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
1 a
|
||||
2 a f b
|
||||
3 b g c
|
||||
4 c h d
|
||||
5 f i e
|
||||
6 g j f
|
||||
7 h k g
|
||||
8 h
|
||||
9 i
|
95
tests/test_join.rs
Normal file
95
tests/test_join.rs
Normal file
|
@ -0,0 +1,95 @@
|
|||
use common::util::*;
|
||||
|
||||
|
||||
/// Joining produces no output when either input (or both) is empty.
#[test]
fn empty_files() {
    let pairs = [
        ("empty.txt", "empty.txt"),
        ("empty.txt", "fields_1.txt"),
        ("fields_1.txt", "empty.txt"),
    ];

    for &(first, second) in pairs.iter() {
        new_ucmd!()
            .arg(first)
            .arg(second)
            .succeeds()
            .stdout_only("");
    }
}
|
||||
|
||||
/// Joining the first field of `fields_1.txt` with the second field of
/// `fields_2.txt` is expected to produce no output.
#[test]
fn empty_intersection() {
    new_ucmd!()
        .arg("fields_1.txt")
        .arg("fields_2.txt")
        .arg("-2")
        .arg("2")
        .succeeds()
        .stdout_only("");
}
|
||||
|
||||
/// Without options, `fields_1.txt` and `fields_2.txt` are joined on their
/// first fields.
#[test]
fn default_arguments() {
    new_ucmd!()
        .arg("fields_1.txt")
        .arg("fields_2.txt")
        .succeeds()
        .stdout_only_fixture("default.expected");
}
|
||||
|
||||
/// `-j 2` and the spelled-out `-1 2 -2 2` must give the same result.
#[test]
fn different_fields() {
    // Short form: one option sets the join field of both files.
    new_ucmd!()
        .arg("fields_2.txt")
        .arg("fields_4.txt")
        .arg("-j")
        .arg("2")
        .succeeds()
        .stdout_only_fixture("different_fields.expected");

    // Long form: the same field number given per file.
    new_ucmd!()
        .arg("fields_2.txt")
        .arg("fields_4.txt")
        .arg("-1")
        .arg("2")
        .arg("-2")
        .arg("2")
        .succeeds()
        .stdout_only_fixture("different_fields.expected");
}
|
||||
|
||||
/// Only the second file gets a non-default join field (`-2 2`).
#[test]
fn different_field() {
    new_ucmd!()
        .arg("fields_2.txt")
        .arg("fields_3.txt")
        .arg("-2")
        .arg("2")
        .succeeds()
        .stdout_only_fixture("different_field.expected");
}
|
||||
|
||||
/// `-a FILENUM` additionally prints the unpairable lines of the chosen file.
#[test]
fn unpaired_lines() {
    // The expected output is `fields_2.txt` itself, i.e. the first file
    // printed unchanged.
    new_ucmd!()
        .arg("fields_2.txt")
        .arg("fields_3.txt")
        .arg("-a")
        .arg("1")
        .succeeds()
        .stdout_only_fixture("fields_2.txt");

    // Paired and unpaired lines of the second file are interleaved.
    new_ucmd!()
        .arg("fields_3.txt")
        .arg("fields_2.txt")
        .arg("-1")
        .arg("2")
        .arg("-a")
        .arg("2")
        .succeeds()
        .stdout_only_fixture("unpaired_lines.expected");
}
|
||||
|
||||
/// With `-i`, the upper-case keys of `capitalized.txt` are joined with
/// `fields_3.txt`.
#[test]
fn case_insensitive() {
    new_ucmd!()
        .arg("capitalized.txt")
        .arg("fields_3.txt")
        .arg("-i")
        .succeeds()
        .stdout_only_fixture("case_insensitive.expected");
}
|
|
@ -58,6 +58,7 @@ generic! {
|
|||
"fold", test_fold;
|
||||
"hashsum", test_hashsum;
|
||||
"head", test_head;
|
||||
"join", test_join;
|
||||
"link", test_link;
|
||||
"ln", test_ln;
|
||||
"ls", test_ls;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue