diff --git a/Cargo.lock b/Cargo.lock index 5dcadf7e0..7d277b45f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,6 +35,7 @@ dependencies = [ "hostname 0.0.1", "id 0.0.1", "install 0.0.1", + "join 0.0.1", "kill 0.0.1", "lazy_static 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", @@ -588,6 +589,14 @@ dependencies = [ "either 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "join" +version = "0.0.1" +dependencies = [ + "getopts 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", + "uucore 0.0.1", +] + [[package]] name = "kernel32-sys" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 3148e261c..fdc46a845 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ fuchsia = [ generic = [ "cat", "hashsum", + "join", "more", "ln", "ls", @@ -178,6 +179,7 @@ hostid = { optional=true, path="src/hostid" } hostname = { optional=true, path="src/hostname" } id = { optional=true, path="src/id" } install = { optional=true, path="src/install" } +join = { optional=true, path="src/join" } kill = { optional=true, path="src/kill" } link = { optional=true, path="src/link" } ln = { optional=true, path="src/ln" } diff --git a/Makefile b/Makefile index 4bc976e13..84510df71 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,7 @@ PROGS := \ fold \ hashsum \ head \ + join \ link \ ln \ ls \ diff --git a/src/join/Cargo.toml b/src/join/Cargo.toml new file mode 100644 index 000000000..9c7fc6bfa --- /dev/null +++ b/src/join/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "join" +version = "0.0.1" +authors = [] + +[lib] +name = "uu_join" +path = "join.rs" + +[dependencies] +getopts = "0.2.14" +uucore = { path="../uucore" } + +[[bin]] +name = "join" +path = "main.rs" diff --git a/src/join/join.rs b/src/join/join.rs new file mode 100644 index 000000000..5f257268a --- /dev/null +++ b/src/join/join.rs @@ -0,0 +1,374 @@ +#![crate_name = "uu_join"] + +/* + * This file is part of the uutils coreutils package. + * + * (c) Konstantin Pospelov + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +extern crate getopts; + +#[macro_use] +extern crate uucore; + +use std::fs::File; +use std::io::{BufRead, BufReader, Lines, Read, stdin}; +use std::cmp::Ordering; + +static NAME: &'static str = "join"; +static VERSION: &'static str = env!("CARGO_PKG_VERSION"); + +#[derive(PartialEq)] +enum FileNum { + None, + File1, + File2, +} + +struct Settings { + key1: usize, + key2: usize, + print_unpaired: FileNum, + ignore_case: bool, +} + +impl Default for Settings { + fn default() -> Settings { + Settings { + key1: 0, + key2: 0, + print_unpaired: FileNum::None, + ignore_case: false, + } + } +} + +struct Line { + fields: Vec, +} + +impl Line { + fn new(string: String) -> Line { + Line { fields: string.split_whitespace().map(|s| String::from(s)).collect() } + } + + /// Get field at index. + fn get_field(&self, index: usize) -> &str { + if index < self.fields.len() { + &self.fields[index] + } else { + "" + } + } + + /// Iterate each field except the one at the index. + fn foreach_except(&self, index: usize, f: &F) + where + F: Fn(&String), + { + for (i, field) in self.fields.iter().enumerate() { + if i != index { + f(&field); + } + } + } +} + +struct State { + key: usize, + print_unpaired: bool, + lines: Lines>>, + seq: Vec, +} + +impl State { + fn new(name: &str, key: usize, print_unpaired: bool) -> State { + let f: Box = if name == "-" { + Box::new(stdin()) as Box + } else { + match File::open(name) { + Ok(file) => Box::new(file) as Box, + Err(err) => crash!(1, "{}: {}", name, err), + } + }; + + State { + key: key, + print_unpaired: print_unpaired, + lines: BufReader::new(f).lines(), + seq: Vec::new(), + } + } + + /// Compare the key fields of the two current lines. + fn compare(&self, other: &State, ignore_case: bool) -> Ordering { + let key1 = self.seq[0].get_field(self.key); + let key2 = other.seq[0].get_field(other.key); + + compare(key1, key2, ignore_case) + } + + /// Skip the current unpaired line. + fn skip_line(&mut self) { + if self.print_unpaired { + self.print_unpaired_line(&self.seq[0]); + } + + self.next_line(); + } + + /// Move to the next line, if any. + fn next_line(&mut self) { + match self.read_line() { + Some(line) => { + self.seq[0] = line; + } + None => { + self.seq.clear(); + } + } + } + + /// Keep reading line sequence until the key does not change, return + /// the first line whose key differs. + fn extend(&mut self, ignore_case: bool) -> Option { + while let Some(line) = self.read_line() { + let diff = compare( + self.seq[0].get_field(self.key), + line.get_field(self.key), + ignore_case, + ); + + if diff == Ordering::Equal { + self.seq.push(line); + } else { + return Some(line); + } + } + + return None; + } + + /// Combine two line sequences. + fn combine(&self, other: &State) { + let key = self.seq[0].get_field(self.key); + + for line1 in &self.seq { + for line2 in &other.seq { + print!("{}", key); + line1.foreach_except(self.key, &print_field); + line2.foreach_except(other.key, &print_field); + println!(); + } + } + } + + /// Reset with the next line. + fn reset(&mut self, next_line: Option) { + self.seq.clear(); + + if let Some(line) = next_line { + self.seq.push(line); + } + } + + fn has_line(&self) -> bool { + !self.seq.is_empty() + } + + fn initialize(&mut self) { + if let Some(line) = self.read_line() { + self.seq.push(line); + } + } + + fn finalize(&mut self) { + if self.has_line() && self.print_unpaired { + self.print_unpaired_line(&self.seq[0]); + + while let Some(line) = self.read_line() { + self.print_unpaired_line(&line); + } + } + } + + fn read_line(&mut self) -> Option { + match self.lines.next() { + Some(value) => Some(Line::new(value.expect("error reading file"))), + None => None, + } + } + + fn print_unpaired_line(&self, line: &Line) { + print!("{}", line.get_field(self.key)); + line.foreach_except(self.key, &print_field); + println!(); + } +} + +pub fn uumain(args: Vec) -> i32 { + let mut settings: Settings = Default::default(); + let mut opts = getopts::Options::new(); + + opts.optflag("h", "help", "display this help and exit"); + opts.optopt( + "a", + "", + "also print unpairable lines from file FILENUM, where FILENUM is 1 or 2, corresponding to FILE1 or FILE2", + "FILENUM" + ); + opts.optflag( + "i", + "ignore-case", + "ignore differences in case when comparing fields", + ); + opts.optopt("j", "", "equivalent to '-1 FIELD -2 FIELD'", "FIELD"); + opts.optopt("1", "", "join on this FIELD of file 1", "FIELD"); + opts.optopt("2", "", "join on this FIELD of file 2", "FIELD"); + + let matches = match opts.parse(&args[1..]) { + Ok(m) => m, + Err(f) => crash!(1, "Invalid options\n{}", f), + }; + + if matches.opt_present("help") { + let msg = format!( + "{0} {1} +Usage: + {0} [OPTION]... FILE1 FILE2 + +For each pair of input lines with identical join fields, write a line to +standard output. The default join field is the first, delimited by blanks.", + NAME, + VERSION + ); + print!("{}", opts.usage(&msg)); + return 0; + } + + let keys = parse_field_number(matches.opt_str("j")); + let key1 = parse_field_number(matches.opt_str("1")); + let key2 = parse_field_number(matches.opt_str("2")); + + settings.print_unpaired = match matches.opt_str("a") { + Some(value) => { + match &value[..] { + "1" => FileNum::File1, + "2" => FileNum::File2, + value => crash!(1, "invalid file number: {}", value), + } + } + None => FileNum::None, + }; + settings.ignore_case = matches.opt_present("ignore-case"); + settings.key1 = get_field_number(keys, key1); + settings.key2 = get_field_number(keys, key2); + + let files = matches.free; + let file_count = files.len(); + + if file_count < 1 { + crash!(1, "missing operand"); + } else if file_count < 2 { + crash!(1, "missing operand after '{}'", files[0]); + } else if file_count > 2 { + crash!(1, "extra operand '{}'", files[2]); + } + + if files[0] == "-" && files[1] == "-" { + crash!(1, "both files cannot be standard input"); + } + + exec(files, &settings) +} + +fn exec(files: Vec, settings: &Settings) -> i32 { + let mut state1 = State::new( + &files[0], + settings.key1, + settings.print_unpaired == FileNum::File1, + ); + + let mut state2 = State::new( + &files[1], + settings.key2, + settings.print_unpaired == FileNum::File2, + ); + + state1.initialize(); + state2.initialize(); + + while state1.has_line() && state2.has_line() { + let diff = state1.compare(&state2, settings.ignore_case); + + match diff { + Ordering::Less => { + state1.skip_line(); + } + Ordering::Greater => { + state2.skip_line(); + } + Ordering::Equal => { + let next_line1 = state1.extend(settings.ignore_case); + let next_line2 = state2.extend(settings.ignore_case); + + state1.combine(&state2); + + state1.reset(next_line1); + state2.reset(next_line2); + } + } + } + + state1.finalize(); + state2.finalize(); + + 0 +} + +/// Check that keys for both files and for a particular file are not +/// contradictory and return the zero-based key index. +fn get_field_number(keys: Option, key: Option) -> usize { + if let Some(keys) = keys { + if let Some(key) = key { + if keys != key { + crash!(1, "incompatible join fields {}, {}", keys, key); + } + } + + return keys - 1; + } + + match key { + Some(key) => key - 1, + None => 0, + } +} + +/// Parse the specified field string as a natural number and return it. +fn parse_field_number(value: Option) -> Option { + match value { + Some(value) => { + match value.parse() { + Ok(result) if result > 0 => Some(result), + _ => crash!(1, "invalid field number: '{}'", value), + } + } + None => None, + } +} + +fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering { + if ignore_case { + field1.to_lowercase().cmp(&field2.to_lowercase()) + } else { + field1.cmp(field2) + } +} + +fn print_field(field: &String) { + print!("{}{}", ' ', field); +} diff --git a/src/join/main.rs b/src/join/main.rs new file mode 100644 index 000000000..ee56bc720 --- /dev/null +++ b/src/join/main.rs @@ -0,0 +1,5 @@ +extern crate uu_join; + +fn main() { + std::process::exit(uu_join::uumain(std::env::args().collect())); +} diff --git a/tests/fixtures/join/capitalized.txt b/tests/fixtures/join/capitalized.txt new file mode 100644 index 000000000..322f0d316 --- /dev/null +++ b/tests/fixtures/join/capitalized.txt @@ -0,0 +1,4 @@ +A 1 +B 2 +C 4 +D 8 diff --git a/tests/fixtures/join/case_insensitive.expected b/tests/fixtures/join/case_insensitive.expected new file mode 100644 index 000000000..da91427a5 --- /dev/null +++ b/tests/fixtures/join/case_insensitive.expected @@ -0,0 +1,3 @@ +A 1 2 f +B 2 3 g +C 4 4 h diff --git a/tests/fixtures/join/default.expected b/tests/fixtures/join/default.expected new file mode 100644 index 000000000..5b3643826 --- /dev/null +++ b/tests/fixtures/join/default.expected @@ -0,0 +1,5 @@ +1 a +2 b +3 c +5 e +8 h diff --git a/tests/fixtures/join/different_field.expected b/tests/fixtures/join/different_field.expected new file mode 100644 index 000000000..fa9181ada --- /dev/null +++ b/tests/fixtures/join/different_field.expected @@ -0,0 +1,6 @@ +2 b a f +3 c b g +4 d c h +5 e f i +6 f g j +7 g h k diff --git a/tests/fixtures/join/different_fields.expected b/tests/fixtures/join/different_fields.expected new file mode 100644 index 000000000..cd870783f --- /dev/null +++ b/tests/fixtures/join/different_fields.expected @@ -0,0 +1,5 @@ +c 3 2 1 cd +d 4 3 2 de +e 5 5 3 ef +f 6 7 4 fg +g 7 11 5 gh diff --git a/tests/fixtures/join/empty.txt b/tests/fixtures/join/empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/fixtures/join/fields_1.txt b/tests/fixtures/join/fields_1.txt new file mode 100644 index 000000000..24d5fc285 --- /dev/null +++ b/tests/fixtures/join/fields_1.txt @@ -0,0 +1,5 @@ +1 +2 +3 +5 +8 diff --git a/tests/fixtures/join/fields_2.txt b/tests/fixtures/join/fields_2.txt new file mode 100644 index 000000000..5b0d49021 --- /dev/null +++ b/tests/fixtures/join/fields_2.txt @@ -0,0 +1,9 @@ +1 a +2 b +3 c +4 d +5 e +6 f +7 g +8 h +9 i diff --git a/tests/fixtures/join/fields_3.txt b/tests/fixtures/join/fields_3.txt new file mode 100644 index 000000000..4c5c0e779 --- /dev/null +++ b/tests/fixtures/join/fields_3.txt @@ -0,0 +1,6 @@ +a 2 f +b 3 g +c 4 h +f 5 i +g 6 j +h 7 k diff --git a/tests/fixtures/join/fields_4.txt b/tests/fixtures/join/fields_4.txt new file mode 100644 index 000000000..680e07c8d --- /dev/null +++ b/tests/fixtures/join/fields_4.txt @@ -0,0 +1,5 @@ +2 c 1 cd +3 d 2 de +5 e 3 ef +7 f 4 fg +11 g 5 gh diff --git a/tests/fixtures/join/unpaired_lines.expected b/tests/fixtures/join/unpaired_lines.expected new file mode 100644 index 000000000..1cf8624b9 --- /dev/null +++ b/tests/fixtures/join/unpaired_lines.expected @@ -0,0 +1,9 @@ +1 a +2 a f b +3 b g c +4 c h d +5 f i e +6 g j f +7 h k g +8 h +9 i diff --git a/tests/test_join.rs b/tests/test_join.rs new file mode 100644 index 000000000..71f896fa4 --- /dev/null +++ b/tests/test_join.rs @@ -0,0 +1,95 @@ +use common::util::*; + + +#[test] +fn empty_files() { + new_ucmd!() + .arg("empty.txt") + .arg("empty.txt") + .succeeds().stdout_only(""); + + new_ucmd!() + .arg("empty.txt") + .arg("fields_1.txt") + .succeeds().stdout_only(""); + + new_ucmd!() + .arg("fields_1.txt") + .arg("empty.txt") + .succeeds().stdout_only(""); +} + +#[test] +fn empty_intersection() { + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .arg("-2") + .arg("2") + .succeeds().stdout_only(""); +} + +#[test] +fn default_arguments() { + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .succeeds().stdout_only_fixture("default.expected"); +} + +#[test] +fn different_fields() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_4.txt") + .arg("-j") + .arg("2") + .succeeds().stdout_only_fixture("different_fields.expected"); + + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_4.txt") + .arg("-1") + .arg("2") + .arg("-2") + .arg("2") + .succeeds().stdout_only_fixture("different_fields.expected"); +} + +#[test] +fn different_field() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_3.txt") + .arg("-2") + .arg("2") + .succeeds().stdout_only_fixture("different_field.expected"); +} + +#[test] +fn unpaired_lines() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_3.txt") + .arg("-a") + .arg("1") + .succeeds().stdout_only_fixture("fields_2.txt"); + + new_ucmd!() + .arg("fields_3.txt") + .arg("fields_2.txt") + .arg("-1") + .arg("2") + .arg("-a") + .arg("2") + .succeeds().stdout_only_fixture("unpaired_lines.expected"); +} + +#[test] +fn case_insensitive() { + new_ucmd!() + .arg("capitalized.txt") + .arg("fields_3.txt") + .arg("-i") + .succeeds().stdout_only_fixture("case_insensitive.expected"); +} diff --git a/tests/tests.rs b/tests/tests.rs index f8d20ec0e..d7d1032fa 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -58,6 +58,7 @@ generic! { "fold", test_fold; "hashsum", test_hashsum; "head", test_head; + "join", test_join; "link", test_link; "ln", test_ln; "ls", test_ls;