From b33ce67d913184d95de550a154f76cd6574b053a Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Thu, 14 Dec 2017 00:02:42 +0300 Subject: [PATCH 1/6] join: implement basic functionality The basic implementation of join with some tests. The supported options: -1, -2, -j, -a, -i. --- Cargo.lock | 9 + Cargo.toml | 2 + Makefile | 1 + src/join/Cargo.toml | 16 + src/join/join.rs | 374 ++++++++++++++++++ src/join/main.rs | 5 + tests/fixtures/join/capitalized.txt | 4 + tests/fixtures/join/case_insensitive.expected | 3 + tests/fixtures/join/default.expected | 5 + tests/fixtures/join/different_field.expected | 6 + tests/fixtures/join/different_fields.expected | 5 + tests/fixtures/join/empty.txt | 0 tests/fixtures/join/fields_1.txt | 5 + tests/fixtures/join/fields_2.txt | 9 + tests/fixtures/join/fields_3.txt | 6 + tests/fixtures/join/fields_4.txt | 5 + tests/fixtures/join/unpaired_lines.expected | 9 + tests/test_join.rs | 95 +++++ tests/tests.rs | 1 + 19 files changed, 560 insertions(+) create mode 100644 src/join/Cargo.toml create mode 100644 src/join/join.rs create mode 100644 src/join/main.rs create mode 100644 tests/fixtures/join/capitalized.txt create mode 100644 tests/fixtures/join/case_insensitive.expected create mode 100644 tests/fixtures/join/default.expected create mode 100644 tests/fixtures/join/different_field.expected create mode 100644 tests/fixtures/join/different_fields.expected create mode 100644 tests/fixtures/join/empty.txt create mode 100644 tests/fixtures/join/fields_1.txt create mode 100644 tests/fixtures/join/fields_2.txt create mode 100644 tests/fixtures/join/fields_3.txt create mode 100644 tests/fixtures/join/fields_4.txt create mode 100644 tests/fixtures/join/unpaired_lines.expected create mode 100644 tests/test_join.rs diff --git a/Cargo.lock b/Cargo.lock index 5dcadf7e0..7d277b45f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,6 +35,7 @@ dependencies = [ "hostname 0.0.1", "id 0.0.1", "install 0.0.1", + "join 0.0.1", "kill 0.0.1", 
"lazy_static 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", @@ -588,6 +589,14 @@ dependencies = [ "either 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "join" +version = "0.0.1" +dependencies = [ + "getopts 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", + "uucore 0.0.1", +] + [[package]] name = "kernel32-sys" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 3148e261c..fdc46a845 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ fuchsia = [ generic = [ "cat", "hashsum", + "join", "more", "ln", "ls", @@ -178,6 +179,7 @@ hostid = { optional=true, path="src/hostid" } hostname = { optional=true, path="src/hostname" } id = { optional=true, path="src/id" } install = { optional=true, path="src/install" } +join = { optional=true, path="src/join" } kill = { optional=true, path="src/kill" } link = { optional=true, path="src/link" } ln = { optional=true, path="src/ln" } diff --git a/Makefile b/Makefile index 4bc976e13..84510df71 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,7 @@ PROGS := \ fold \ hashsum \ head \ + join \ link \ ln \ ls \ diff --git a/src/join/Cargo.toml b/src/join/Cargo.toml new file mode 100644 index 000000000..9c7fc6bfa --- /dev/null +++ b/src/join/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "join" +version = "0.0.1" +authors = [] + +[lib] +name = "uu_join" +path = "join.rs" + +[dependencies] +getopts = "0.2.14" +uucore = { path="../uucore" } + +[[bin]] +name = "join" +path = "main.rs" diff --git a/src/join/join.rs b/src/join/join.rs new file mode 100644 index 000000000..5f257268a --- /dev/null +++ b/src/join/join.rs @@ -0,0 +1,374 @@ +#![crate_name = "uu_join"] + +/* + * This file is part of the uutils coreutils package. + * + * (c) Konstantin Pospelov + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +extern crate getopts; + +#[macro_use] +extern crate uucore; + +use std::fs::File; +use std::io::{BufRead, BufReader, Lines, Read, stdin}; +use std::cmp::Ordering; + +static NAME: &'static str = "join"; +static VERSION: &'static str = env!("CARGO_PKG_VERSION"); + +#[derive(PartialEq)] +enum FileNum { + None, + File1, + File2, +} + +struct Settings { + key1: usize, + key2: usize, + print_unpaired: FileNum, + ignore_case: bool, +} + +impl Default for Settings { + fn default() -> Settings { + Settings { + key1: 0, + key2: 0, + print_unpaired: FileNum::None, + ignore_case: false, + } + } +} + +struct Line { + fields: Vec, +} + +impl Line { + fn new(string: String) -> Line { + Line { fields: string.split_whitespace().map(|s| String::from(s)).collect() } + } + + /// Get field at index. + fn get_field(&self, index: usize) -> &str { + if index < self.fields.len() { + &self.fields[index] + } else { + "" + } + } + + /// Iterate each field except the one at the index. + fn foreach_except(&self, index: usize, f: &F) + where + F: Fn(&String), + { + for (i, field) in self.fields.iter().enumerate() { + if i != index { + f(&field); + } + } + } +} + +struct State { + key: usize, + print_unpaired: bool, + lines: Lines>>, + seq: Vec, +} + +impl State { + fn new(name: &str, key: usize, print_unpaired: bool) -> State { + let f: Box = if name == "-" { + Box::new(stdin()) as Box + } else { + match File::open(name) { + Ok(file) => Box::new(file) as Box, + Err(err) => crash!(1, "{}: {}", name, err), + } + }; + + State { + key: key, + print_unpaired: print_unpaired, + lines: BufReader::new(f).lines(), + seq: Vec::new(), + } + } + + /// Compare the key fields of the two current lines. + fn compare(&self, other: &State, ignore_case: bool) -> Ordering { + let key1 = self.seq[0].get_field(self.key); + let key2 = other.seq[0].get_field(other.key); + + compare(key1, key2, ignore_case) + } + + /// Skip the current unpaired line. 
+ fn skip_line(&mut self) { + if self.print_unpaired { + self.print_unpaired_line(&self.seq[0]); + } + + self.next_line(); + } + + /// Move to the next line, if any. + fn next_line(&mut self) { + match self.read_line() { + Some(line) => { + self.seq[0] = line; + } + None => { + self.seq.clear(); + } + } + } + + /// Keep reading the line sequence until the key changes, return + /// the first line whose key differs. + fn extend(&mut self, ignore_case: bool) -> Option { + while let Some(line) = self.read_line() { + let diff = compare( + self.seq[0].get_field(self.key), + line.get_field(self.key), + ignore_case, + ); + + if diff == Ordering::Equal { + self.seq.push(line); + } else { + return Some(line); + } + } + + return None; + } + + /// Combine two line sequences. + fn combine(&self, other: &State) { + let key = self.seq[0].get_field(self.key); + + for line1 in &self.seq { + for line2 in &other.seq { + print!("{}", key); + line1.foreach_except(self.key, &print_field); + line2.foreach_except(other.key, &print_field); + println!(); + } + } + } + + /// Reset with the next line. 
+ fn reset(&mut self, next_line: Option) { + self.seq.clear(); + + if let Some(line) = next_line { + self.seq.push(line); + } + } + + fn has_line(&self) -> bool { + !self.seq.is_empty() + } + + fn initialize(&mut self) { + if let Some(line) = self.read_line() { + self.seq.push(line); + } + } + + fn finalize(&mut self) { + if self.has_line() && self.print_unpaired { + self.print_unpaired_line(&self.seq[0]); + + while let Some(line) = self.read_line() { + self.print_unpaired_line(&line); + } + } + } + + fn read_line(&mut self) -> Option { + match self.lines.next() { + Some(value) => Some(Line::new(value.expect("error reading file"))), + None => None, + } + } + + fn print_unpaired_line(&self, line: &Line) { + print!("{}", line.get_field(self.key)); + line.foreach_except(self.key, &print_field); + println!(); + } +} + +pub fn uumain(args: Vec) -> i32 { + let mut settings: Settings = Default::default(); + let mut opts = getopts::Options::new(); + + opts.optflag("h", "help", "display this help and exit"); + opts.optopt( + "a", + "", + "also print unpairable lines from file FILENUM, where FILENUM is 1 or 2, corresponding to FILE1 or FILE2", + "FILENUM" + ); + opts.optflag( + "i", + "ignore-case", + "ignore differences in case when comparing fields", + ); + opts.optopt("j", "", "equivalent to '-1 FIELD -2 FIELD'", "FIELD"); + opts.optopt("1", "", "join on this FIELD of file 1", "FIELD"); + opts.optopt("2", "", "join on this FIELD of file 2", "FIELD"); + + let matches = match opts.parse(&args[1..]) { + Ok(m) => m, + Err(f) => crash!(1, "Invalid options\n{}", f), + }; + + if matches.opt_present("help") { + let msg = format!( + "{0} {1} +Usage: + {0} [OPTION]... FILE1 FILE2 + +For each pair of input lines with identical join fields, write a line to +standard output. 
The default join field is the first, delimited by blanks.", + NAME, + VERSION + ); + print!("{}", opts.usage(&msg)); + return 0; + } + + let keys = parse_field_number(matches.opt_str("j")); + let key1 = parse_field_number(matches.opt_str("1")); + let key2 = parse_field_number(matches.opt_str("2")); + + settings.print_unpaired = match matches.opt_str("a") { + Some(value) => { + match &value[..] { + "1" => FileNum::File1, + "2" => FileNum::File2, + value => crash!(1, "invalid file number: {}", value), + } + } + None => FileNum::None, + }; + settings.ignore_case = matches.opt_present("ignore-case"); + settings.key1 = get_field_number(keys, key1); + settings.key2 = get_field_number(keys, key2); + + let files = matches.free; + let file_count = files.len(); + + if file_count < 1 { + crash!(1, "missing operand"); + } else if file_count < 2 { + crash!(1, "missing operand after '{}'", files[0]); + } else if file_count > 2 { + crash!(1, "extra operand '{}'", files[2]); + } + + if files[0] == "-" && files[1] == "-" { + crash!(1, "both files cannot be standard input"); + } + + exec(files, &settings) +} + +fn exec(files: Vec, settings: &Settings) -> i32 { + let mut state1 = State::new( + &files[0], + settings.key1, + settings.print_unpaired == FileNum::File1, + ); + + let mut state2 = State::new( + &files[1], + settings.key2, + settings.print_unpaired == FileNum::File2, + ); + + state1.initialize(); + state2.initialize(); + + while state1.has_line() && state2.has_line() { + let diff = state1.compare(&state2, settings.ignore_case); + + match diff { + Ordering::Less => { + state1.skip_line(); + } + Ordering::Greater => { + state2.skip_line(); + } + Ordering::Equal => { + let next_line1 = state1.extend(settings.ignore_case); + let next_line2 = state2.extend(settings.ignore_case); + + state1.combine(&state2); + + state1.reset(next_line1); + state2.reset(next_line2); + } + } + } + + state1.finalize(); + state2.finalize(); + + 0 +} + +/// Check that keys for both files and for a 
particular file are not +/// contradictory and return the zero-based key index. +fn get_field_number(keys: Option, key: Option) -> usize { + if let Some(keys) = keys { + if let Some(key) = key { + if keys != key { + crash!(1, "incompatible join fields {}, {}", keys, key); + } + } + + return keys - 1; + } + + match key { + Some(key) => key - 1, + None => 0, + } +} + +/// Parse the specified field string as a natural number and return it. +fn parse_field_number(value: Option) -> Option { + match value { + Some(value) => { + match value.parse() { + Ok(result) if result > 0 => Some(result), + _ => crash!(1, "invalid field number: '{}'", value), + } + } + None => None, + } +} + +fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering { + if ignore_case { + field1.to_lowercase().cmp(&field2.to_lowercase()) + } else { + field1.cmp(field2) + } +} + +fn print_field(field: &String) { + print!("{}{}", ' ', field); +} diff --git a/src/join/main.rs b/src/join/main.rs new file mode 100644 index 000000000..ee56bc720 --- /dev/null +++ b/src/join/main.rs @@ -0,0 +1,5 @@ +extern crate uu_join; + +fn main() { + std::process::exit(uu_join::uumain(std::env::args().collect())); +} diff --git a/tests/fixtures/join/capitalized.txt b/tests/fixtures/join/capitalized.txt new file mode 100644 index 000000000..322f0d316 --- /dev/null +++ b/tests/fixtures/join/capitalized.txt @@ -0,0 +1,4 @@ +A 1 +B 2 +C 4 +D 8 diff --git a/tests/fixtures/join/case_insensitive.expected b/tests/fixtures/join/case_insensitive.expected new file mode 100644 index 000000000..da91427a5 --- /dev/null +++ b/tests/fixtures/join/case_insensitive.expected @@ -0,0 +1,3 @@ +A 1 2 f +B 2 3 g +C 4 4 h diff --git a/tests/fixtures/join/default.expected b/tests/fixtures/join/default.expected new file mode 100644 index 000000000..5b3643826 --- /dev/null +++ b/tests/fixtures/join/default.expected @@ -0,0 +1,5 @@ +1 a +2 b +3 c +5 e +8 h diff --git a/tests/fixtures/join/different_field.expected 
b/tests/fixtures/join/different_field.expected new file mode 100644 index 000000000..fa9181ada --- /dev/null +++ b/tests/fixtures/join/different_field.expected @@ -0,0 +1,6 @@ +2 b a f +3 c b g +4 d c h +5 e f i +6 f g j +7 g h k diff --git a/tests/fixtures/join/different_fields.expected b/tests/fixtures/join/different_fields.expected new file mode 100644 index 000000000..cd870783f --- /dev/null +++ b/tests/fixtures/join/different_fields.expected @@ -0,0 +1,5 @@ +c 3 2 1 cd +d 4 3 2 de +e 5 5 3 ef +f 6 7 4 fg +g 7 11 5 gh diff --git a/tests/fixtures/join/empty.txt b/tests/fixtures/join/empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/fixtures/join/fields_1.txt b/tests/fixtures/join/fields_1.txt new file mode 100644 index 000000000..24d5fc285 --- /dev/null +++ b/tests/fixtures/join/fields_1.txt @@ -0,0 +1,5 @@ +1 +2 +3 +5 +8 diff --git a/tests/fixtures/join/fields_2.txt b/tests/fixtures/join/fields_2.txt new file mode 100644 index 000000000..5b0d49021 --- /dev/null +++ b/tests/fixtures/join/fields_2.txt @@ -0,0 +1,9 @@ +1 a +2 b +3 c +4 d +5 e +6 f +7 g +8 h +9 i diff --git a/tests/fixtures/join/fields_3.txt b/tests/fixtures/join/fields_3.txt new file mode 100644 index 000000000..4c5c0e779 --- /dev/null +++ b/tests/fixtures/join/fields_3.txt @@ -0,0 +1,6 @@ +a 2 f +b 3 g +c 4 h +f 5 i +g 6 j +h 7 k diff --git a/tests/fixtures/join/fields_4.txt b/tests/fixtures/join/fields_4.txt new file mode 100644 index 000000000..680e07c8d --- /dev/null +++ b/tests/fixtures/join/fields_4.txt @@ -0,0 +1,5 @@ +2 c 1 cd +3 d 2 de +5 e 3 ef +7 f 4 fg +11 g 5 gh diff --git a/tests/fixtures/join/unpaired_lines.expected b/tests/fixtures/join/unpaired_lines.expected new file mode 100644 index 000000000..1cf8624b9 --- /dev/null +++ b/tests/fixtures/join/unpaired_lines.expected @@ -0,0 +1,9 @@ +1 a +2 a f b +3 b g c +4 c h d +5 f i e +6 g j f +7 h k g +8 h +9 i diff --git a/tests/test_join.rs b/tests/test_join.rs new file mode 100644 index 000000000..71f896fa4 
--- /dev/null +++ b/tests/test_join.rs @@ -0,0 +1,95 @@ +use common::util::*; + + +#[test] +fn empty_files() { + new_ucmd!() + .arg("empty.txt") + .arg("empty.txt") + .succeeds().stdout_only(""); + + new_ucmd!() + .arg("empty.txt") + .arg("fields_1.txt") + .succeeds().stdout_only(""); + + new_ucmd!() + .arg("fields_1.txt") + .arg("empty.txt") + .succeeds().stdout_only(""); +} + +#[test] +fn empty_intersection() { + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .arg("-2") + .arg("2") + .succeeds().stdout_only(""); +} + +#[test] +fn default_arguments() { + new_ucmd!() + .arg("fields_1.txt") + .arg("fields_2.txt") + .succeeds().stdout_only_fixture("default.expected"); +} + +#[test] +fn different_fields() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_4.txt") + .arg("-j") + .arg("2") + .succeeds().stdout_only_fixture("different_fields.expected"); + + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_4.txt") + .arg("-1") + .arg("2") + .arg("-2") + .arg("2") + .succeeds().stdout_only_fixture("different_fields.expected"); +} + +#[test] +fn different_field() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_3.txt") + .arg("-2") + .arg("2") + .succeeds().stdout_only_fixture("different_field.expected"); +} + +#[test] +fn unpaired_lines() { + new_ucmd!() + .arg("fields_2.txt") + .arg("fields_3.txt") + .arg("-a") + .arg("1") + .succeeds().stdout_only_fixture("fields_2.txt"); + + new_ucmd!() + .arg("fields_3.txt") + .arg("fields_2.txt") + .arg("-1") + .arg("2") + .arg("-a") + .arg("2") + .succeeds().stdout_only_fixture("unpaired_lines.expected"); +} + +#[test] +fn case_insensitive() { + new_ucmd!() + .arg("capitalized.txt") + .arg("fields_3.txt") + .arg("-i") + .succeeds().stdout_only_fixture("case_insensitive.expected"); +} diff --git a/tests/tests.rs b/tests/tests.rs index f8d20ec0e..d7d1032fa 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -58,6 +58,7 @@ generic! 
{ "fold", test_fold; "hashsum", test_hashsum; "head", test_head; + "join", test_join; "link", test_link; "ln", test_ln; "ls", test_ls; From a04a77a477d751096ddbf649fdbd5ba64b4284cf Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Sat, 16 Dec 2017 16:09:46 +0300 Subject: [PATCH 2/6] join: switch to auto-generated main.rs --- src/join/Cargo.toml | 3 ++- src/join/main.rs | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 src/join/main.rs diff --git a/src/join/Cargo.toml b/src/join/Cargo.toml index 9c7fc6bfa..e96c5e220 100644 --- a/src/join/Cargo.toml +++ b/src/join/Cargo.toml @@ -2,6 +2,7 @@ name = "join" version = "0.0.1" authors = [] +build = "../../mkmain.rs" [lib] name = "uu_join" @@ -13,4 +14,4 @@ uucore = { path="../uucore" } [[bin]] name = "join" -path = "main.rs" +path = "../../uumain.rs" diff --git a/src/join/main.rs b/src/join/main.rs deleted file mode 100644 index ee56bc720..000000000 --- a/src/join/main.rs +++ /dev/null @@ -1,5 +0,0 @@ -extern crate uu_join; - -fn main() { - std::process::exit(uu_join::uumain(std::env::args().collect())); -} From 3aa63ad9dd21361c0bd69ec29931f808801d1d22 Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Sat, 16 Dec 2017 16:11:39 +0300 Subject: [PATCH 3/6] join: minor review points --- src/join/join.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/join/join.rs b/src/join/join.rs index 5f257268a..01dc150c1 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -123,12 +123,8 @@ impl State { /// Move to the next line, if any. 
fn next_line(&mut self) { match self.read_line() { - Some(line) => { - self.seq[0] = line; - } - None => { - self.seq.clear(); - } + Some(line) => self.seq[0] = line, + None => self.seq.clear() } } @@ -197,7 +193,7 @@ impl State { fn read_line(&mut self) -> Option { match self.lines.next() { - Some(value) => Some(Line::new(value.expect("error reading file"))), + Some(value) => Some(Line::new(crash_if_err!(1, value))), None => None, } } From d28e09de04b73ef58019eda4c66f38319f33ddc6 Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Sat, 16 Dec 2017 19:26:25 +0300 Subject: [PATCH 4/6] join: do not wrap stdin in BufReader --- src/join/join.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/join/join.rs b/src/join/join.rs index 01dc150c1..48fefb7a7 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -15,7 +15,7 @@ extern crate getopts; extern crate uucore; use std::fs::File; -use std::io::{BufRead, BufReader, Lines, Read, stdin}; +use std::io::{BufRead, BufReader, Lines, Stdin, stdin}; use std::cmp::Ordering; static NAME: &'static str = "join"; @@ -77,20 +77,20 @@ impl Line { } } -struct State { +struct State<'a> { key: usize, print_unpaired: bool, - lines: Lines>>, + lines: Lines>, seq: Vec, } -impl State { - fn new(name: &str, key: usize, print_unpaired: bool) -> State { - let f: Box = if name == "-" { - Box::new(stdin()) as Box +impl<'a> State<'a> { + fn new(name: &str, stdin: &'a Stdin, key: usize, print_unpaired: bool) -> State<'a> { + let f = if name == "-" { + Box::new(stdin.lock()) as Box } else { match File::open(name) { - Ok(file) => Box::new(file) as Box, + Ok(file) => Box::new(BufReader::new(file)) as Box, Err(err) => crash!(1, "{}: {}", name, err), } }; @@ -98,7 +98,7 @@ impl State { State { key: key, print_unpaired: print_unpaired, - lines: BufReader::new(f).lines(), + lines: f.lines(), seq: Vec::new(), } } @@ -124,7 +124,7 @@ impl State { fn next_line(&mut self) { match self.read_line() { 
Some(line) => self.seq[0] = line, - None => self.seq.clear() + None => self.seq.clear(), } } @@ -282,14 +282,18 @@ standard output. The default join field is the first, delimited by blanks.", } fn exec(files: Vec, settings: &Settings) -> i32 { + let stdin = stdin(); + let mut state1 = State::new( &files[0], + &stdin, settings.key1, settings.print_unpaired == FileNum::File1, ); let mut state2 = State::new( &files[1], + &stdin, settings.key2, settings.print_unpaired == FileNum::File2, ); From 2a6d550f4bbdda9ff788f0ed1373f11987b74d6c Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Sun, 17 Dec 2017 16:36:46 +0300 Subject: [PATCH 5/6] join: switch to clap --- src/join/Cargo.toml | 2 +- src/join/join.rs | 124 ++++++++++++++++++++++---------------------- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/src/join/Cargo.toml b/src/join/Cargo.toml index e96c5e220..92532edf2 100644 --- a/src/join/Cargo.toml +++ b/src/join/Cargo.toml @@ -9,7 +9,7 @@ name = "uu_join" path = "join.rs" [dependencies] -getopts = "0.2.14" +clap = "2.24.1" uucore = { path="../uucore" } [[bin]] diff --git a/src/join/join.rs b/src/join/join.rs index 48fefb7a7..5f351ee76 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -9,7 +9,7 @@ * file that was distributed with this source code. */ -extern crate getopts; +extern crate clap; #[macro_use] extern crate uucore; @@ -17,6 +17,7 @@ extern crate uucore; use std::fs::File; use std::io::{BufRead, BufReader, Lines, Stdin, stdin}; use std::cmp::Ordering; +use clap::{App, Arg}; static NAME: &'static str = "join"; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -206,52 +207,59 @@ impl<'a> State<'a> { } pub fn uumain(args: Vec) -> i32 { + let matches = App::new(NAME) + .version(VERSION) + .about( + "For each pair of input lines with identical join fields, write a line to +standard output. The default join field is the first, delimited by blanks. 
+ +When FILE1 or FILE2 (not both) is -, read standard input.") + .help_message("display this help and exit") + .version_message("display version and exit") + .arg(Arg::with_name("a") + .short("a") + .takes_value(true) + .possible_values(&["1", "2"]) + .value_name("FILENUM") + .help("also print unpairable lines from file FILENUM, where +FILENUM is 1 or 2, corresponding to FILE1 or FILE2")) + .arg(Arg::with_name("i") + .short("i") + .long("ignore-case") + .help("ignore differences in case when comparing fields")) + .arg(Arg::with_name("j") + .short("j") + .takes_value(true) + .value_name("FIELD") + .help("equivalent to '-1 FIELD -2 FIELD'")) + .arg(Arg::with_name("1") + .short("1") + .takes_value(true) + .value_name("FIELD") + .help("join on this FIELD of file 1")) + .arg(Arg::with_name("2") + .short("2") + .takes_value(true) + .value_name("FIELD") + .help("join on this FIELD of file 2")) + .arg(Arg::with_name("file1") + .required(true) + .value_name("FILE1") + .hidden(true)) + .arg(Arg::with_name("file2") + .required(true) + .value_name("FILE2") + .hidden(true)) + .get_matches_from(args); + + let keys = parse_field_number(matches.value_of("j")); + let key1 = parse_field_number(matches.value_of("1")); + let key2 = parse_field_number(matches.value_of("2")); + let mut settings: Settings = Default::default(); - let mut opts = getopts::Options::new(); - - opts.optflag("h", "help", "display this help and exit"); - opts.optopt( - "a", - "", - "also print unpairable lines from file FILENUM, where FILENUM is 1 or 2, corresponding to FILE1 or FILE2", - "FILENUM" - ); - opts.optflag( - "i", - "ignore-case", - "ignore differences in case when comparing fields", - ); - opts.optopt("j", "", "equivalent to '-1 FIELD -2 FIELD'", "FIELD"); - opts.optopt("1", "", "join on this FIELD of file 1", "FIELD"); - opts.optopt("2", "", "join on this FIELD of file 2", "FIELD"); - - let matches = match opts.parse(&args[1..]) { - Ok(m) => m, - Err(f) => crash!(1, "Invalid options\n{}", f), - }; 
- - if matches.opt_present("help") { - let msg = format!( - "{0} {1} -Usage: - {0} [OPTION]... FILE1 FILE2 - -For each pair of input lines with identical join fields, write a line to -standard output. The default join field is the first, delimited by blanks.", - NAME, - VERSION - ); - print!("{}", opts.usage(&msg)); - return 0; - } - - let keys = parse_field_number(matches.opt_str("j")); - let key1 = parse_field_number(matches.opt_str("1")); - let key2 = parse_field_number(matches.opt_str("2")); - - settings.print_unpaired = match matches.opt_str("a") { + settings.print_unpaired = match matches.value_of("a") { Some(value) => { - match &value[..] { + match value { "1" => FileNum::File1, "2" => FileNum::File2, value => crash!(1, "invalid file number: {}", value), @@ -259,40 +267,32 @@ standard output. The default join field is the first, delimited by blanks.", } None => FileNum::None, }; - settings.ignore_case = matches.opt_present("ignore-case"); + settings.ignore_case = matches.is_present("i"); settings.key1 = get_field_number(keys, key1); settings.key2 = get_field_number(keys, key2); - let files = matches.free; - let file_count = files.len(); + let file1 = matches.value_of("file1").unwrap(); + let file2 = matches.value_of("file2").unwrap(); - if file_count < 1 { - crash!(1, "missing operand"); - } else if file_count < 2 { - crash!(1, "missing operand after '{}'", files[0]); - } else if file_count > 2 { - crash!(1, "extra operand '{}'", files[2]); - } - - if files[0] == "-" && files[1] == "-" { + if file1 == "-" && file2 == "-" { crash!(1, "both files cannot be standard input"); } - exec(files, &settings) + exec(file1, file2, &settings) } -fn exec(files: Vec, settings: &Settings) -> i32 { +fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { let stdin = stdin(); let mut state1 = State::new( - &files[0], + &file1, &stdin, settings.key1, settings.print_unpaired == FileNum::File1, ); let mut state2 = State::new( - &files[1], + &file2, &stdin, settings.key2, 
settings.print_unpaired == FileNum::File2, @@ -349,7 +349,7 @@ fn get_field_number(keys: Option, key: Option) -> usize { } /// Parse the specified field string as a natural number and return it. -fn parse_field_number(value: Option) -> Option { +fn parse_field_number(value: Option<&str>) -> Option { match value { Some(value) => { match value.parse() { From 743a5b68ed013fee1105163093b08a942d6d7db8 Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Mon, 18 Dec 2017 10:40:15 +0300 Subject: [PATCH 6/6] join: simplify closure for line parsing --- src/join/join.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/join/join.rs b/src/join/join.rs index 5f351ee76..03d0396ef 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -53,7 +53,7 @@ struct Line { impl Line { fn new(string: String) -> Line { - Line { fields: string.split_whitespace().map(|s| String::from(s)).collect() } + Line { fields: string.split_whitespace().map(String::from).collect() } } /// Get field at index.