1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #180 from ebfe/tr

tr: speed up and add '--complement'
This commit is contained in:
Arcterus 2014-05-18 13:30:37 -07:00
commit af33a33bee
2 changed files with 52 additions and 18 deletions

View file

@ -1,7 +1,7 @@
use std::io::process::Command; use std::io::process::Command;
fn run(input: &str, set1: &str, set2: &str) -> Vec<u8> { fn run(input: &str, args: &[&'static str]) -> Vec<u8> {
let mut process = Command::new("build/tr").arg(set1).arg(set2).spawn().unwrap(); let mut process = Command::new("build/tr").args(args).spawn().unwrap();
process.stdin.take_unwrap().write_str(input).unwrap(); process.stdin.take_unwrap().write_str(input).unwrap();
@ -14,20 +14,32 @@ fn run(input: &str, set1: &str, set2: &str) -> Vec<u8> {
#[test] #[test]
fn test_toupper() { fn test_toupper() {
let out = run("!abcd!", "a-z", "A-Z"); let out = run("!abcd!", ["a-z", "A-Z"]);
assert_eq!(out.as_slice(), bytes!("!ABCD!")); assert_eq!(out.as_slice(), bytes!("!ABCD!"));
} }
#[test] #[test]
fn test_small_set2() { fn test_small_set2() {
let out = run("@0123456789", "0-9", "X"); let out = run("@0123456789", ["0-9", "X"]);
assert_eq!(out.as_slice(), bytes!("@XXXXXXXXXX")); assert_eq!(out.as_slice(), bytes!("@XXXXXXXXXX"));
} }
#[test] #[test]
fn test_unicode() { fn test_unicode() {
let out = run("(,°□°), ┬─┬", ", ┬─┬", "╯︵┻━┻"); let out = run("(,°□°), ┬─┬", [", ┬─┬", "╯︵┻━┻"]);
assert_eq!(out.as_slice(), bytes!("(╯°□°)╯︵┻━┻")); assert_eq!(out.as_slice(), bytes!("(╯°□°)╯︵┻━┻"));
} }
// With `-d`, every input character that belongs to SET1 (`a-z`) is removed,
// so only the uppercase letters of "aBcD" should come back from `run`.
#[test]
fn test_delete() {
    let result = run("aBcD", ["-d", "a-z"]);
    assert_eq!(result.as_slice(), bytes!("BD"));
}
// With `-d -c`, the complement of SET1 is deleted instead: anything that is
// NOT in `a-z` is removed, so only the lowercase letters of "aBcD" survive.
#[test]
fn test_delete_complement() {
    let result = run("aBcD", ["-d", "-c", "a-z"]);
    assert_eq!(result.as_slice(), bytes!("ac"));
}

View file

@ -1,4 +1,5 @@
#![crate_id(name="tr", vers="1.0.0", author="Michael Gehring")] #![crate_id(name="tr", vers="1.0.0", author="Michael Gehring")]
#![feature(macro_rules)]
/* /*
* This file is part of the uutils coreutils package. * This file is part of the uutils coreutils package.
@ -12,7 +13,8 @@
extern crate collections; extern crate collections;
extern crate getopts; extern crate getopts;
use collections::hashmap::{HashMap, HashSet}; use collections::bitv::BitvSet;
use collections::smallintmap::SmallIntMap;
use getopts::OptGroup; use getopts::OptGroup;
use std::char::from_u32; use std::char::from_u32;
use std::io::print; use std::io::print;
@ -21,6 +23,9 @@ use std::iter::FromIterator;
use std::os; use std::os;
use std::vec::Vec; use std::vec::Vec;
#[path="../common/util.rs"]
mod util;
static NAME : &'static str = "tr"; static NAME : &'static str = "tr";
static VERSION : &'static str = "1.0.0"; static VERSION : &'static str = "1.0.0";
@ -81,17 +86,23 @@ fn expand_set(s: &str) -> Vec<char> {
set set
} }
fn delete(set: Vec<char>) { fn delete(set: Vec<char>, complement: bool) {
let mut hset = HashSet::new(); let mut bset = BitvSet::new();
let mut out = stdout(); let mut out = stdout();
for &c in set.iter() { for &c in set.iter() {
hset.insert(c); bset.insert(c as uint);
} }
let is_allowed = if complement {
|c: char| bset.contains(&(c as uint))
} else {
|c: char| !bset.contains(&(c as uint))
};
for c in stdin().chars() { for c in stdin().chars() {
match c { match c {
Ok(c) if !hset.contains(&c) => out.write_char(c).unwrap(), Ok(c) if is_allowed(c) => out.write_char(c).unwrap(),
Ok(_) => (), Ok(_) => (),
Err(err) => fail!("{}", err), Err(err) => fail!("{}", err),
}; };
@ -99,21 +110,21 @@ fn delete(set: Vec<char>) {
} }
fn tr(set1: &[char], set2: &[char]) { fn tr(set1: &[char], set2: &[char]) {
let mut map = HashMap::new(); let mut map = SmallIntMap::<char>::new();
let mut out = stdout(); let mut out = stdout();
for i in range(0, set1.len()) { for i in range(0, set1.len()) {
if i >= set2.len() { if i >= set2.len() {
map.insert(set1[i], set2[set2.len()-1]); map.insert(set1[i] as uint, set2[set2.len()-1]);
} else { } else {
map.insert(set1[i], set2[i]); map.insert(set1[i] as uint, set2[i]);
} }
} }
for c in stdin().chars() { for c in stdin().chars() {
match c { match c {
Ok(inc) => { Ok(inc) => {
let trc = match map.find(&inc) { let trc = match map.find(&(inc as uint)) {
Some(t) => *t, Some(t) => *t,
None => inc, None => inc,
}; };
@ -138,6 +149,8 @@ fn usage(opts: &[OptGroup]) {
pub fn main() { pub fn main() {
let args: Vec<StrBuf> = os::args().iter().map(|x| x.to_strbuf()).collect(); let args: Vec<StrBuf> = os::args().iter().map(|x| x.to_strbuf()).collect();
let opts = [ let opts = [
getopts::optflag("c", "complement", "use the complement of SET1"),
getopts::optflag("C", "", "same as -c"),
getopts::optflag("d", "delete", "delete characters in SET1"), getopts::optflag("d", "delete", "delete characters in SET1"),
getopts::optflag("h", "help", "display this help and exit"), getopts::optflag("h", "help", "display this help and exit"),
getopts::optflag("V", "version", "output version information and exit"), getopts::optflag("V", "version", "output version information and exit"),
@ -145,12 +158,15 @@ pub fn main() {
let matches = match getopts::getopts(args.tail(), opts) { let matches = match getopts::getopts(args.tail(), opts) {
Ok(m) => m, Ok(m) => m,
Err(err) => fail!("{}", err.to_err_msg()), Err(err) => {
show_error!(1, "{}", err.to_err_msg());
return;
}
}; };
if matches.opt_present("help") { if matches.opt_present("help") {
usage(opts); usage(opts);
return return;
} }
if matches.opt_present("version") { if matches.opt_present("version") {
@ -161,15 +177,21 @@ pub fn main() {
if matches.free.len() == 0 { if matches.free.len() == 0 {
usage(opts); usage(opts);
os::set_exit_status(1); os::set_exit_status(1);
return return;
} }
let dflag = matches.opt_present("d"); let dflag = matches.opt_present("d");
let cflag = matches.opts_present(["c".to_strbuf(), "C".to_strbuf()]);
let sets = matches.free; let sets = matches.free;
if cflag && !dflag {
show_error!(1, "-c is only supported with -d");
return;
}
if dflag { if dflag {
let set1 = expand_set(sets.get(0).as_slice()); let set1 = expand_set(sets.get(0).as_slice());
delete(set1); delete(set1, cflag);
} else { } else {
let set1 = expand_set(sets.get(0).as_slice()); let set1 = expand_set(sets.get(0).as_slice());
let set2 = expand_set(sets.get(1).as_slice()); let set2 = expand_set(sets.get(1).as_slice());