// Patch header preserved verbatim from the original span:
// diff --git a/src/tr/expand.rs b/src/tr/expand.rs new file mode 100644 index 000000000..fd8f885ff --- /dev/null +++ b/src/tr/expand.rs @@ -0,0 +1,117 @@

/*
 * This file is part of the uutils coreutils package.
 *
 * (c) Michael Gehring
 * (c) kwantam
 *     20150428 created `expand` module to eliminate most allocs during setup
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use std::char::from_u32;
use std::cmp::min;
use std::ops::Range;

/// Maps the character that follows a backslash to the character it denotes.
///
/// `a`, `b`, `f`, `v`, `n`, `r` and `t` become the matching control character;
/// every other character stands for itself (so `\\` is a backslash and `\-`
/// is a hyphen).
#[inline]
fn unescape_char(c: char) -> char {
    match c {
        'a' => 0x07u8 as char,
        'b' => 0x08u8 as char,
        'f' => 0x0cu8 as char,
        'v' => 0x0bu8 as char,
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        _ => c,
    }
}

/// Iterator over the characters of a set specification with backslash
/// escapes resolved. It is only a cursor into the borrowed string, so no
/// allocation takes place.
struct Unescape<'a> {
    /// The part of the specification that has not been consumed yet.
    string: &'a str,
}

impl<'a> Iterator for Unescape<'a> {
    type Item = char;

    /// A non-empty remainder yields at least one char, and every yielded char
    /// consumes at least one byte of the remainder.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let slen = self.string.len();
        (min(slen, 1), Some(slen))
    }

    #[inline]
    fn next(&mut self) -> Option<char> {
        let mut chars = self.string.chars();
        let head = match chars.next() {
            Some(c) => c,
            None => return None,
        };

        let (decoded, consumed) = match (head, chars.next()) {
            // A backslash followed by another char is an escape sequence.
            // The backslash itself is exactly one byte long.
            ('\\', Some(escaped)) => (unescape_char(escaped), 1 + escaped.len_utf8()),
            // Ordinary char, or a lone backslash that ends the string.
            (plain, _) => (plain, plain.len_utf8()),
        };

        // `consumed` always ends on a char boundary, so slicing cannot panic.
        self.string = &self.string[consumed..];
        Some(decoded)
    }
}

/// Lazily expands a `tr`-style set specification such as `a-z0-9\n` into the
/// characters it denotes.
///
/// * `x-y` expands to every valid `char` from `x` through `y` inclusive
///   (code points that are not valid `char`s are skipped).
/// * A hyphen that starts or ends the specification is a literal hyphen.
/// * An escaped hyphen (`\-`) is always literal and never forms a range.
/// * A reversed range such as `z-a` yields only its first endpoint; the
///   hyphen and the second endpoint are consumed without being yielded.
pub struct ExpandSet<'a> {
    /// Code points of the range currently being expanded, first endpoint
    /// excluded (it has already been yielded).
    range: Range<u32>,
    /// Cursor over the rest of the specification.
    unesc: Unescape<'a>,
}

impl<'a> Iterator for ExpandSet<'a> {
    type Item = char;

    /// Conservative bounds: a non-empty remainder yields at least one char;
    /// ranges make the upper bound unknown.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, _) = self.unesc.size_hint();
        (lower, None)
    }

    #[inline]
    fn next(&mut self) -> Option<char> {
        // Finish the range in progress first, skipping code points that are
        // not valid chars.
        for n in self.range.by_ref() {
            if let Some(c) = from_u32(n) {
                return Some(c);
            }
        }

        let first = match self.unesc.next() {
            Some(c) => c,
            None => return None,
        };

        // A raw '-' at the cursor is an unescaped hyphen; an escaped one would
        // start with a backslash. It is a range operator only if an endpoint
        // follows it, otherwise it is left in place and yielded as a literal
        // by the next call.
        let rest: &'a str = self.unesc.string;
        if rest.starts_with('-') {
            let mut ahead = Unescape { string: &rest[1..] };
            if let Some(last) = ahead.next() {
                self.unesc = ahead;
                // `first` is returned below, so the range starts one past it.
                // A char is at most 0x10FFFF, so `+ 1` cannot overflow.
                self.range = first as u32 + 1..last as u32 + 1;
            }
        }

        Some(first)
    }
}

impl<'a> ExpandSet<'a> {
    /// Creates an expander over the set specification `s`.
    #[inline]
    pub fn new(s: &'a str) -> ExpandSet<'a> {
        ExpandSet {
            range: 0..0,
            unesc: Unescape { string: s },
        }
    }
}

// Remainder of the original span (start of the src/tr/tr.rs patch), preserved verbatim:
// diff --git a/src/tr/tr.rs b/src/tr/tr.rs index 45e85bfd6..f100dd7a1 100644 --- a/src/tr/tr.rs +++ b/src/tr/tr.rs @@ -1,98 +1,40 @@ #![crate_name = "tr"] -#![feature(collections, core, old_io, rustc_private)] +#![feature(io, rustc_private)] /* * This file is part of the uutils coreutils package.
* * (c) Michael Gehring + * (c) kwantam + * 20150428 created `expand` module to eliminate most allocs during setup * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ -extern crate collections; extern crate getopts; use getopts::OptGroup; -use std::char::from_u32; use std::collections::{BitSet, VecMap}; -use std::old_io::{BufferedReader, print}; -use std::old_io::stdio::{stdin_raw, stdout}; -use std::iter::FromIterator; -use std::vec::Vec; +use std::io::{stdin, stdout, BufReader, Read, Write}; +use expand::ExpandSet; #[path="../common/util.rs"] #[macro_use] mod util; +mod expand; + static NAME : &'static str = "tr"; static VERSION : &'static str = "1.0.0"; +/* Flush threshold in bytes for the String output buffers used by delete and tr. */ const BUFFER_LEN: usize = 1024; -#[inline] -fn unescape_char(c: char) -> char { - match c { - 'a' => 0x07u8 as char, - 'b' => 0x08u8 as char, - 'f' => 0x0cu8 as char, - 'v' => 0x0bu8 as char, - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - _ => c, - } -} - -#[inline] -fn unescape(v: Vec) -> Vec { - let mut out = Vec::new(); - let mut input = v.as_slice(); - loop { - input = match input { - ['\\', e, rest..] => { - out.push(unescape_char(e)); - rest - } - [c, rest..] => { - out.push(c); - rest - } - [] => break - } - } - out -} - -#[inline] -fn expand_range(from: char, to: char) -> Vec { - range(from as u32, to as u32 + 1).map(|c| from_u32(c).unwrap()).collect() -} - -fn expand_set(s: &str) -> Vec { - let mut set = Vec::::new(); - let unesc = unescape(FromIterator::from_iter(s.chars())); - let mut input = unesc.as_slice(); - - loop { - input = match input { - [f, '-', t, rest..] => { - set.push_all(expand_range(f, t).as_slice()); - rest - } - [c, rest..]
=> { - set.push(c); - rest - } - [] => break - }; - } - set -} - -fn delete(set: Vec, complement: bool) { +/* Inserts every char of set into a BitSet, then reads stdin char by char and pushes each char accepted by is_allowed onto buf, which is written to stdout. The is_allowed closure sits in elided hunk context, so how it combines the BitSet with complement is not visible here. The capacity BUFFER_LEN + 4 presumably leaves room for one maximum-length UTF-8 char pushed just below the threshold; confirm. */ fn delete<'a>(set: ExpandSet<'a>, complement: bool) { let mut bset = BitSet::new(); - let mut out = stdout(); + let mut stdout = stdout(); + let mut buf = String::with_capacity(BUFFER_LEN + 4); - for &c in set.iter() { + for c in set { bset.insert(c as usize); } @@ -104,42 +46,44 @@ fn delete(set: Vec, complement: bool) { } }; - for c in BufferedReader::new(stdin_raw()).chars() { + for c in BufReader::new(stdin()).chars() { match c { - Ok(c) if is_allowed(c) => out.write_char(c).unwrap(), + Ok(c) if is_allowed(c) => buf.push(c), Ok(_) => (), Err(err) => panic!("{}", err), }; + if buf.len() >= BUFFER_LEN { /* NOTE(review): buf is written here but no truncate or clear follows, unlike the matching flush in tr; once the threshold is reached, later iterations appear to write the already-written text again. Confirm and clear buf after this write. */ + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); + } + } + if buf.len() > 0 { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); } } -fn tr(set1: &[char], set2: &[char]) { - const BUFFER_LEN: usize = 1024; - +/* Builds a VecMap from each char of set1 to the char at the same position in set2; once set2 is exhausted its last yielded char is reused, and the placeholder underscore is used if set2 yields nothing at all. Then reads stdin char by char, replaces mapped chars, and writes buf to stdout whenever it reaches BUFFER_LEN bytes, plus once more at the end if anything is left. */ fn tr<'a>(set1: ExpandSet<'a>, mut set2: ExpandSet<'a>) { let mut map = VecMap::new(); let mut stdout = stdout(); - let mut outbuffer = String::with_capacity(BUFFER_LEN); + let mut buf = String::with_capacity(BUFFER_LEN + 4); - let set2_len = set2.len(); - for i in range(0, set1.len()) { - if i >= set2_len { - map.insert(set1[i] as usize, set2[set2_len - 1]); - } else { - map.insert(set1[i] as usize, set2[i]); - } + let mut s2_prev = '_'; + for i in set1 { + s2_prev = set2.next().unwrap_or(s2_prev); + + map.insert(i as usize, s2_prev); } - for c in BufferedReader::new(stdin_raw()).chars() { + for c in BufReader::new(stdin()).chars() { match c { Ok(inc) => { let trc = match map.get(&(inc as usize)) { Some(t) => *t, None => inc, }; - outbuffer.push(trc); - if outbuffer.len() >= BUFFER_LEN { - stdout.write_str(outbuffer.as_slice()).unwrap(); - outbuffer.clear(); + buf.push(trc); + if buf.len() >= BUFFER_LEN { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); + buf.truncate(0); } } Err(err) => { @@ -147,8 +91,8 @@ fn tr(set1: &[char], set2:
&[char]) { } } } - if outbuffer.len() > 0 { - stdout.write_str(outbuffer.as_slice()).unwrap(); + if buf.len() > 0 { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); } } @@ -158,7 +102,7 @@ fn usage(opts: &[OptGroup]) { println!("Usage:"); println!(" {} [OPTIONS] SET1 [SET2]", NAME); println!(""); - print(getopts::usage("Translate or delete characters.", opts).as_slice()); + println!("{}", getopts::usage("Translate or delete characters.", opts)); } pub fn uumain(args: Vec) -> i32 { @@ -170,7 +114,7 @@ pub fn uumain(args: Vec) -> i32 { getopts::optflag("V", "version", "output version information and exit"), ]; - let matches = match getopts::getopts(args.tail(), &opts) { + let matches = match getopts::getopts(&args[1..], &opts) { Ok(m) => m, Err(err) => { show_error!("{}", err); @@ -203,12 +147,12 @@ pub fn uumain(args: Vec) -> i32 { } if dflag { - let set1 = expand_set(sets[0].as_slice()); + let set1 = ExpandSet::new(sets[0].as_ref()); delete(set1, cflag); } else { - let set1 = expand_set(sets[0].as_slice()); - let set2 = expand_set(sets[1].as_slice()); - tr(set1.as_slice(), set2.as_slice()); + let set1 = ExpandSet::new(sets[0].as_ref()); + let set2 = ExpandSet::new(sets[1].as_ref()); + tr(set1, set2); } 0 diff --git a/test/tr.rs b/test/tr.rs index 0ebe0f948..e3c64ee1a 100644 --- a/test/tr.rs +++ b/test/tr.rs @@ -1,49 +1,51 @@ -#![allow(unstable)] - -use std::old_io::process::Command; +use std::io::Write; +use std::process::{Command, Stdio}; static PROGNAME: &'static str = "./tr"; /* Test helper: spawns PROGNAME with args and piped stdin and stdout, writes input to the child stdin, waits for the child and returns the bytes captured from its stdout. Any spawn or I/O failure panics with the error text. */ fn run(input: &str, args: &[&'static str]) -> Vec { - let mut process = Command::new(PROGNAME).args(args).spawn().unwrap(); + let mut process = Command::new(PROGNAME) + .args(args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .unwrap_or_else(|e| panic!("{}", e)); - process.stdin.take().unwrap().write_str(input).unwrap(); + process.stdin.take().unwrap_or_else(|| panic!("Could not take child process stdin")) + .write_all(input.as_bytes()).unwrap_or_else(|e|
panic!("{}", e)); - let po = match process.wait_with_output() { - Ok(p) => p, - Err(err) => panic!("{}", err), - }; - po.output + let po = process.wait_with_output().unwrap_or_else(|e| panic!("{}", e)); + po.stdout } #[test] fn test_toupper() { let out = run("!abcd!", &["a-z", "A-Z"]); - assert_eq!(out.as_slice(), b"!ABCD!"); + assert_eq!(&out[..], b"!ABCD!"); } #[test] fn test_small_set2() { let out = run("@0123456789", &["0-9", "X"]); - assert_eq!(out.as_slice(), b"@XXXXXXXXXX"); + assert_eq!(&out[..], b"@XXXXXXXXXX"); } #[test] fn test_unicode() { let out = run("(,°□°), ┬─┬", &[", ┬─┬", "╯︵┻━┻"]); - assert_eq!(out.as_slice(), "(╯°□°)╯︵┻━┻".as_bytes()); + assert_eq!(&out[..], "(╯°□°)╯︵┻━┻".as_bytes()); } #[test] fn test_delete() { let out = run("aBcD", &["-d", "a-z"]); - assert_eq!(out.as_slice(), b"BD"); + assert_eq!(&out[..], b"BD"); } #[test] fn test_delete_complement() { let out = run("aBcD", &["-d", "-c", "a-z"]); - assert_eq!(out.as_slice(), b"ac"); + assert_eq!(&out[..], b"ac"); }