diff --git a/src/base64/base64.rs b/src/base64/base64.rs index 09987361a..d50cc4770 100644 --- a/src/base64/base64.rs +++ b/src/base64/base64.rs @@ -1,5 +1,5 @@ #![crate_name = "base64"] -#![feature(box_syntax, rustc_private)] +#![feature(rustc_private)] /* * This file is part of the uutils coreutils package. @@ -80,11 +80,11 @@ pub fn uumain(args: Vec) -> i32 { let mut file_buf; let mut input = if matches.free.is_empty() || &matches.free[0][..] == "-" { stdin_buf = stdin(); - BufReader::new(box stdin_buf as Box) + BufReader::new(Box::new(stdin_buf) as Box) } else { let path = Path::new(&matches.free[0][..]); file_buf = safe_unwrap!(File::open(&path)); - BufReader::new(box file_buf as Box) + BufReader::new(Box::new(file_buf) as Box) }; match mode { diff --git a/src/cat/cat.rs b/src/cat/cat.rs index dfea5d71c..37aa9dd9c 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -1,5 +1,5 @@ #![crate_name = "cat"] -#![feature(rustc_private, box_syntax, unsafe_destructor)] +#![feature(rustc_private, unsafe_destructor)] /* * This file is part of the uutils coreutils package. @@ -261,11 +261,11 @@ fn open(path: &str) -> Option<(Box, bool)> { if path == "-" { let stdin = stdin(); let interactive = unsafe { isatty(STDIN_FILENO) } != 0 as c_int; - return Some((box stdin as Box, interactive)); + return Some((Box::new(stdin) as Box, interactive)); } match File::open(path) { - Ok(f) => Some((box f as Box, false)), + Ok(f) => Some((Box::new(f) as Box, false)), Err(e) => { (writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap(); None diff --git a/src/expand/deps.mk b/src/expand/deps.mk new file mode 100644 index 000000000..fb8005c0c --- /dev/null +++ b/src/expand/deps.mk @@ -0,0 +1 @@ +DEPLIBS += unicode-width diff --git a/src/expand/expand.rs b/src/expand/expand.rs index 4ee390efc..3ee2cb879 100644 --- a/src/expand/expand.rs +++ b/src/expand/expand.rs @@ -1,22 +1,28 @@ #![crate_name = "expand"] -#![feature(collections, core, old_io, old_path, rustc_private)] +#![feature(rustc_private, unicode)] /* * This file is part of the uutils coreutils package. * * (c) Virgile Andreani + * (c) kwantam + * 20150428 updated to work with both UTF-8 and non-UTF-8 encodings * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ -#![feature(box_syntax)] - extern crate getopts; extern crate libc; +extern crate rustc_unicode; +extern crate unicode_width; -use std::old_io as io; -use std::str::StrExt; +use std::fs::File; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +use std::iter::repeat; +use std::str::from_utf8; +use rustc_unicode::str::utf8_char_width; +use unicode_width::UnicodeWidthChar; #[path = "../common/util.rs"] #[macro_use] @@ -28,7 +34,7 @@ static VERSION: &'static str = "0.0.1"; static DEFAULT_TABSTOP: usize = 8; fn tabstops_parse(s: String) -> Vec { - let words = s.as_slice().split(',').collect::>(); + let words = s.split(',').collect::>(); let nums = words.into_iter() .map(|sn| sn.parse::() @@ -52,7 +58,9 @@ fn tabstops_parse(s: String) -> Vec { struct Options { files: Vec, tabstops: Vec, - iflag: bool + tspaces: String, + iflag: bool, + uflag: bool, } impl Options { @@ -63,6 +71,16 @@ impl Options { }; let iflag = matches.opt_present("i"); + let uflag = !matches.opt_present("U"); + + // avoid allocations when dumping out long sequences of spaces + // by precomputing the longest string of spaces we will ever need + let nspaces = tabstops.iter().scan(0, |pr,&it| { + let ret = Some(it - *pr); + *pr = it; + ret + }).max().unwrap(); // length of tabstops is guaranteed >= 1 + let tspaces = repeat(' ').take(nspaces).collect(); let files = if matches.free.is_empty() { @@ -71,7 +89,7 @@ impl Options { matches.free }; - Options { files: files, tabstops: tabstops, iflag: iflag } + Options { files: files, tabstops: tabstops, tspaces: tspaces, iflag: iflag, uflag: uflag } } } @@ -80,20 +98,21 @@ pub fn uumain(args: Vec) -> i32 { getopts::optflag("i", "initial", "do not convert tabs after non blanks"), getopts::optopt("t", "tabs", "have tabs NUMBER characters apart, not 8", "NUMBER"), getopts::optopt("t", "tabs", "use comma separated list of explicit tab positions", "LIST"), + getopts::optflag("U", "no-utf8", "interpret input file as 8-bit ASCII rather than UTF-8"), getopts::optflag("h", "help", "display this help and exit"), getopts::optflag("V", "version", "output version information and exit"), ]; - let matches = match getopts::getopts(args.tail(), &opts) { + let matches = match getopts::getopts(&args[1..], &opts) { Ok(m) => m, Err(f) => crash!(1, "{}", f) }; if matches.opt_present("help") { println!("Usage: {} [OPTION]... [FILE]...", NAME); - io::print(getopts::usage( + println!("{}", getopts::usage( "Convert tabs in each FILE to spaces, writing to standard output.\n\ - With no FILE, or when FILE is -, read standard input.", &opts).as_slice()); + With no FILE, or when FILE is -, read standard input.", &opts)); return 0; } @@ -107,64 +126,119 @@ pub fn uumain(args: Vec) -> i32 { return 0; } -fn open(path: String) -> io::BufferedReader> { +fn open(path: String) -> BufReader> { let mut file_buf; - if path.as_slice() == "-" { - io::BufferedReader::new(box io::stdio::stdin_raw() as Box) + if path == "-" { + BufReader::new(Box::new(stdin()) as Box) } else { - file_buf = match io::File::open(&Path::new(path.as_slice())) { + file_buf = match File::open(&path[..]) { Ok(a) => a, - _ => crash!(1, "{}: {}\n", path, "No such file or directory") + Err(e) => crash!(1, "{}: {}\n", &path[..], e), }; - io::BufferedReader::new(box file_buf as Box) + BufReader::new(Box::new(file_buf) as Box) } } -fn to_next_stop(tabstops: &[usize], col: usize) -> usize { - match tabstops.as_slice() { - [tabstop] => tabstop - col % tabstop, - tabstops => match tabstops.iter().skip_while(|&t| *t <= col).next() { - Some(&tabstop) => tabstop - col % tabstop, - None => 1 +fn next_tabstop(tabstops: &[usize], col: usize) -> usize { + if tabstops.len() == 1 { + tabstops[0] - col % tabstops[0] + } else { + match tabstops.iter().skip_while(|&&t| t <= col).next() { + Some(t) => t - col, + None => 1, } } } +#[derive(PartialEq, Eq, Debug)] +enum CharType { + Backspace, + Tab, + Other, +} + fn expand(options: Options) { - let mut output = io::stdout(); + use self::CharType::*; + + let mut output = BufWriter::new(stdout()); + let ts = options.tabstops.as_ref(); + let mut buf = Vec::new(); for file in options.files.into_iter() { - let mut col = 0; - let mut init = true; - for c in open(file).chars() { - match c { - Ok('\t') if init || !options.iflag => { - let nb_spaces = to_next_stop(options.tabstops.as_slice(), col); - col += nb_spaces; - safe_write!(&mut output, "{:1$}", "", nb_spaces); - } - Ok('\x08') => { - if col > 0 { - col -= 1; + let mut fh = open(file); + + while match fh.read_until('\n' as u8, &mut buf) { + Ok(s) => s > 0, + Err(_) => buf.len() > 0, + } { + let mut col = 0; + let mut byte = 0; + let mut init = true; + + while byte < buf.len() { + let (ctype, cwidth, nbytes) = if options.uflag { + let nbytes = utf8_char_width(buf[byte]); + + if byte + nbytes > buf.len() { + // don't overrun buffer because of invalid UTF-8 + (Other, 1, 1) + } else if let Ok(t) = from_utf8(&buf[byte..byte+nbytes]) { + match t.chars().next() { + Some('\t') => (Tab, 0, nbytes), + Some('\x08') => (Backspace, 0, nbytes), + Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), + None => { // no valid char at start of t, so take 1 byte + (Other, 1, 1) + }, + } + } else { + (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide } - init = false; - safe_write!(&mut output, "{}", '\x08'); + } else { + (match buf[byte] { // always take exactly 1 byte in strict ASCII mode + 0x09 => Tab, + 0x08 => Backspace, + _ => Other, + }, 1, 1) + }; + + // figure out how many columns this char takes up + match ctype { + Tab => { + // figure out how many spaces to the next tabstop + let nts = next_tabstop(ts, col); + col += nts; + + // now dump out either spaces if we're expanding, or a literal tab if we're not + if init || !options.iflag { + safe_unwrap!(output.write_all(&options.tspaces[..nts].as_bytes())); + } else { + safe_unwrap!(output.write_all(&buf[byte..byte+nbytes])); + } + }, + _ => { + col = if ctype == Other { + col + cwidth + } else if col > 0 { + col - 1 + } else { + 0 + }; + + // if we're writing anything other than a space, then we're + // done with the line's leading spaces + if buf[byte] != 0x20 { + init = false; + } + + safe_unwrap!(output.write_all(&buf[byte..byte+nbytes])); + }, } - Ok('\n') => { - col = 0; - init = true; - safe_write!(&mut output, "{}", '\n'); - } - Ok(c) => { - col += 1; - if c != ' ' { - init = false; - } - safe_write!(&mut output, "{}", c); - } - Err(_) => break + + byte += nbytes; // advance the pointer } + + buf.truncate(0); // clear the buffer } } } - diff --git a/src/fmt/fmt.rs b/src/fmt/fmt.rs index 7edae633f..72db42444 100644 --- a/src/fmt/fmt.rs +++ b/src/fmt/fmt.rs @@ -1,5 +1,5 @@ #![crate_name = "fmt"] -#![feature(box_syntax,rustc_private,str_char,unicode,core)] +#![feature(rustc_private,str_char,unicode,core)] /* * This file is part of `fmt` from the uutils coreutils package. @@ -197,9 +197,9 @@ pub fn uumain(args: Vec) -> i32 { for i in files.iter().map(|x| &x[..]) { let mut fp = match i { - "-" => BufReader::new(box stdin() as Box), + "-" => BufReader::new(Box::new(stdin()) as Box), _ => match File::open(i) { - Ok(f) => BufReader::new(box f as Box), + Ok(f) => BufReader::new(Box::new(f) as Box), Err(e) => { show_warning!("{}: {}", i, e); continue; diff --git a/src/tr/expand.rs b/src/tr/expand.rs new file mode 100644 index 000000000..fd8f885ff --- /dev/null +++ b/src/tr/expand.rs @@ -0,0 +1,117 @@ +/* + * This file is part of the uutils coreutils package. + * + * (c) Michael Gehring + * (c) kwantam + * 20150428 created `expand` module to eliminate most allocs during setup + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use std::char::from_u32; +use std::cmp::min; +use std::iter::Peekable; +use std::ops::Range; + +#[inline] +fn unescape_char(c: char) -> char { + match c { + 'a' => 0x07u8 as char, + 'b' => 0x08u8 as char, + 'f' => 0x0cu8 as char, + 'v' => 0x0bu8 as char, + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + _ => c, + } +} + +struct Unescape<'a> { + string: &'a str, +} + +impl<'a> Iterator for Unescape<'a> { + type Item = char; + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let slen = self.string.len(); + (min(slen, 1), None) + } + + #[inline] + fn next(&mut self) -> Option { + if self.string.len() == 0 { + return None; + } + + // is the next character an escape? + let (ret, idx) = match self.string.chars().next().unwrap() { + '\\' if self.string.len() > 1 => { + // yes---it's \ and it's not the last char in a string + // we know that \ is 1 byte long so we can index into the string safely + let c = self.string[1..].chars().next().unwrap(); + (Some(unescape_char(c)), 1 + c.len_utf8()) + }, + c => (Some(c), c.len_utf8()), // not an escape char + }; + + self.string = &self.string[idx..]; // advance the pointer to the next char + ret + } +} + +pub struct ExpandSet<'a> { + range: Range, + unesc: Peekable>, +} + +impl<'a> Iterator for ExpandSet<'a> { + type Item = char; + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.unesc.size_hint() + } + + #[inline] + fn next(&mut self) -> Option { + // while the Range has elements, try to return chars from it + // but make sure that they actually turn out to be Chars! + while let Some(n) = self.range.next() { + match from_u32(n) { + Some(c) => return Some(c), + _ => (), + } + } + + if let Some(first) = self.unesc.next() { + // peek ahead + if self.unesc.peek() == Some(&'-') && match self.unesc.size_hint() { + (x, _) if x > 1 => true, // there's a range here; record it in our internal Range struct + _ => false, + } { + self.unesc.next(); // this is the '-' + let last = self.unesc.next().unwrap(); // this is the end of the range + + self.range = first as u32 + 1 .. last as u32 + 1; + } + + return Some(first); // in any case, return the next char + } + + None + } +} + +impl<'a> ExpandSet<'a> { + #[inline] + pub fn new(s: &'a str) -> ExpandSet<'a> { + ExpandSet { + range: 0 .. 0, + unesc: Unescape { string: s }.peekable(), + } + } +} diff --git a/src/tr/tr.rs b/src/tr/tr.rs index 45e85bfd6..f100dd7a1 100644 --- a/src/tr/tr.rs +++ b/src/tr/tr.rs @@ -1,98 +1,40 @@ #![crate_name = "tr"] -#![feature(collections, core, old_io, rustc_private)] +#![feature(io, rustc_private)] /* * This file is part of the uutils coreutils package. * * (c) Michael Gehring + * (c) kwantam + * 20150428 created `expand` module to eliminate most allocs during setup * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ -extern crate collections; extern crate getopts; use getopts::OptGroup; -use std::char::from_u32; use std::collections::{BitSet, VecMap}; -use std::old_io::{BufferedReader, print}; -use std::old_io::stdio::{stdin_raw, stdout}; -use std::iter::FromIterator; -use std::vec::Vec; +use std::io::{stdin, stdout, BufReader, Read, Write}; +use expand::ExpandSet; #[path="../common/util.rs"] #[macro_use] mod util; +mod expand; + static NAME : &'static str = "tr"; static VERSION : &'static str = "1.0.0"; +const BUFFER_LEN: usize = 1024; -#[inline] -fn unescape_char(c: char) -> char { - match c { - 'a' => 0x07u8 as char, - 'b' => 0x08u8 as char, - 'f' => 0x0cu8 as char, - 'v' => 0x0bu8 as char, - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - _ => c, - } -} - -#[inline] -fn unescape(v: Vec) -> Vec { - let mut out = Vec::new(); - let mut input = v.as_slice(); - loop { - input = match input { - ['\\', e, rest..] => { - out.push(unescape_char(e)); - rest - } - [c, rest..] => { - out.push(c); - rest - } - [] => break - } - } - out -} - -#[inline] -fn expand_range(from: char, to: char) -> Vec { - range(from as u32, to as u32 + 1).map(|c| from_u32(c).unwrap()).collect() -} - -fn expand_set(s: &str) -> Vec { - let mut set = Vec::::new(); - let unesc = unescape(FromIterator::from_iter(s.chars())); - let mut input = unesc.as_slice(); - - loop { - input = match input { - [f, '-', t, rest..] => { - set.push_all(expand_range(f, t).as_slice()); - rest - } - [c, rest..] => { - set.push(c); - rest - } - [] => break - }; - } - set -} - -fn delete(set: Vec, complement: bool) { +fn delete<'a>(set: ExpandSet<'a>, complement: bool) { let mut bset = BitSet::new(); - let mut out = stdout(); + let mut stdout = stdout(); + let mut buf = String::with_capacity(BUFFER_LEN + 4); - for &c in set.iter() { + for c in set { bset.insert(c as usize); } @@ -104,42 +46,44 @@ fn delete(set: Vec, complement: bool) { } }; - for c in BufferedReader::new(stdin_raw()).chars() { + for c in BufReader::new(stdin()).chars() { match c { - Ok(c) if is_allowed(c) => out.write_char(c).unwrap(), + Ok(c) if is_allowed(c) => buf.push(c), Ok(_) => (), Err(err) => panic!("{}", err), }; + if buf.len() >= BUFFER_LEN { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); + } + } + if buf.len() > 0 { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); } } -fn tr(set1: &[char], set2: &[char]) { - const BUFFER_LEN: usize = 1024; - +fn tr<'a>(set1: ExpandSet<'a>, mut set2: ExpandSet<'a>) { let mut map = VecMap::new(); let mut stdout = stdout(); - let mut outbuffer = String::with_capacity(BUFFER_LEN); + let mut buf = String::with_capacity(BUFFER_LEN + 4); - let set2_len = set2.len(); - for i in range(0, set1.len()) { - if i >= set2_len { - map.insert(set1[i] as usize, set2[set2_len - 1]); - } else { - map.insert(set1[i] as usize, set2[i]); - } + let mut s2_prev = '_'; + for i in set1 { + s2_prev = set2.next().unwrap_or(s2_prev); + + map.insert(i as usize, s2_prev); } - for c in BufferedReader::new(stdin_raw()).chars() { + for c in BufReader::new(stdin()).chars() { match c { Ok(inc) => { let trc = match map.get(&(inc as usize)) { Some(t) => *t, None => inc, }; - outbuffer.push(trc); - if outbuffer.len() >= BUFFER_LEN { - stdout.write_str(outbuffer.as_slice()).unwrap(); - outbuffer.clear(); + buf.push(trc); + if buf.len() >= BUFFER_LEN { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); + buf.truncate(0); } } Err(err) => { @@ -147,8 +91,8 @@ fn tr(set1: &[char], set2: &[char]) { } } } - if outbuffer.len() > 0 { - stdout.write_str(outbuffer.as_slice()).unwrap(); + if buf.len() > 0 { + safe_unwrap!(stdout.write_all(&buf[..].as_bytes())); } } @@ -158,7 +102,7 @@ fn usage(opts: &[OptGroup]) { println!("Usage:"); println!(" {} [OPTIONS] SET1 [SET2]", NAME); println!(""); - print(getopts::usage("Translate or delete characters.", opts).as_slice()); + println!("{}", getopts::usage("Translate or delete characters.", opts)); } pub fn uumain(args: Vec) -> i32 { @@ -170,7 +114,7 @@ pub fn uumain(args: Vec) -> i32 { getopts::optflag("V", "version", "output version information and exit"), ]; - let matches = match getopts::getopts(args.tail(), &opts) { + let matches = match getopts::getopts(&args[1..], &opts) { Ok(m) => m, Err(err) => { show_error!("{}", err); @@ -203,12 +147,12 @@ pub fn uumain(args: Vec) -> i32 { } if dflag { - let set1 = expand_set(sets[0].as_slice()); + let set1 = ExpandSet::new(sets[0].as_ref()); delete(set1, cflag); } else { - let set1 = expand_set(sets[0].as_slice()); - let set2 = expand_set(sets[1].as_slice()); - tr(set1.as_slice(), set2.as_slice()); + let set1 = ExpandSet::new(sets[0].as_ref()); + let set2 = ExpandSet::new(sets[1].as_ref()); + tr(set1, set2); } 0 diff --git a/src/unexpand/deps.mk b/src/unexpand/deps.mk new file mode 100644 index 000000000..fb8005c0c --- /dev/null +++ b/src/unexpand/deps.mk @@ -0,0 +1 @@ +DEPLIBS += unicode-width diff --git a/src/unexpand/unexpand.rs b/src/unexpand/unexpand.rs index 4f05dc04b..3761c3477 100644 --- a/src/unexpand/unexpand.rs +++ b/src/unexpand/unexpand.rs @@ -1,10 +1,12 @@ #![crate_name = "unexpand"] -#![feature(collections, core, old_io, old_path, rustc_private)] +#![feature(rustc_private, unicode)] /* * This file is part of the uutils coreutils package. * * (c) Virgile Andreani + * (c) kwantam + * 20150428 updated to work with both UTF-8 and non-UTF-8 encodings * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. @@ -12,8 +14,14 @@ extern crate getopts; extern crate libc; +extern crate rustc_unicode; +extern crate unicode_width; -use std::old_io as io; +use std::fs::File; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdout, Write}; +use std::str::from_utf8; +use rustc_unicode::str::utf8_char_width; +use unicode_width::UnicodeWidthChar; #[path = "../common/util.rs"] #[macro_use] @@ -25,7 +33,7 @@ static VERSION: &'static str = "0.0.1"; static DEFAULT_TABSTOP: usize = 8; fn tabstops_parse(s: String) -> Vec { - let words = s.as_slice().split(',').collect::>(); + let words = s.split(',').collect::>(); let nums = words.into_iter() .map(|sn| sn.parse() @@ -49,7 +57,8 @@ fn tabstops_parse(s: String) -> Vec { struct Options { files: Vec, tabstops: Vec, - aflag: bool + aflag: bool, + uflag: bool, } impl Options { @@ -61,6 +70,7 @@ impl Options { let aflag = (matches.opt_present("all") || matches.opt_present("tabs")) && !matches.opt_present("first-only"); + let uflag = !matches.opt_present("U"); let files = if matches.free.is_empty() { @@ -69,7 +79,7 @@ impl Options { matches.free }; - Options { files: files, tabstops: tabstops, aflag: aflag } + Options { files: files, tabstops: tabstops, aflag: aflag, uflag: uflag } } } @@ -79,20 +89,21 @@ pub fn uumain(args: Vec) -> i32 { getopts::optflag("", "first-only", "convert only leading sequences of blanks (overrides -a)"), getopts::optopt("t", "tabs", "have tabs N characters apart instead of 8 (enables -a)", "N"), getopts::optopt("t", "tabs", "use comma separated LIST of tab positions (enables -a)", "LIST"), + getopts::optflag("U", "no-utf8", "interpret input file as 8-bit ASCII rather than UTF-8"), getopts::optflag("h", "help", "display this help and exit"), getopts::optflag("V", "version", "output version information and exit"), ]; - let matches = match getopts::getopts(args.tail(), &opts) { + let matches = match getopts::getopts(&args[1..], &opts) { Ok(m) => m, Err(f) => crash!(1, "{}", f) }; if matches.opt_present("help") { println!("Usage: {} [OPTION]... [FILE]...", NAME); - io::print(getopts::usage( + println!("{}", getopts::usage( "Convert blanks in each FILE to tabs, writing to standard output.\n\ - With no FILE, or when FILE is -, read standard input.", &opts).as_slice()); + With no FILE, or when FILE is -, read standard input.", &opts)); return 0; } @@ -106,121 +117,175 @@ pub fn uumain(args: Vec) -> i32 { return 0; } -fn open(path: String) -> io::BufferedReader> { +fn open(path: String) -> BufReader> { let mut file_buf; - if path.as_slice() == "-" { - io::BufferedReader::new(Box::new(io::stdio::stdin_raw()) as Box) + if path == "-" { + BufReader::new(Box::new(stdin()) as Box) } else { - file_buf = match io::File::open(&Path::new(path.as_slice())) { + file_buf = match File::open(&path[..]) { Ok(a) => a, - _ => crash!(1, "{}: {}\n", path, "No such file or directory") + Err(e) => crash!(1, "{}: {}", &path[..], e), }; - io::BufferedReader::new(Box::new(file_buf) as Box) + BufReader::new(Box::new(file_buf) as Box) } } -fn is_tabstop(tabstops: &[usize], col: usize) -> bool { - match tabstops { - [tabstop] => col % tabstop == 0, - tabstops => tabstops.binary_search_by(|&e| e.cmp(&col)).is_ok() - } -} - -fn to_next_stop(tabstops: &[usize], col: usize) -> Option { - match tabstops { - [tabstop] => Some(tabstop - col % tabstop), - tabstops => tabstops.iter().skip_while(|&t| *t <= col).next() - .map(|&tabstop| tabstop - col % tabstop) - } -} - -fn unexpandspan(mut output: &mut io::LineBufferedWriter, - tabstops: &[usize], nspaces: usize, col: usize, init: bool) { - let mut cur = col - nspaces; - if nspaces > 1 || init { - loop { - match to_next_stop(tabstops, cur) { - Some(to_next) if cur + to_next <= col => { - safe_write!(&mut output, "{}", '\t'); - cur += to_next; - } - _ => break - } +fn next_tabstop(tabstops: &[usize], col: usize) -> Option { + if tabstops.len() == 1 { + Some(tabstops[0] - col % tabstops[0]) + } else { + // find next larger tab + match tabstops.iter().skip_while(|&&t| t <= col).next() { + Some(t) => Some(t - col), + None => None, // if there isn't one in the list, tab becomes a single space } } - safe_write!(&mut output, "{:1$}", "", col - cur); +} + +fn write_tabs(mut output: &mut BufWriter, tabstops: &[usize], mut scol: usize, col: usize) { + while let Some(nts) = next_tabstop(tabstops, scol) { + if col < scol + nts { + break; + } + + safe_unwrap!(output.write_all("\t".as_bytes())); + scol += nts; + } + + while col > scol { + safe_unwrap!(output.write_all(" ".as_bytes())); + scol += 1; + } +} + +#[derive(PartialEq, Eq, Debug)] +enum CharType { + Backspace, + Space, + Tab, + Other, } fn unexpand(options: Options) { - let mut output = io::stdout(); - let ts = options.tabstops.as_slice(); + use self::CharType::*; + + let mut output = BufWriter::new(stdout()); + let ts = &options.tabstops[..]; + let mut buf = Vec::new(); + let lastcol = if ts.len() > 1 { + *ts.last().unwrap() + } else { + 0 + }; for file in options.files.into_iter() { - let mut col = 0; - let mut nspaces = 0; - let mut init = true; - for c in open(file).chars() { - match c { - Ok(' ') => { - if init || options.aflag { - nspaces += 1; + let mut fh = open(file); + + while match fh.read_until('\n' as u8, &mut buf) { + Ok(s) => s > 0, + Err(_) => buf.len() > 0, + } { + let mut byte = 0; // offset into the buffer + let mut col = 0; // the current column + let mut scol = 0; // the start col for the current span, i.e., the already-printed width + let mut init = true; // are we at the start of the line? + let mut pctype = Other; + + while byte < buf.len() { + // when we have a finite number of columns, never convert past the last column + if lastcol > 0 && col >= lastcol { + if (pctype != Tab && col > scol + 1) || + (col > scol && (init || pctype == Tab)) { + write_tabs(&mut output, ts, scol, col); + } else if col > scol { + safe_unwrap!(output.write_all(" ".as_bytes())); + } + scol = col; + + safe_unwrap!(output.write_all(&buf[byte..])); + break; + } + + let (ctype, cwidth, nbytes) = if options.uflag { + let nbytes = utf8_char_width(buf[byte]); + + // figure out how big the next char is, if it's UTF-8 + if byte + nbytes > buf.len() { + // make sure we don't overrun the buffer because of invalid UTF-8 + (Other, 1, 1) + } else if let Ok(t) = from_utf8(&buf[byte..byte+nbytes]) { + // Now that we think it's UTF-8, figure out what kind of char it is + match t.chars().next() { + Some(' ') => (Space, 0, 1), + Some('\t') => (Tab, 0, 1), + Some('\x08') => (Backspace, 0, 1), + Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), + None => { // invalid char snuck past the utf8_validation_iterator somehow??? + (Other, 1, 1) + }, + } } else { - nspaces = 0; - safe_write!(&mut output, "{}", ' '); + // otherwise, it's not valid + (Other, 1, 1) // implicit assumption: non-UTF8 char has display width 1 } - col += 1; - } - Ok('\t') if nspaces > 0 => { - if is_tabstop(ts, col) { - nspaces = 0; - col += 1; - safe_write!(&mut output, "{}", '\t'); - } - match to_next_stop(ts, col) { - Some(to_next) => { - nspaces += to_next; - col += to_next; + } else { + (match buf[byte] { // always take exactly 1 byte in strict ASCII mode + 0x20 => Space, + 0x09 => Tab, + 0x08 => Backspace, + _ => Other, + }, 1, 1) + }; + + // now figure out how many columns this char takes up, and maybe print it + let tabs_buffered = init || options.aflag; + match ctype { + Space | Tab => { // compute next col, but only write space or tab chars if not buffering + col += if ctype == Space { + 1 + } else { + next_tabstop(ts, col).unwrap_or(1) + }; + + if !tabs_buffered { + safe_unwrap!(output.write_all(&buf[byte..byte+nbytes])); + scol = col; // now printed up to this column } - None => { - col += 1; - unexpandspan(&mut output, ts, nspaces, col, init); - nspaces = 0; - safe_write!(&mut output, "{}", '\t'); + }, + Other | Backspace => { // always + // never turn a single space before a non-blank into a tab + // unless it's at the start of the line + if (tabs_buffered && pctype != Tab && col > scol + 1) || + (col > scol && (init || (tabs_buffered && pctype == Tab))) { + write_tabs(&mut output, ts, scol, col); + } else if col > scol { + safe_unwrap!(output.write_all(" ".as_bytes())); } - } + init = false; + col = if ctype == Other { // use computed width + col + cwidth + } else if col > 0 { // Backspace case, but only if col > 0 + col - 1 + } else { + 0 + }; + safe_unwrap!(output.write_all(&buf[byte..byte+nbytes])); + scol = col; // we've now printed up to this column + }, } - Ok('\x08') => { // '\b' - if init || options.aflag { - unexpandspan(&mut output, ts, nspaces, col, init) - } - nspaces = 0; - if col > 0 { col -= 1; } - init = false; - safe_write!(&mut output, "{}", '\x08'); - } - Ok('\n') => { - if init || options.aflag { - unexpandspan(&mut output, ts, nspaces, col, init) - } - nspaces = 0; - col = 0; - init = true; - safe_write!(&mut output, "{}", '\n'); - } - Ok(c) => { - if init || options.aflag { - unexpandspan(&mut output, ts, nspaces, col, init) - } - nspaces = 0; - col += 1; - init = false; - safe_write!(&mut output, "{}", c); - } - Err(_) => break + + byte += nbytes; // move on to next char + pctype = ctype; // save the previous type } - } - if init || options.aflag { - unexpandspan(&mut output, ts, nspaces, col, init) + + // write out anything remaining + if col > scol + 1 || (init && col > scol) { + write_tabs(&mut output, ts, scol, col); + } else if col > scol { + safe_unwrap!(output.write_all(" ".as_bytes())); + } + + buf.truncate(0); // clear out the buffer } } } diff --git a/src/unlink/unlink.rs b/src/unlink/unlink.rs index 0151110cf..3aeb8b473 100644 --- a/src/unlink/unlink.rs +++ b/src/unlink/unlink.rs @@ -1,5 +1,5 @@ #![crate_name = "unlink"] -#![feature(collections, core, old_io, old_path, rustc_private)] +#![feature(rustc_private)] /* * This file is part of the uutils coreutils package. @@ -15,9 +15,14 @@ extern crate getopts; extern crate libc; -use std::old_io as io; -use std::old_io::fs::{self, PathExtensions}; -use std::old_io::print; +use libc::consts::os::posix88::{S_IFMT, S_IFLNK, S_IFREG}; +use libc::types::os::arch::c95::c_char; +use libc::types::os::arch::posix01::stat; +use libc::funcs::posix01::stat_::lstat; +use libc::funcs::posix88::unistd::unlink; + +use std::mem::uninitialized; +use std::io::{Error, ErrorKind, Write}; #[path = "../common/util.rs"] #[macro_use] @@ -26,13 +31,12 @@ mod util; static NAME: &'static str = "unlink"; pub fn uumain(args: Vec) -> i32 { - let program = args[0].clone(); let opts = [ getopts::optflag("h", "help", "display this help and exit"), getopts::optflag("V", "version", "output version information and exit"), ]; - let matches = match getopts::getopts(args.tail(), &opts) { + let matches = match getopts::getopts(&args[1..], &opts) { Ok(m) => m, Err(f) => { crash!(1, "invalid options\n{}", f) @@ -43,9 +47,9 @@ pub fn uumain(args: Vec) -> i32 { println!("unlink 1.0.0"); println!(""); println!("Usage:"); - println!(" {0} [FILE]... [OPTION]...", program); + println!(" {0} [FILE]... [OPTION]...", args[0]); println!(""); - print(getopts::usage("Unlink the file at [FILE].", &opts).as_slice()); + println!("{}", getopts::usage("Unlink the file at [FILE].", &opts)); return 0; } @@ -55,31 +59,38 @@ pub fn uumain(args: Vec) -> i32 { } if matches.free.len() == 0 { - crash!(1, "missing operand\nTry '{0} --help' for more information.", program); + crash!(1, "missing operand\nTry '{0} --help' for more information.", args[0]); } else if matches.free.len() > 1 { - crash!(1, "extra operand: '{1}'\nTry '{0} --help' for more information.", program, matches.free[1]); + crash!(1, "extra operand: '{1}'\nTry '{0} --help' for more information.", args[0], matches.free[1]); } - let path = Path::new(matches.free[0].clone()); + let st_mode = { + let mut buf: stat = unsafe { uninitialized() }; + let result = unsafe { lstat(matches.free[0].as_ptr() as *const c_char, &mut buf as *mut stat) }; - let result = path.lstat().and_then(|info| { - match info.kind { - io::FileType::RegularFile => Ok(()), - io::FileType::Symlink => Ok(()), - _ => Err(io::IoError { - kind: io::OtherIoError, - desc: "is not a file or symlink", - detail: None - }) + if result < 0 { + crash!(1, "Cannot stat '{}': {}", matches.free[0], Error::last_os_error()); } - }).and_then(|_| { - fs::unlink(&path) - }); + + buf.st_mode & S_IFMT + }; + + let result = if st_mode != S_IFREG && st_mode != S_IFLNK { + Err(Error::new(ErrorKind::Other, "Not a regular file or symlink")) + } else { + let result = unsafe { unlink(matches.free[0].as_ptr() as *const c_char) }; + + if result < 0 { + Err(Error::last_os_error()) + } else { + Ok(()) + } + }; match result { Ok(_) => (), Err(e) => { - crash!(1, "cannot unlink '{0}': {1}", path.display(), e.desc); + crash!(1, "cannot unlink '{0}': {1}", matches.free[0], e); } } diff --git a/test/tr.rs b/test/tr.rs index 0ebe0f948..e3c64ee1a 100644 --- a/test/tr.rs +++ b/test/tr.rs @@ -1,49 +1,51 @@ -#![allow(unstable)] - -use std::old_io::process::Command; +use std::io::Write; +use std::process::{Command, Stdio}; static PROGNAME: &'static str = "./tr"; fn run(input: &str, args: &[&'static str]) -> Vec { - let mut process = Command::new(PROGNAME).args(args).spawn().unwrap(); + let mut process = Command::new(PROGNAME) + .args(args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .unwrap_or_else(|e| panic!("{}", e)); - process.stdin.take().unwrap().write_str(input).unwrap(); + process.stdin.take().unwrap_or_else(|| panic!("Could not take child process stdin")) + .write_all(input.as_bytes()).unwrap_or_else(|e| panic!("{}", e)); - let po = match process.wait_with_output() { - Ok(p) => p, - Err(err) => panic!("{}", err), - }; - po.output + let po = process.wait_with_output().unwrap_or_else(|e| panic!("{}", e)); + po.stdout } #[test] fn test_toupper() { let out = run("!abcd!", &["a-z", "A-Z"]); - assert_eq!(out.as_slice(), b"!ABCD!"); + assert_eq!(&out[..], b"!ABCD!"); } #[test] fn test_small_set2() { let out = run("@0123456789", &["0-9", "X"]); - assert_eq!(out.as_slice(), b"@XXXXXXXXXX"); + assert_eq!(&out[..], b"@XXXXXXXXXX"); } #[test] fn test_unicode() { let out = run("(,°□°), ┬─┬", &[", ┬─┬", "╯︵┻━┻"]); - assert_eq!(out.as_slice(), "(╯°□°)╯︵┻━┻".as_bytes()); + assert_eq!(&out[..], "(╯°□°)╯︵┻━┻".as_bytes()); } #[test] fn test_delete() { let out = run("aBcD", &["-d", "a-z"]); - assert_eq!(out.as_slice(), b"BD"); + assert_eq!(&out[..], b"BD"); } #[test] fn test_delete_complement() { let out = run("aBcD", &["-d", "-c", "a-z"]); - assert_eq!(out.as_slice(), b"ac"); + assert_eq!(&out[..], b"ac"); } diff --git a/test/unexpand.rs b/test/unexpand.rs index 92377fad2..8776a6fe2 100644 --- a/test/unexpand.rs +++ b/test/unexpand.rs @@ -1,74 +1,76 @@ -#![allow(unstable)] - -use std::old_io::process::Command; +use std::io::Write; +use std::process::{Command, Stdio}; static PROGNAME: &'static str = "./unexpand"; fn run(input: &str, args: &[&'static str]) -> Vec { - let mut process = Command::new(PROGNAME).args(args).spawn().unwrap(); + let mut process = Command::new(PROGNAME) + .args(args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .unwrap_or_else(|e| panic!("{}", e)); - process.stdin.take().unwrap().write_str(input).unwrap(); + process.stdin.take().unwrap_or_else(|| panic!("Could not take child process stdin")) + .write_all(input.as_bytes()).unwrap_or_else(|e| panic!("{}", e)); - let po = match process.wait_with_output() { - Ok(p) => p, - Err(err) => panic!("{}", err), - }; - po.output + let po = process.wait_with_output().unwrap_or_else(|e| panic!("{}", e)); + po.stdout } #[test] fn unexpand_init_0() { let out = run(" 1\n 2\n 3\n 4\n", &["-t4"]); - assert_eq!(out.as_slice(), b" 1\n 2\n 3\n\t4\n"); + assert_eq!(&out[..], b" 1\n 2\n 3\n\t4\n" as &[u8]); } #[test] fn unexpand_init_1() { let out = run(" 5\n 6\n 7\n 8\n", &["-t4"]); - assert_eq!(out.as_slice(), b"\t 5\n\t 6\n\t 7\n\t\t8\n"); + assert_eq!(&out[..], b"\t 5\n\t 6\n\t 7\n\t\t8\n" as &[u8]); } #[test] fn unexpand_init_list_0() { let out = run(" 1\n 2\n 3\n 4\n", &["-t2,4"]); - assert_eq!(out.as_slice(), b" 1\n\t2\n\t 3\n\t\t4\n"); + assert_eq!(&out[..], b" 1\n\t2\n\t 3\n\t\t4\n" as &[u8]); } #[test] fn unexpand_init_list_1() { // Once the list is exhausted, spaces are not converted anymore let out = run(" 5\n 6\n 7\n 8\n", &["-t2,4"]); - assert_eq!(out.as_slice(), b"\t\t 5\n\t\t 6\n\t\t 7\n\t\t 8\n"); + assert_eq!(&out[..], b"\t\t 5\n\t\t 6\n\t\t 7\n\t\t 8\n" as &[u8]); } #[test] fn unexpand_aflag_0() { - let out = run("e E\nf F\ng G\nh H\n", &[]); - assert_eq!(out.as_slice(), b"e E\nf F\ng G\nh H\n"); + let out = run("e E\nf F\ng G\nh H\n", &["--"]); + assert_eq!(&out[..], b"e E\nf F\ng G\nh H\n" as &[u8]); } #[test] fn unexpand_aflag_1() { let out = run("e E\nf F\ng G\nh H\n", &["-a"]); - assert_eq!(out.as_slice(), b"e E\nf F\ng\tG\nh\t H\n"); + assert_eq!(&out[..], b"e E\nf F\ng\tG\nh\t H\n" as &[u8]); } #[test] fn unexpand_aflag_2() { let out = run("e E\nf F\ng G\nh H\n", &["-t8"]); - assert_eq!(out.as_slice(), b"e E\nf F\ng\tG\nh\t H\n"); + assert_eq!(&out[..], b"e E\nf F\ng\tG\nh\t H\n" as &[u8]); } #[test] fn unexpand_first_only_0() { let out = run(" A B", &["-t3"]); - assert_eq!(out.as_slice(), b"\t\t A\t B"); + assert_eq!(&out[..], b"\t\t A\t B" as &[u8]); } #[test] fn unexpand_first_only_1() { let out = run(" A B", &["-t3", "--first-only"]); - assert_eq!(out.as_slice(), b"\t\t A B"); + assert_eq!(&out[..], b"\t\t A B" as &[u8]); } #[test] @@ -76,20 +78,20 @@ fn unexpand_trailing_space_0() { // evil // Individual spaces before fields starting with non blanks should not be // converted, unless they are at the beginning of the line. let out = run("123 \t1\n123 1\n123 \n123 ", &["-t4"]); - assert_eq!(out.as_slice(), b"123\t\t1\n123 1\n123 \n123 "); + assert_eq!(&out[..], b"123\t\t1\n123 1\n123 \n123 " as &[u8]); } #[test] fn unexpand_trailing_space_1() { // super evil let out = run(" abc d e f g ", &["-t1"]); - assert_eq!(out.as_slice(), b"\tabc d e\t\tf\t\tg "); + assert_eq!(&out[..], b"\tabc d e\t\tf\t\tg " as &[u8]); } #[test] fn unexpand_spaces_follow_tabs_0() { // The two first spaces can be included into the first tab. let out = run(" \t\t A", &[]); - assert_eq!(out.as_slice(), b"\t\t A"); + assert_eq!(&out[..], b"\t\t A" as &[u8]); } #[test] @@ -100,6 +102,7 @@ fn unexpand_spaces_follow_tabs_1() { // evil // ' ' -> '\t' // third tabstop (5) // ' B \t' -> ' B \t' // after the list is exhausted, nothing must change let out = run("a \t B \t", &["-t1,4,5"]); - assert_eq!(out.as_slice(), b"a\t\t B \t"); + assert_eq!(&out[..], b"a\t\t B \t" as &[u8]); } +