Merge pull request #553 from kwantam/master

more feature cleanup ; fix more utils
2025-08-02 05:57:46 +00:00 · 2015-04-29 09:32:52 +03:00 · 2015-04-29 09:32:52 +03:00 · 1a6f8105e3
commit 1a6f8105e3
parent baaa96a871 91827a594a
12 changed files with 541 additions and 323 deletions
--- a/src/base64/base64.rs
+++ b/src/base64/base64.rs
@ -1,5 +1,5 @@
 #![crate_name = "base64"]
-#![feature(box_syntax, rustc_private)]
+#![feature(rustc_private)]

 /*
 * This file is part of the uutils coreutils package.
@ -80,11 +80,11 @@ pub fn uumain(args: Vec<String>) -> i32 {
    let mut file_buf;
    let mut input = if matches.free.is_empty() || &matches.free[0][..] == "-" {
        stdin_buf = stdin();
-        BufReader::new(box stdin_buf as Box<Read+'static>)
+        BufReader::new(Box::new(stdin_buf) as Box<Read+'static>)
    } else {
        let path = Path::new(&matches.free[0][..]);
        file_buf = safe_unwrap!(File::open(&path));
-        BufReader::new(box file_buf as Box<Read+'static>)
+        BufReader::new(Box::new(file_buf) as Box<Read+'static>)
    };

    match mode {
--- a/src/cat/cat.rs
+++ b/src/cat/cat.rs
@ -1,5 +1,5 @@
 #![crate_name = "cat"]
-#![feature(rustc_private, box_syntax, unsafe_destructor)]
+#![feature(rustc_private, unsafe_destructor)]

 /*
 * This file is part of the uutils coreutils package.
@ -261,11 +261,11 @@ fn open(path: &str) -> Option<(Box<Read>, bool)> {
    if path == "-" {
        let stdin = stdin();
        let interactive = unsafe { isatty(STDIN_FILENO) } != 0 as c_int;
-        return Some((box stdin as Box<Read>, interactive));
+        return Some((Box::new(stdin) as Box<Read>, interactive));
    }

    match File::open(path) {
-        Ok(f) => Some((box f as Box<Read>, false)),
+        Ok(f) => Some((Box::new(f) as Box<Read>, false)),
        Err(e) => {
            (writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap();
            None
--- a/src/expand/deps.mk
+++ b/src/expand/deps.mk
@ -0,0 +1 @@
+DEPLIBS += unicode-width
--- a/src/expand/expand.rs
+++ b/src/expand/expand.rs
@ -1,22 +1,28 @@
 #![crate_name = "expand"]
-#![feature(collections, core, old_io, old_path, rustc_private)]
+#![feature(rustc_private, unicode)]

 /*
 * This file is part of the uutils coreutils package.
 *
 * (c) Virgile Andreani <virgile.andreani@anbuco.fr>
+ * (c) kwantam <kwantam@gmail.com>
+ *     20150428 updated to work with both UTF-8 and non-UTF-8 encodings
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

-#![feature(box_syntax)]
-
 extern crate getopts;
 extern crate libc;
+extern crate rustc_unicode;
+extern crate unicode_width;

-use std::old_io as io;
-use std::str::StrExt;
+use std::fs::File;
+use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
+use std::iter::repeat;
+use std::str::from_utf8;
+use rustc_unicode::str::utf8_char_width;
+use unicode_width::UnicodeWidthChar;

 #[path = "../common/util.rs"]
 #[macro_use]
@ -28,7 +34,7 @@ static VERSION: &'static str = "0.0.1";
 static DEFAULT_TABSTOP: usize = 8;

 fn tabstops_parse(s: String) -> Vec<usize> {
-    let words = s.as_slice().split(',').collect::<Vec<&str>>();
+    let words = s.split(',').collect::<Vec<&str>>();

    let nums = words.into_iter()
        .map(|sn| sn.parse::<usize>()
@ -52,7 +58,9 @@ fn tabstops_parse(s: String) -> Vec<usize> {
 struct Options {
    files: Vec<String>,
    tabstops: Vec<usize>,
-    iflag: bool
+    tspaces: String,
+    iflag: bool,
+    uflag: bool,
 }

 impl Options {
@ -63,6 +71,16 @@ impl Options {
        };

        let iflag = matches.opt_present("i");
+        let uflag = !matches.opt_present("U");
+
+        // avoid allocations when dumping out long sequences of spaces
+        // by precomputing the longest string of spaces we will ever need
+        let nspaces = tabstops.iter().scan(0, |pr,&it| {
+            let ret = Some(it - *pr);
+            *pr = it;
+            ret
+        }).max().unwrap();  // length of tabstops is guaranteed >= 1
+        let tspaces = repeat(' ').take(nspaces).collect();

        let files =
            if matches.free.is_empty() {
@ -71,7 +89,7 @@ impl Options {
                matches.free
            };

-        Options { files: files, tabstops: tabstops, iflag: iflag }
+        Options { files: files, tabstops: tabstops, tspaces: tspaces, iflag: iflag, uflag: uflag }
    }
 }

@ -80,20 +98,21 @@ pub fn uumain(args: Vec<String>) -> i32 {
        getopts::optflag("i", "initial", "do not convert tabs after non blanks"),
        getopts::optopt("t", "tabs", "have tabs NUMBER characters apart, not 8", "NUMBER"),
        getopts::optopt("t", "tabs", "use comma separated list of explicit tab positions", "LIST"),
+        getopts::optflag("U", "no-utf8", "interpret input file as 8-bit ASCII rather than UTF-8"),
        getopts::optflag("h", "help", "display this help and exit"),
        getopts::optflag("V", "version", "output version information and exit"),
    ];

-    let matches = match getopts::getopts(args.tail(), &opts) {
+    let matches = match getopts::getopts(&args[1..], &opts) {
        Ok(m) => m,
        Err(f) => crash!(1, "{}", f)
    };

    if matches.opt_present("help") {
        println!("Usage: {} [OPTION]... [FILE]...", NAME);
-        io::print(getopts::usage(
+        println!("{}", getopts::usage(
            "Convert tabs in each FILE to spaces, writing to standard output.\n\
-            With no FILE, or when FILE is -, read standard input.", &opts).as_slice());
+            With no FILE, or when FILE is -, read standard input.", &opts));
        return 0;
    }

@ -107,64 +126,119 @@ pub fn uumain(args: Vec<String>) -> i32 {
    return 0;
 }

-fn open(path: String) -> io::BufferedReader<Box<Reader+'static>> {
+fn open(path: String) -> BufReader<Box<Read+'static>> {
    let mut file_buf;
-    if path.as_slice() == "-" {
-        io::BufferedReader::new(box io::stdio::stdin_raw() as Box<Reader>)
+    if path == "-" {
+        BufReader::new(Box::new(stdin()) as Box<Read>)
    } else {
-        file_buf = match io::File::open(&Path::new(path.as_slice())) {
+        file_buf = match File::open(&path[..]) {
            Ok(a) => a,
-            _ => crash!(1, "{}: {}\n", path, "No such file or directory")
+            Err(e) => crash!(1, "{}: {}\n", &path[..], e),
        };
-        io::BufferedReader::new(box file_buf as Box<Reader>)
+        BufReader::new(Box::new(file_buf) as Box<Read>)
    }
 }

-fn to_next_stop(tabstops: &[usize], col: usize) -> usize {
-    match tabstops.as_slice() {
-        [tabstop] => tabstop - col % tabstop,
-        tabstops => match tabstops.iter().skip_while(|&t| *t <= col).next() {
-            Some(&tabstop) => tabstop - col % tabstop,
-            None => 1
+fn next_tabstop(tabstops: &[usize], col: usize) -> usize {
+    if tabstops.len() == 1 {
+        tabstops[0] - col % tabstops[0]
+    } else {
+        match tabstops.iter().skip_while(|&&t| t <= col).next() {
+            Some(t) => t - col,
+            None => 1,
        }
    }
 }

+#[derive(PartialEq, Eq, Debug)]
+enum CharType {
+    Backspace,
+    Tab,
+    Other,
+}
+
 fn expand(options: Options) {
-    let mut output = io::stdout();
+    use self::CharType::*;
+
+    let mut output = BufWriter::new(stdout());
+    let ts = options.tabstops.as_ref();
+    let mut buf = Vec::new();

    for file in options.files.into_iter() {
+        let mut fh = open(file);
+
+        while match fh.read_until('\n' as u8, &mut buf) {
+            Ok(s) => s > 0,
+            Err(_) => buf.len() > 0,
+        } {
            let mut col = 0;
+            let mut byte = 0;
            let mut init = true;
-        for c in open(file).chars() {
-            match c {
-                Ok('\t') if init || !options.iflag => {
-                    let nb_spaces = to_next_stop(options.tabstops.as_slice(), col);
-                    col += nb_spaces;
-                    safe_write!(&mut output, "{:1$}", "", nb_spaces);
+
+            while byte < buf.len() {
+                let (ctype, cwidth, nbytes) = if options.uflag {
+                    let nbytes = utf8_char_width(buf[byte]);
+
+                    if byte + nbytes > buf.len() {
+                        // don't overrun buffer because of invalid UTF-8
+                        (Other, 1, 1)
+                    } else if let Ok(t) = from_utf8(&buf[byte..byte+nbytes]) {
+                        match t.chars().next() {
+                            Some('\t') => (Tab, 0, nbytes),
+                            Some('\x08') => (Backspace, 0, nbytes),
+                            Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
+                            None => {   // no valid char at start of t, so take 1 byte
+                                (Other, 1, 1)
+                            },
                        }
-                Ok('\x08') => {
-                    if col > 0 {
-                        col -= 1;
+                    } else {
+                        (Other, 1, 1)   // implicit assumption: non-UTF-8 char is 1 col wide
                    }
-                    init = false;
-                    safe_write!(&mut output, "{}", '\x08');
+                } else {
+                    (match buf[byte] {   // always take exactly 1 byte in strict ASCII mode
+                        0x09 => Tab,
+                        0x08 => Backspace,
+                        _ => Other,
+                    }, 1, 1)
+                };
+
+                // figure out how many columns this char takes up
+                match ctype {
+                    Tab => {
+                        // figure out how many spaces to the next tabstop
+                        let nts = next_tabstop(ts, col);
+                        col += nts;
+
+                        // now dump out either spaces if we're expanding, or a literal tab if we're not
+                        if init || !options.iflag {
+                            safe_unwrap!(output.write_all(&options.tspaces[..nts].as_bytes()));
+                        } else {
+                            safe_unwrap!(output.write_all(&buf[byte..byte+nbytes]));
                        }
-                Ok('\n') =>  {
-                    col = 0;
-                    init = true;
-                    safe_write!(&mut output, "{}", '\n');
-                }
-                Ok(c) => {
-                    col += 1;
-                    if c != ' ' {
+                    },
+                    _ => {
+                        col = if ctype == Other {
+                            col + cwidth
+                        } else if col > 0 {
+                            col - 1
+                        } else {
+                            0
+                        };
+
+                        // if we're writing anything other than a space, then we're
+                        // done with the line's leading spaces
+                        if buf[byte] != 0x20 {
                            init = false;
                        }
-                    safe_write!(&mut output, "{}", c);
+
+                        safe_unwrap!(output.write_all(&buf[byte..byte+nbytes]));
+                    },
                }
-                Err(_) => break
+
+                byte += nbytes; // advance the pointer
            }
+
+            buf.truncate(0);    // clear the buffer
        }
    }
 }
-
--- a/src/fmt/fmt.rs
+++ b/src/fmt/fmt.rs
@ -1,5 +1,5 @@
 #![crate_name = "fmt"]
-#![feature(box_syntax,rustc_private,str_char,unicode,core)]
+#![feature(rustc_private,str_char,unicode,core)]

 /*
 * This file is part of `fmt` from the uutils coreutils package.
@ -197,9 +197,9 @@ pub fn uumain(args: Vec<String>) -> i32 {

    for i in files.iter().map(|x| &x[..]) {
        let mut fp = match i {
-            "-" => BufReader::new(box stdin() as Box<Read+'static>),
+            "-" => BufReader::new(Box::new(stdin()) as Box<Read+'static>),
            _ => match File::open(i) {
-                Ok(f) => BufReader::new(box f as Box<Read+'static>),
+                Ok(f) => BufReader::new(Box::new(f) as Box<Read+'static>),
                Err(e) => {
                    show_warning!("{}: {}", i, e);
                    continue;
--- a/src/tr/expand.rs
+++ b/src/tr/expand.rs
@ -0,0 +1,117 @@
+/*
+ * This file is part of the uutils coreutils package.
+ *
+ * (c) Michael Gehring <mg@ebfe.org>
+ * (c) kwantam <kwantam@gmail.com>
+ *     20150428 created `expand` module to eliminate most allocs during setup
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use std::char::from_u32;
+use std::cmp::min;
+use std::iter::Peekable;
+use std::ops::Range;
+
+#[inline]
+fn unescape_char(c: char) -> char {
+    match c {
+        'a' => 0x07u8 as char,
+        'b' => 0x08u8 as char,
+        'f' => 0x0cu8 as char,
+        'v' => 0x0bu8 as char,
+        'n' => '\n',
+        'r' => '\r',
+        't' => '\t',
+        _ => c,
+    }
+}
+
+struct Unescape<'a> {
+    string: &'a str,
+}
+
+impl<'a> Iterator for Unescape<'a> {
+    type Item = char;
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let slen = self.string.len();
+        (min(slen, 1), None)
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.string.len() == 0 {
+            return None;
+        }
+
+        // is the next character an escape?
+        let (ret, idx) = match self.string.chars().next().unwrap() {
+            '\\' if self.string.len() > 1 => {
+                // yes---it's \ and it's not the last char in a string
+                // we know that \ is 1 byte long so we can index into the string safely
+                let c = self.string[1..].chars().next().unwrap();
+                (Some(unescape_char(c)), 1 + c.len_utf8())
+            },
+            c => (Some(c), c.len_utf8()),   // not an escape char
+        };
+
+        self.string = &self.string[idx..];              // advance the pointer to the next char
+        ret
+    }
+}
+
+pub struct ExpandSet<'a> {
+    range: Range<u32>,
+    unesc: Peekable<Unescape<'a>>,
+}
+
+impl<'a> Iterator for ExpandSet<'a> {
+    type Item = char;
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.unesc.size_hint()
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // while the Range has elements, try to return chars from it
+        // but make sure that they actually turn out to be Chars!
+        while let Some(n) = self.range.next() {
+            match from_u32(n) {
+                Some(c) => return Some(c),
+                _ => (),
+            }
+        }
+
+        if let Some(first) = self.unesc.next() {
+            // peek ahead
+            if self.unesc.peek() == Some(&'-') && match self.unesc.size_hint() {
+                (x, _) if x > 1 => true,    // there's a range here; record it in our internal Range struct
+                _ => false,
+            } {
+                self.unesc.next();                      // this is the '-'
+                let last = self.unesc.next().unwrap();  // this is the end of the range
+
+                self.range = first as u32 + 1 .. last as u32 + 1;
+            }
+
+            return Some(first);     // in any case, return the next char
+        }
+
+        None
+    }
+}
+
+impl<'a> ExpandSet<'a> {
+    #[inline]
+    pub fn new(s: &'a str) -> ExpandSet<'a> {
+        ExpandSet {
+            range: 0 .. 0,
+            unesc: Unescape { string: s }.peekable(),
+        }
+    }
+}
--- a/src/tr/tr.rs
+++ b/src/tr/tr.rs
@ -1,98 +1,40 @@
 #![crate_name = "tr"]
-#![feature(collections, core, old_io, rustc_private)]
+#![feature(io, rustc_private)]

 /*
 * This file is part of the uutils coreutils package.
 *
 * (c) Michael Gehring <mg@ebfe.org>
+ * (c) kwantam <kwantam@gmail.com>
+ *     20150428 created `expand` module to eliminate most allocs during setup
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

-extern crate collections;
 extern crate getopts;

 use getopts::OptGroup;
-use std::char::from_u32;
 use std::collections::{BitSet, VecMap};
-use std::old_io::{BufferedReader, print};
-use std::old_io::stdio::{stdin_raw, stdout};
-use std::iter::FromIterator;
-use std::vec::Vec;
+use std::io::{stdin, stdout, BufReader, Read, Write};
+use expand::ExpandSet;

 #[path="../common/util.rs"]
 #[macro_use]
 mod util;

+mod expand;
+
 static NAME : &'static str = "tr";
 static VERSION : &'static str = "1.0.0";
+const BUFFER_LEN: usize = 1024;

-#[inline]
-fn unescape_char(c: char) -> char {
-    match c {
-        'a' => 0x07u8 as char,
-        'b' => 0x08u8 as char,
-        'f' => 0x0cu8 as char,
-        'v' => 0x0bu8 as char,
-        'n' => '\n',
-        'r' => '\r',
-        't' => '\t',
-        _ => c,
-    }
-}
-
-#[inline]
-fn unescape(v: Vec<char>) -> Vec<char> {
-    let mut out = Vec::new();
-    let mut input = v.as_slice();
-    loop {
-        input = match input {
-            ['\\', e, rest..] => {
-                out.push(unescape_char(e));
-                rest
-            }
-            [c, rest..] => {
-                out.push(c);
-                rest
-            }
-            [] => break
-        }
-    }
-    out
-}
-
-#[inline]
-fn expand_range(from: char, to: char) -> Vec<char> {
-    range(from as u32, to as u32 + 1).map(|c| from_u32(c).unwrap()).collect()
-}
-
-fn expand_set(s: &str) -> Vec<char> {
-    let mut set = Vec::<char>::new();
-    let unesc = unescape(FromIterator::from_iter(s.chars()));
-    let mut input = unesc.as_slice();
-
-    loop {
-        input = match input {
-            [f, '-', t, rest..] => {
-                set.push_all(expand_range(f, t).as_slice());
-                rest
-            }
-            [c, rest..] => {
-                set.push(c);
-                rest
-            }
-            [] => break
-        };
-    }
-    set
-}
-
-fn delete(set: Vec<char>, complement: bool) {
+fn delete<'a>(set: ExpandSet<'a>, complement: bool) {
    let mut bset = BitSet::new();
-    let mut out = stdout();
+    let mut stdout = stdout();
+    let mut buf = String::with_capacity(BUFFER_LEN + 4);

-    for &c in set.iter() {
+    for c in set {
        bset.insert(c as usize);
    }

@ -104,42 +46,44 @@ fn delete(set: Vec<char>, complement: bool) {
        }
    };

-    for c in BufferedReader::new(stdin_raw()).chars() {
+    for c in BufReader::new(stdin()).chars() {
        match c {
-            Ok(c) if is_allowed(c) => out.write_char(c).unwrap(),
+            Ok(c) if is_allowed(c) => buf.push(c),
            Ok(_) => (),
            Err(err) => panic!("{}", err),
        };
+        if buf.len() >= BUFFER_LEN {
+            safe_unwrap!(stdout.write_all(&buf[..].as_bytes()));
+        }
+    }
+    if buf.len() > 0 {
+        safe_unwrap!(stdout.write_all(&buf[..].as_bytes()));
    }
 }

-fn tr(set1: &[char], set2: &[char]) {
-    const BUFFER_LEN: usize = 1024;
-
+fn tr<'a>(set1: ExpandSet<'a>, mut set2: ExpandSet<'a>) {
    let mut map = VecMap::new();
    let mut stdout = stdout();
-    let mut outbuffer = String::with_capacity(BUFFER_LEN);
+    let mut buf = String::with_capacity(BUFFER_LEN + 4);

-    let set2_len = set2.len();
-    for i in range(0, set1.len()) {
-        if i >= set2_len {
-            map.insert(set1[i] as usize, set2[set2_len - 1]);
-        } else {
-            map.insert(set1[i] as usize, set2[i]);
-        }
+    let mut s2_prev = '_';
+    for i in set1 {
+        s2_prev = set2.next().unwrap_or(s2_prev);
+
+        map.insert(i as usize, s2_prev);
    }

-    for c in BufferedReader::new(stdin_raw()).chars() {
+    for c in BufReader::new(stdin()).chars() {
        match c {
            Ok(inc) => {
                let trc = match map.get(&(inc as usize)) {
                    Some(t) => *t,
                    None => inc,
                };
-                outbuffer.push(trc);
-                if outbuffer.len() >= BUFFER_LEN {
-                    stdout.write_str(outbuffer.as_slice()).unwrap();
-                    outbuffer.clear();
+                buf.push(trc);
+                if buf.len() >= BUFFER_LEN {
+                    safe_unwrap!(stdout.write_all(&buf[..].as_bytes()));
+                    buf.truncate(0);
                }
            }
            Err(err) => {
@ -147,8 +91,8 @@ fn tr(set1: &[char], set2: &[char]) {
            }
        }
    }
-    if outbuffer.len() > 0 {
-        stdout.write_str(outbuffer.as_slice()).unwrap();
+    if buf.len() > 0 {
+        safe_unwrap!(stdout.write_all(&buf[..].as_bytes()));
    }
 }

@ -158,7 +102,7 @@ fn usage(opts: &[OptGroup]) {
    println!("Usage:");
    println!("  {} [OPTIONS] SET1 [SET2]", NAME);
    println!("");
-    print(getopts::usage("Translate or delete characters.", opts).as_slice());
+    println!("{}", getopts::usage("Translate or delete characters.", opts));
 }

 pub fn uumain(args: Vec<String>) -> i32 {
@ -170,7 +114,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
        getopts::optflag("V", "version", "output version information and exit"),
    ];

-    let matches = match getopts::getopts(args.tail(), &opts) {
+    let matches = match getopts::getopts(&args[1..], &opts) {
        Ok(m) => m,
        Err(err) => {
            show_error!("{}", err);
@ -203,12 +147,12 @@ pub fn uumain(args: Vec<String>) -> i32 {
    }

    if dflag {
-        let set1 = expand_set(sets[0].as_slice());
+        let set1 = ExpandSet::new(sets[0].as_ref());
        delete(set1, cflag);
    } else {
-        let set1 = expand_set(sets[0].as_slice());
-        let set2 = expand_set(sets[1].as_slice());
-        tr(set1.as_slice(), set2.as_slice());
+        let set1 = ExpandSet::new(sets[0].as_ref());
+        let set2 = ExpandSet::new(sets[1].as_ref());
+        tr(set1, set2);
    }

    0
--- a/src/unexpand/deps.mk
+++ b/src/unexpand/deps.mk
@ -0,0 +1 @@
+DEPLIBS += unicode-width
--- a/src/unexpand/unexpand.rs
+++ b/src/unexpand/unexpand.rs
@ -1,10 +1,12 @@
 #![crate_name = "unexpand"]
-#![feature(collections, core, old_io, old_path, rustc_private)]
+#![feature(rustc_private, unicode)]

 /*
 * This file is part of the uutils coreutils package.
 *
 * (c) Virgile Andreani <virgile.andreani@anbuco.fr>
+ * (c) kwantam <kwantam@gmail.com>
+ *     20150428 updated to work with both UTF-8 and non-UTF-8 encodings
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
@ -12,8 +14,14 @@

 extern crate getopts;
 extern crate libc;
+extern crate rustc_unicode;
+extern crate unicode_width;

-use std::old_io as io;
+use std::fs::File;
+use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdout, Write};
+use std::str::from_utf8;
+use rustc_unicode::str::utf8_char_width;
+use unicode_width::UnicodeWidthChar;

 #[path = "../common/util.rs"]
 #[macro_use]
@ -25,7 +33,7 @@ static VERSION: &'static str = "0.0.1";
 static DEFAULT_TABSTOP: usize = 8;

 fn tabstops_parse(s: String) -> Vec<usize> {
-    let words = s.as_slice().split(',').collect::<Vec<&str>>();
+    let words = s.split(',').collect::<Vec<&str>>();

    let nums = words.into_iter()
        .map(|sn| sn.parse()
@ -49,7 +57,8 @@ fn tabstops_parse(s: String) -> Vec<usize> {
 struct Options {
    files: Vec<String>,
    tabstops: Vec<usize>,
-    aflag: bool
+    aflag: bool,
+    uflag: bool,
 }

 impl Options {
@ -61,6 +70,7 @@ impl Options {

        let aflag = (matches.opt_present("all") || matches.opt_present("tabs"))
                    && !matches.opt_present("first-only");
+        let uflag = !matches.opt_present("U");

        let files =
            if matches.free.is_empty() {
@ -69,7 +79,7 @@ impl Options {
                matches.free
            };

-        Options { files: files, tabstops: tabstops, aflag: aflag }
+        Options { files: files, tabstops: tabstops, aflag: aflag, uflag: uflag }
    }
 }

@ -79,20 +89,21 @@ pub fn uumain(args: Vec<String>) -> i32 {
        getopts::optflag("", "first-only", "convert only leading sequences of blanks (overrides -a)"),
        getopts::optopt("t", "tabs", "have tabs N characters apart instead of 8 (enables -a)", "N"),
        getopts::optopt("t", "tabs", "use comma separated LIST of tab positions (enables -a)", "LIST"),
+        getopts::optflag("U", "no-utf8", "interpret input file as 8-bit ASCII rather than UTF-8"),
        getopts::optflag("h", "help", "display this help and exit"),
        getopts::optflag("V", "version", "output version information and exit"),
    ];

-    let matches = match getopts::getopts(args.tail(), &opts) {
+    let matches = match getopts::getopts(&args[1..], &opts) {
        Ok(m) => m,
        Err(f) => crash!(1, "{}", f)
    };

    if matches.opt_present("help") {
        println!("Usage: {} [OPTION]... [FILE]...", NAME);
-        io::print(getopts::usage(
+        println!("{}", getopts::usage(
            "Convert blanks in each FILE to tabs, writing to standard output.\n\
-            With no FILE, or when FILE is -, read standard input.", &opts).as_slice());
+            With no FILE, or when FILE is -, read standard input.", &opts));
        return 0;
    }

@ -106,121 +117,175 @@ pub fn uumain(args: Vec<String>) -> i32 {
    return 0;
 }

-fn open(path: String) -> io::BufferedReader<Box<Reader+'static>> {
+fn open(path: String) -> BufReader<Box<Read+'static>> {
    let mut file_buf;
-    if path.as_slice() == "-" {
-        io::BufferedReader::new(Box::new(io::stdio::stdin_raw()) as Box<Reader>)
+    if path == "-" {
+        BufReader::new(Box::new(stdin()) as Box<Read>)
    } else {
-        file_buf = match io::File::open(&Path::new(path.as_slice())) {
+        file_buf = match File::open(&path[..]) {
            Ok(a) => a,
-            _ => crash!(1, "{}: {}\n", path, "No such file or directory")
+            Err(e) => crash!(1, "{}: {}", &path[..], e),
        };
-        io::BufferedReader::new(Box::new(file_buf) as Box<Reader>)
+        BufReader::new(Box::new(file_buf) as Box<Read>)
    }
 }

-fn is_tabstop(tabstops: &[usize], col: usize) -> bool {
-    match tabstops {
-        [tabstop] => col % tabstop == 0,
-        tabstops => tabstops.binary_search_by(|&e| e.cmp(&col)).is_ok()
+fn next_tabstop(tabstops: &[usize], col: usize) -> Option<usize> {
+    if tabstops.len() == 1 {
+        Some(tabstops[0] - col % tabstops[0])
+    } else {
+        // find next larger tab
+        match tabstops.iter().skip_while(|&&t| t <= col).next() {
+            Some(t) => Some(t - col),
+            None => None,   // if there isn't one in the list, tab becomes a single space
+        }
    }
 }

-fn to_next_stop(tabstops: &[usize], col: usize) -> Option<usize> {
-    match tabstops {
-        [tabstop] => Some(tabstop - col % tabstop),
-        tabstops => tabstops.iter().skip_while(|&t| *t <= col).next()
-            .map(|&tabstop| tabstop - col % tabstop)
+fn write_tabs(mut output: &mut BufWriter<Stdout>, tabstops: &[usize], mut scol: usize, col: usize) {
+    while let Some(nts) = next_tabstop(tabstops, scol) {
+        if col < scol + nts {
+            break;
+        }
+
+        safe_unwrap!(output.write_all("\t".as_bytes()));
+        scol += nts;
+    }
+
+    while col > scol {
+        safe_unwrap!(output.write_all(" ".as_bytes()));
+        scol += 1;
    }
 }

-fn unexpandspan(mut output: &mut io::LineBufferedWriter<io::stdio::StdWriter>,
-                tabstops: &[usize], nspaces: usize, col: usize, init: bool) {
-    let mut cur = col - nspaces;
-    if nspaces > 1 || init {
-        loop {
-            match to_next_stop(tabstops, cur) {
-                Some(to_next) if cur + to_next <= col => {
-                        safe_write!(&mut output, "{}", '\t');
-                        cur += to_next;
-                    }
-                _ => break
-            }
-        }
-    }
-    safe_write!(&mut output, "{:1$}", "", col - cur);
+#[derive(PartialEq, Eq, Debug)]
+enum CharType {
+    Backspace,
+    Space,
+    Tab,
+    Other,
 }

 fn unexpand(options: Options) {
-    let mut output = io::stdout();
-    let ts = options.tabstops.as_slice();
+    use self::CharType::*;
+
+    let mut output = BufWriter::new(stdout());
+    let ts = &options.tabstops[..];
+    let mut buf = Vec::new();
+    let lastcol = if ts.len() > 1 {
+        *ts.last().unwrap()
+    } else {
+        0
+    };

    for file in options.files.into_iter() {
-        let mut col = 0;
-        let mut nspaces = 0;
-        let mut init = true;
-        for c in open(file).chars() {
-            match c {
-                Ok(' ') => {
-                    if init || options.aflag {
-                        nspaces += 1;
+        let mut fh = open(file);
+
+        while match fh.read_until('\n' as u8, &mut buf) {
+            Ok(s) => s > 0,
+            Err(_) => buf.len() > 0,
+        } {
+            let mut byte = 0;       // offset into the buffer
+            let mut col = 0;        // the current column
+            let mut scol = 0;       // the start col for the current span, i.e., the already-printed width
+            let mut init = true;    // are we at the start of the line?
+            let mut pctype = Other;
+
+            while byte < buf.len() {
+                // when we have a finite number of columns, never convert past the last column
+                if lastcol > 0 && col >= lastcol {
+                    if (pctype != Tab && col > scol + 1) || 
+                       (col > scol && (init || pctype == Tab)) {
+                        write_tabs(&mut output, ts, scol, col);
+                    } else if col > scol {
+                        safe_unwrap!(output.write_all(" ".as_bytes()));
+                    }
+                    scol = col;
+
+                    safe_unwrap!(output.write_all(&buf[byte..]));
+                    break;
+                }
+
+                let (ctype, cwidth, nbytes) = if options.uflag {
+                    let nbytes = utf8_char_width(buf[byte]);
+
+                    // figure out how big the next char is, if it's UTF-8
+                    if byte + nbytes > buf.len() {
+                        // make sure we don't overrun the buffer because of invalid UTF-8
+                        (Other, 1, 1)
+                    } else if let Ok(t) = from_utf8(&buf[byte..byte+nbytes]) {
+                        // Now that we think it's UTF-8, figure out what kind of char it is
+                        match t.chars().next() {
+                            Some(' ') => (Space, 0, 1),
+                            Some('\t') => (Tab, 0, 1),
+                            Some('\x08') => (Backspace, 0, 1),
+                            Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
+                            None => {   // empty slice (nbytes == 0, presumably an invalid lead byte): consume 1 byte
+                                (Other, 1, 1)
+                            },
+                        }
                    } else {
-                        nspaces = 0;
-                        safe_write!(&mut output, "{}", ' ');
+                        // otherwise, it's not valid
+                        (Other, 1, 1)       // implicit assumption: non-UTF8 char has display width 1
                    }
-                    col += 1;
+                } else {
+                    (match buf[byte] {      // always take exactly 1 byte in strict ASCII mode
+                        0x20 => Space,
+                        0x09 => Tab,
+                        0x08 => Backspace,
+                        _ => Other,
+                    }, 1, 1)
+                };
+
+                // now figure out how many columns this char takes up, and maybe print it
+                let tabs_buffered = init || options.aflag;
+                match ctype {
+                    Space | Tab => {    // compute next col, but only write space or tab chars if not buffering
+                        col += if ctype == Space {
+                            1
+                        } else {
+                            next_tabstop(ts, col).unwrap_or(1)
+                        };
+
+                        if !tabs_buffered {
+                            safe_unwrap!(output.write_all(&buf[byte..byte+nbytes]));
+                            scol = col;             // now printed up to this column
                        }
-                Ok('\t') if nspaces > 0 => {
-                    if is_tabstop(ts, col) {
-                        nspaces = 0;
-                        col += 1;
-                        safe_write!(&mut output, "{}", '\t');
+                    },
+                    Other | Backspace => {  // always 
+                        // never turn a single space before a non-blank into a tab
+                        // unless it's at the start of the line
+                        if (tabs_buffered && pctype != Tab && col > scol + 1) || 
+                           (col > scol && (init || (tabs_buffered && pctype == Tab))) {
+                            write_tabs(&mut output, ts, scol, col);
+                        } else if col > scol {
+                            safe_unwrap!(output.write_all(" ".as_bytes()));
                        }
-                    match to_next_stop(ts, col) {
-                        Some(to_next) => {
-                            nspaces += to_next;
-                            col += to_next;
-                        }
-                        None => {
-                            col += 1;
-                            unexpandspan(&mut output, ts, nspaces, col, init);
-                            nspaces = 0;
-                            safe_write!(&mut output, "{}", '\t');
-                        }
-                    }
-                }
-                Ok('\x08') => { // '\b'
-                    if init || options.aflag {
-                        unexpandspan(&mut output, ts, nspaces, col, init)
-                    }
-                    nspaces = 0;
-                    if col > 0 { col -= 1; }
                        init = false;
-                    safe_write!(&mut output, "{}", '\x08');
+                        col = if ctype == Other {   // use computed width
+                            col + cwidth
+                        } else if col > 0 {         // Backspace case, but only if col > 0
+                            col - 1
+                        } else {
+                            0
+                        };
+                        safe_unwrap!(output.write_all(&buf[byte..byte+nbytes]));
+                        scol = col;                 // we've now printed up to this column
+                    },
                }
-                Ok('\n') => {
-                    if init || options.aflag {
-                        unexpandspan(&mut output, ts, nspaces, col, init)
+
+                byte += nbytes; // move on to next char
+                pctype = ctype; // save the previous type
            }
-                    nspaces = 0;
-                    col = 0;
-                    init = true;
-                    safe_write!(&mut output, "{}", '\n');
+
+            // write out anything remaining
+            if col > scol + 1 || (init && col > scol) {
+                write_tabs(&mut output, ts, scol, col);
+            } else if col > scol {
+                safe_unwrap!(output.write_all(" ".as_bytes()));
            }
-                Ok(c) => {
-                    if init || options.aflag {
-                        unexpandspan(&mut output, ts, nspaces, col, init)
-                    }
-                    nspaces = 0;
-                    col += 1;
-                    init = false;
-                    safe_write!(&mut output, "{}", c);
-                }
-                Err(_) => break
-            }
-        }
-        if init || options.aflag {
-            unexpandspan(&mut output, ts, nspaces, col, init)
+
+            buf.truncate(0);    // clear out the buffer
        }
    }
 }
--- a/src/unlink/unlink.rs
+++ b/src/unlink/unlink.rs
@ -1,5 +1,5 @@
 #![crate_name = "unlink"]
-#![feature(collections, core, old_io, old_path, rustc_private)]
+#![feature(rustc_private)]

 /*
 * This file is part of the uutils coreutils package.
@ -15,9 +15,14 @@
 extern crate getopts;
 extern crate libc;

-use std::old_io as io;
-use std::old_io::fs::{self, PathExtensions};
-use std::old_io::print;
+use libc::consts::os::posix88::{S_IFMT, S_IFLNK, S_IFREG};
+use libc::types::os::arch::c95::c_char;
+use libc::types::os::arch::posix01::stat;
+use libc::funcs::posix01::stat_::lstat;
+use libc::funcs::posix88::unistd::unlink;
+
+use std::mem::uninitialized;
+use std::io::{Error, ErrorKind, Write};

 #[path = "../common/util.rs"]
 #[macro_use]
@ -26,13 +31,12 @@ mod util;
 static NAME: &'static str = "unlink";

 pub fn uumain(args: Vec<String>) -> i32 {
-    let program = args[0].clone();
    let opts = [
        getopts::optflag("h", "help", "display this help and exit"),
        getopts::optflag("V", "version", "output version information and exit"),
    ];

-    let matches = match getopts::getopts(args.tail(), &opts) {
+    let matches = match getopts::getopts(&args[1..], &opts) {
        Ok(m) => m,
        Err(f) => {
            crash!(1, "invalid options\n{}", f)
@ -43,9 +47,9 @@ pub fn uumain(args: Vec<String>) -> i32 {
        println!("unlink 1.0.0");
        println!("");
        println!("Usage:");
-        println!("  {0} [FILE]... [OPTION]...", program);
+        println!("  {0} [FILE]... [OPTION]...", args[0]);
        println!("");
-        print(getopts::usage("Unlink the file at [FILE].", &opts).as_slice());
+        println!("{}", getopts::usage("Unlink the file at [FILE].", &opts));
        return 0;
    }

@ -55,31 +59,38 @@ pub fn uumain(args: Vec<String>) -> i32 {
    }

    if matches.free.len() == 0 {
-        crash!(1, "missing operand\nTry '{0} --help' for more information.", program);
+        crash!(1, "missing operand\nTry '{0} --help' for more information.", args[0]);
    } else if matches.free.len() > 1 {
-        crash!(1, "extra operand: '{1}'\nTry '{0} --help' for more information.", program, matches.free[1]);
+        crash!(1, "extra operand: '{1}'\nTry '{0} --help' for more information.", args[0], matches.free[1]);
    }

-    let path = Path::new(matches.free[0].clone());
+    let st_mode = {
+        let mut buf: stat = unsafe { uninitialized() };
+        let result = unsafe { lstat(matches.free[0].as_ptr() as *const c_char, &mut buf as *mut stat) };

-    let result = path.lstat().and_then(|info| {
-        match info.kind {
-            io::FileType::RegularFile => Ok(()),
-            io::FileType::Symlink => Ok(()),
-            _ => Err(io::IoError {
-                kind: io::OtherIoError,
-                desc: "is not a file or symlink",
-                detail: None
-            })
+        if result < 0 {
+            crash!(1, "Cannot stat '{}': {}", matches.free[0], Error::last_os_error());
        }
-    }).and_then(|_| {
-        fs::unlink(&path)
-    });
+
+        buf.st_mode & S_IFMT
+    };
+
+    let result = if st_mode != S_IFREG && st_mode != S_IFLNK {
+        Err(Error::new(ErrorKind::Other, "Not a regular file or symlink"))
+    } else {
+        let result = unsafe { unlink(matches.free[0].as_ptr() as *const c_char) };
+
+        if result < 0 {
+            Err(Error::last_os_error())
+        } else {
+            Ok(())
+        }
+    };

    match result {
        Ok(_) => (),
        Err(e) => {
-            crash!(1, "cannot unlink '{0}': {1}", path.display(), e.desc);
+            crash!(1, "cannot unlink '{0}': {1}", matches.free[0], e);
        }
    }

--- a/test/tr.rs
+++ b/test/tr.rs
@ -1,49 +1,51 @@
-#![allow(unstable)]
-
-use std::old_io::process::Command;
+use std::io::Write;
+use std::process::{Command, Stdio};

 static PROGNAME: &'static str = "./tr";

 fn run(input: &str, args: &[&'static str]) -> Vec<u8> {
-    let mut process = Command::new(PROGNAME).args(args).spawn().unwrap();
+    let mut process = Command::new(PROGNAME)
+                                   .args(args)
+                                   .stdin(Stdio::piped())
+                                   .stdout(Stdio::piped())
+                                   .spawn()
+                                   .unwrap_or_else(|e| panic!("{}", e));

-    process.stdin.take().unwrap().write_str(input).unwrap();
+    process.stdin.take().unwrap_or_else(|| panic!("Could not take child process stdin"))
+        .write_all(input.as_bytes()).unwrap_or_else(|e| panic!("{}", e));

-    let po = match process.wait_with_output() {
-        Ok(p) => p,
-        Err(err) => panic!("{}", err),
-    };
-    po.output
+    let po = process.wait_with_output().unwrap_or_else(|e| panic!("{}", e));
+    po.stdout
 }

 #[test]
 fn test_toupper() {
    let out = run("!abcd!", &["a-z", "A-Z"]);
-    assert_eq!(out.as_slice(), b"!ABCD!");
+    assert_eq!(&out[..], b"!ABCD!");
 }

 #[test]
 fn test_small_set2() {
    let out = run("@0123456789", &["0-9", "X"]);
-    assert_eq!(out.as_slice(), b"@XXXXXXXXXX");
+    assert_eq!(&out[..], b"@XXXXXXXXXX");
 }

 #[test]
 fn test_unicode() {
    let out = run("(,°□°）, ┬─┬", &[", ┬─┬", "╯︵┻━┻"]);
-    assert_eq!(out.as_slice(), "(╯°□°）╯︵┻━┻".as_bytes());
+    assert_eq!(&out[..], "(╯°□°）╯︵┻━┻".as_bytes());
 }

 #[test]
 fn test_delete() {
    let out = run("aBcD", &["-d", "a-z"]);
-    assert_eq!(out.as_slice(), b"BD");
+    assert_eq!(&out[..], b"BD");
 }

 #[test]
 fn test_delete_complement() {
    let out = run("aBcD", &["-d", "-c", "a-z"]);
-    assert_eq!(out.as_slice(), b"ac");
+    assert_eq!(&out[..], b"ac");
 }


--- a/test/unexpand.rs
+++ b/test/unexpand.rs
@ -1,74 +1,76 @@
-#![allow(unstable)]
-
-use std::old_io::process::Command;
+use std::io::Write;
+use std::process::{Command, Stdio};

 static PROGNAME: &'static str = "./unexpand";

 fn run(input: &str, args: &[&'static str]) -> Vec<u8> {
-    let mut process = Command::new(PROGNAME).args(args).spawn().unwrap();
+    let mut process = Command::new(PROGNAME)
+                                   .args(args)
+                                   .stdin(Stdio::piped())
+                                   .stdout(Stdio::piped())
+                                   .spawn()
+                                   .unwrap_or_else(|e| panic!("{}", e));

-    process.stdin.take().unwrap().write_str(input).unwrap();
+    process.stdin.take().unwrap_or_else(|| panic!("Could not take child process stdin"))
+        .write_all(input.as_bytes()).unwrap_or_else(|e| panic!("{}", e));

-    let po = match process.wait_with_output() {
-        Ok(p) => p,
-        Err(err) => panic!("{}", err),
-    };
-    po.output
+    let po = process.wait_with_output().unwrap_or_else(|e| panic!("{}", e));
+    po.stdout
 }

 #[test]
 fn unexpand_init_0() {
    let out = run(" 1\n  2\n   3\n    4\n", &["-t4"]);
-    assert_eq!(out.as_slice(), b" 1\n  2\n   3\n\t4\n");
+    assert_eq!(&out[..], b" 1\n  2\n   3\n\t4\n" as &[u8]);
 }

 #[test]
 fn unexpand_init_1() {
    let out = run("     5\n      6\n       7\n        8\n", &["-t4"]);
-    assert_eq!(out.as_slice(), b"\t 5\n\t  6\n\t   7\n\t\t8\n");
+    assert_eq!(&out[..], b"\t 5\n\t  6\n\t   7\n\t\t8\n" as &[u8]);
 }

 #[test]
 fn unexpand_init_list_0() {
    let out = run(" 1\n  2\n   3\n    4\n", &["-t2,4"]);
-    assert_eq!(out.as_slice(), b" 1\n\t2\n\t 3\n\t\t4\n");
+    assert_eq!(&out[..], b" 1\n\t2\n\t 3\n\t\t4\n" as &[u8]);
 }

 #[test]
 fn unexpand_init_list_1() {
    // Once the list is exhausted, spaces are not converted anymore
    let out = run("     5\n      6\n       7\n        8\n", &["-t2,4"]);
-    assert_eq!(out.as_slice(), b"\t\t 5\n\t\t  6\n\t\t   7\n\t\t    8\n");
+    assert_eq!(&out[..], b"\t\t 5\n\t\t  6\n\t\t   7\n\t\t    8\n" as &[u8]);
 }

 #[test]
 fn unexpand_aflag_0() {
-    let out = run("e     E\nf      F\ng       G\nh        H\n", &[]);
-    assert_eq!(out.as_slice(), b"e     E\nf      F\ng       G\nh        H\n");
+    let out = run("e     E\nf      F\ng       G\nh        H\n", &["--"]);
+    assert_eq!(&out[..], b"e     E\nf      F\ng       G\nh        H\n" as &[u8]);
 }

 #[test]
 fn unexpand_aflag_1() {
    let out = run("e     E\nf      F\ng       G\nh        H\n", &["-a"]);
-    assert_eq!(out.as_slice(), b"e     E\nf      F\ng\tG\nh\t H\n");
+    assert_eq!(&out[..], b"e     E\nf      F\ng\tG\nh\t H\n" as &[u8]);
 }

 #[test]
 fn unexpand_aflag_2() {
    let out = run("e     E\nf      F\ng       G\nh        H\n", &["-t8"]);
-    assert_eq!(out.as_slice(), b"e     E\nf      F\ng\tG\nh\t H\n");
+    assert_eq!(&out[..], b"e     E\nf      F\ng\tG\nh\t H\n" as &[u8]);
 }

 #[test]
 fn unexpand_first_only_0() {
    let out = run("        A     B", &["-t3"]);
-    assert_eq!(out.as_slice(), b"\t\t  A\t  B");
+    assert_eq!(&out[..], b"\t\t  A\t  B" as &[u8]);
 }

 #[test]
 fn unexpand_first_only_1() {
    let out = run("        A     B", &["-t3", "--first-only"]);
-    assert_eq!(out.as_slice(), b"\t\t  A     B");
+    assert_eq!(&out[..], b"\t\t  A     B" as &[u8]);
 }

 #[test]
@ -76,20 +78,20 @@ fn unexpand_trailing_space_0() { // evil
    // Individual spaces before fields starting with non blanks should not be
    // converted, unless they are at the beginning of the line.
    let out = run("123 \t1\n123 1\n123 \n123 ", &["-t4"]);
-    assert_eq!(out.as_slice(), b"123\t\t1\n123 1\n123 \n123 ");
+    assert_eq!(&out[..], b"123\t\t1\n123 1\n123 \n123 " as &[u8]);
 }

 #[test]
 fn unexpand_trailing_space_1() { // super evil
    let out = run(" abc d e  f  g ", &["-t1"]);
-    assert_eq!(out.as_slice(), b"\tabc d e\t\tf\t\tg ");
+    assert_eq!(&out[..], b"\tabc d e\t\tf\t\tg " as &[u8]);
 }

 #[test]
 fn unexpand_spaces_follow_tabs_0() {
    // The two first spaces can be included into the first tab.
    let out = run("  \t\t   A", &[]);
-    assert_eq!(out.as_slice(), b"\t\t   A");
+    assert_eq!(&out[..], b"\t\t   A" as &[u8]);
 }

 #[test]
@ -100,6 +102,7 @@ fn unexpand_spaces_follow_tabs_1() { // evil
    //      ' ' -> '\t'         // third tabstop (5)
    // '  B \t' -> '  B \t'     // after the list is exhausted, nothing must change
    let out = run("a \t   B \t", &["-t1,4,5"]);
-    assert_eq!(out.as_slice(), b"a\t\t  B \t");
+    assert_eq!(&out[..], b"a\t\t  B \t" as &[u8]);
 }

+