From 23bdaae577d22360af4952a46721c5da18f984e2 Mon Sep 17 00:00:00 2001 From: Boden Garman Date: Sun, 17 Nov 2013 20:41:40 +1100 Subject: [PATCH] added wc utility --- Makefile | 4 +- wc/wc.rs | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+), 2 deletions(-) create mode 100644 wc/wc.rs diff --git a/Makefile b/Makefile index 9f68e1a64..24e2f1df1 100644 --- a/Makefile +++ b/Makefile @@ -3,11 +3,11 @@ RUSTC ?= rustc RM := rm # Flags -RUSTCFLAGS := +RUSTCFLAGS := --opt-level=3 RMFLAGS := # Output names -EXES := false printenv true yes cat whoami env +EXES := false printenv true yes cat whoami env wc TESTS := cat # Utils stuff diff --git a/wc/wc.rs b/wc/wc.rs new file mode 100644 index 000000000..88fbe53cd --- /dev/null +++ b/wc/wc.rs @@ -0,0 +1,232 @@ +#[link(name="wc", vers="1.0.0", author="Boden Garman")]; + +/* + * This file is part of the uutils coreutils package. + * + * (c) Boden Garman + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +extern mod extra; + +use std::os; +use std::io::{stdin, stderr, File, result}; +use std::io::buffered::BufferedReader; +use std::str::from_utf8_slice_opt; +use extra::getopts::{groups, Matches}; + +struct Result { + filename: ~str, + bytes: uint, + chars: uint, + lines: uint, + words: uint, + max_line_length: uint, +} + +fn main() { + let args = os::args(); + let program = args[0].clone(); + let opts = ~[ + groups::optflag("c", "bytes", "print the byte counts"), + groups::optflag("m", "chars", "print the character counts"), + groups::optflag("l", "lines", "print the newline counts"), + groups::optflag("L", "max-line-length", "print the length of the longest line"), + groups::optflag("w", "words", "print the word counts"), + groups::optflag("h", "help", "display this help and exit"), + groups::optflag("V", "version", "output version information and exit"), + ]; + + let matches = match groups::getopts(args.tail(), opts) { + Ok(m) => m, + Err(f) => { + writeln!(&mut stderr() as &mut Writer, + "Invalid options\n{}", f.to_err_msg()); + os::set_exit_status(1); + return + } + }; + + if matches.opt_present("help") { + println("Usage:"); + println!(" {0:s} [OPTION]... [FILE]...", program); + println(""); + print(groups::usage("Print newline, word and byte counts for each FILE", opts)); + println(""); + println("With no FILE, or when FILE is -, read standard input."); + return; + } + + if (matches.opt_present("version")) { + println("cat 1.0.0"); + return; + } + + + + let mut files = matches.free.clone(); + if files.is_empty() { + files = ~[~"-"]; + } + + wc(files, &matches); +} + +static CR: u8 = '\r' as u8; +static LF: u8 = '\n' as u8; +static SPACE: u8 = ' ' as u8; +static TAB: u8 = '\t' as u8; +static SYN: u8 = 0x16 as u8; +static FF: u8 = 0x0C as u8; + +fn is_word_seperator(byte: u8) -> bool { + byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF +} + +pub fn wc(files: ~[~str], matches: &Matches) { + let mut total_line_count: uint = 0; + let mut total_word_count: uint = 0; + let mut total_char_count: uint = 0; + let mut total_byte_count: uint = 0; + let mut total_longest_line_length: uint = 0; + + let mut results: ~[Result] = ~[]; + let mut max_str_len: uint = 0; + + for path in files.iter() { + let mut reader = match open(path.to_owned()) { + Some(f) => f, + None => { continue } + }; + + let mut line_count: uint = 0; + let mut word_count: uint = 0; + let mut byte_count: uint = 0; + let mut char_count: uint = 0; + let mut current_char_count: uint = 0; + let mut longest_line_length: uint = 0; + + loop { + // reading from a TTY seems to raise a condition on, rather than return Some(0) like a file. + // hence the option wrapped in a result here + match result(| | reader.read_until(LF)) { + Ok(Some(raw_line)) => { + line_count += 1; + byte_count += raw_line.iter().len(); + + // try and convert the bytes to UTF-8 first + match from_utf8_slice_opt(raw_line) { + Some(line) => { + word_count += line.word_iter().len(); + current_char_count = line.iter().len(); + char_count += current_char_count; + }, + None => { + word_count += raw_line.split_iter(|&x| is_word_seperator(x)).len(); + for byte in raw_line.iter() { + match byte.is_ascii() { + true => { + current_char_count += 1; + } + false => { } + } + } + char_count += current_char_count; + } + } + + if (current_char_count > longest_line_length) { + longest_line_length = current_char_count; + } + }, + _ => break + } + + } + + results.push(Result { + filename: path.clone(), + bytes: byte_count, + chars: char_count, + lines: line_count, + words: word_count, + max_line_length: longest_line_length, + }); + + total_line_count += line_count; + total_word_count += word_count; + total_char_count += char_count; + total_byte_count += byte_count; + + if (longest_line_length > total_longest_line_length) { + total_longest_line_length = longest_line_length; + } + + // used for formatting + max_str_len = total_byte_count.to_str().len(); + } + + for result in results.iter() { + print_stats(&result.filename, result.lines, result.words, result.chars, result.bytes, result.max_line_length, matches, max_str_len); + } + + if (files.len() > 1) { + print_stats(&~"total", total_line_count, total_word_count, total_char_count, total_byte_count, total_longest_line_length, matches, max_str_len); + } +} + +fn print_stats(filename: &~str, line_count: uint, word_count: uint, char_count: uint, + byte_count: uint, longest_line_length: uint, matches: &Matches, max_str_len: uint) { + if (matches.opt_present("lines")) { + print!("{:1$}", line_count, max_str_len); + } + if (matches.opt_present("words")) { + print!("{:1$}", word_count, max_str_len); + } + if (matches.opt_present("bytes")) { + print!("{:1$}", byte_count, max_str_len + 1); + } + if (matches.opt_present("chars")) { + print!("{:1$}", char_count, max_str_len); + } + if (matches.opt_present("max-line-length")) { + print!("{:1$}", longest_line_length, max_str_len); + } + + // defaults + if (!matches.opt_present("bytes") && !matches.opt_present("chars") && !matches.opt_present("lines") + && !matches.opt_present("words") && !matches.opt_present("max-line-length")) { + print!("{:1$}", line_count, max_str_len); + print!("{:1$}", word_count, max_str_len + 1); + print!("{:1$}", byte_count, max_str_len + 1); + } + + if (*filename != ~"-") { + println!(" {}", *filename); + } + else { + println(""); + } +} + +fn open(path: ~str) -> Option> { + if "-" == path { + let reader = ~stdin() as ~Reader; + return Some(BufferedReader::new(reader)); + } + + match result(|| File::open(&std::path::Path::new(path.as_slice()))) { + Ok(fd) => { + let reader = ~fd as ~Reader; + return Some(BufferedReader::new(reader)); + }, + Err(e) => { + writeln!(&mut stderr() as &mut Writer, "wc: {0:s}: {1:s}", path, e.desc.to_str()); + os::set_exit_status(1); + } + } + + None +}