diff --git a/Cargo.toml b/Cargo.toml
index d6db5a2ce..eba27bd9a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -203,6 +203,10 @@ path = "tty/tty.rs"
 name = "uname"
 path = "uname/uname.rs"
 
+[[bin]]
+name = "uniq"
+path = "uniq/uniq.rs"
+
 [[bin]]
 name = "unlink"
 path = "unlink/unlink.rs"
diff --git a/Makefile b/Makefile
index 786617d0b..884f90863 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,7 @@ PROGS := \
   true \
   truncate \
   unlink \
+  uniq \
   wc \
   yes \
   head \
diff --git a/uniq/uniq.rs b/uniq/uniq.rs
new file mode 100644
index 000000000..e65d90077
--- /dev/null
+++ b/uniq/uniq.rs
@@ -0,0 +1,245 @@
+#![crate_id(name="uniq", vers="1.0.0", author="Chirag B. Jadwani")]
+/*
+ * This file is part of the uutils coreutils package.
+ *
+ * (c) Chirag B Jadwani
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ *
+ */
+
+#![feature(macro_rules)]
+
+extern crate getopts;
+
+use std::ascii::OwnedStrAsciiExt;
+use std::cmp::min;
+use std::from_str::FromStr;
+use std::io;
+
+#[path = "../common/util.rs"]
+mod util;
+
+static NAME: &'static str = "uniq";
+static VERSION: &'static str = "1.0.0";
+
+/// Options controlling how adjacent lines are compared and printed.
+struct Uniq {
+    /// Suppress groups that consist of a single line.
+    repeats_only: bool,
+    /// Suppress the first line of groups that contain several lines.
+    uniques_only: bool,
+    /// Also print the lines that follow the first one in a group.
+    all_repeated: bool,
+    /// Group delimiting method: "prepend", "separate", or "" for none.
+    delimiters: String,
+    /// Prefix each printed line with a count.
+    show_counts: bool,
+    /// Number of leading characters excluded from the comparison.
+    slice_start: Option<uint>,
+    /// Maximum number of characters that take part in the comparison.
+    slice_stop: Option<uint>,
+    /// Compare lines without regard to ASCII case.
+    ignore_case: bool,
+}
+
+impl Uniq {
+    /// Reads `reader` line by line, collects runs of adjacent lines whose
+    /// comparison keys are equal, and hands each run to `print_lines`.
+    pub fn print_uniq<R: Reader, W: Writer>(&self, reader: &mut io::BufferedReader<R>, writer: &mut io::BufferedWriter<W>) {
+        let mut lines: Vec<String> = vec!();
+        let mut first_line_printed = false;
+        let delimiters = self.delimiters.as_slice();
+
+        for io_line in reader.lines() {
+            let line = crash_if_err!(1, io_line);
+            if !lines.is_empty() && self.cmp_key(lines.get(0)) != self.cmp_key(&line) {
+                let print_delimiter = delimiters == "prepend" || (delimiters == "separate" && first_line_printed);
+                first_line_printed |= self.print_lines(writer, &lines, print_delimiter);
+                lines.truncate(0);
+            }
+            lines.push(line);
+        }
+        // Flush the run that was still being collected when input ended.
+        if !lines.is_empty() {
+            let print_delimiter = delimiters == "prepend" || (delimiters == "separate" && first_line_printed);
+            self.print_lines(writer, &lines, print_delimiter);
+        }
+    }
+
+    /// Returns the part of `line` that takes part in the comparison.
+    ///
+    /// A trailing newline is never part of the key, so a final line without
+    /// one still matches its predecessor. Skip and width are counted in
+    /// characters, which keeps the slice on UTF-8 character boundaries.
+    fn cmp_key(&self, line: &String) -> String {
+        let text = line.as_slice();
+        let content = if text.ends_with("\n") {
+            text.slice_to(text.len() - 1)
+        } else {
+            text
+        };
+        let char_count = content.char_len();
+        let slice_start = match self.slice_start {
+            Some(i) => min(i, char_count),
+            None => 0
+        };
+        let slice_stop = match self.slice_stop {
+            // Clamp the width first so `slice_start + i` cannot overflow.
+            Some(i) => slice_start + min(i, char_count - slice_start),
+            None => char_count
+        };
+        let sliced = content.slice_chars(slice_start, slice_stop).into_string();
+        if self.ignore_case {
+            sliced.into_ascii_upper()
+        } else {
+            sliced
+        }
+    }
+
+    /// Writes one run of equal lines and reports whether anything was printed.
+    ///
+    /// `print_delimiter` requests a blank line in front of the first line that
+    /// is actually written for this run.
+    fn print_lines<W: Writer>(&self, writer: &mut io::BufferedWriter<W>, lines: &Vec<String>, print_delimiter: bool) -> bool {
+        let mut first_line_printed = false;
+        let mut count = if self.all_repeated { 1 } else { lines.len() };
+        if lines.len() == 1 && !self.repeats_only
+                || lines.len() > 1 && !self.uniques_only {
+            self.print_line(writer, lines.get(0), count, print_delimiter);
+            first_line_printed = true;
+            count += 1;
+        }
+        if self.all_repeated {
+            for line in lines.tail().iter() {
+                self.print_line(writer, line, count, print_delimiter && !first_line_printed);
+                first_line_printed = true;
+                count += 1;
+            }
+        }
+        first_line_printed
+    }
+
+    /// Writes a single line, optionally preceded by a blank delimiter line and
+    /// prefixed with `count` when counts are enabled.
+    fn print_line<W: Writer>(&self, writer: &mut io::BufferedWriter<W>, line: &String, count: uint, print_delimiter: bool) {
+        let output_line = if self.show_counts {
+            format!("{:7} {}", count, line)
+        } else {
+            line.clone()
+        };
+        if print_delimiter {
+            crash_if_err!(1, writer.write_line(""));
+        }
+        crash_if_err!(1, writer.write_str(output_line.as_slice()));
+    }
+}
+
+/// Parses the argument of option `opt_name`, if the option was given.
+/// Hands control to `crash!` when the argument does not parse.
+fn opt_parsed<T: FromStr>(opt_name: &str, matches: &getopts::Matches) -> Option<T> {
+    matches.opt_str(opt_name).map(|arg_str| {
+        let opt_val: Option<T> = from_str(arg_str.as_slice());
+        opt_val.unwrap_or_else(||
+            crash!(1, "Invalid argument for {}: {}", opt_name, arg_str))
+    })
+}
+
+/// Entry point: parses `args`, then prints help, the version, or the filtered input.
+pub fn uumain(args: Vec<String>) -> int {
+    let program_path = Path::new(args.get(0).clone());
+    let program = program_path.filename_str().unwrap_or(NAME);
+
+    let opts = [
+        getopts::optflag("c", "count", "prefix lines by the number of occurrences"),
+        getopts::optflag("d", "repeated", "only print duplicate lines"),
+        getopts::optflagopt(
+            "D",
+            "all-repeated",
+            "print all duplicate lines delimit-method={none(default),prepend,separate} Delimiting is done with blank lines",
+            "delimit-method"
+        ),
+        getopts::optopt("s", "skip-chars", "avoid comparing the first N characters", "N"),
+        getopts::optopt("w", "check-chars", "compare no more than N characters in lines", "N"),
+        getopts::optflag("i", "ignore-case", "ignore differences in case when comparing"),
+        getopts::optflag("u", "unique", "only print unique lines"),
+        getopts::optflag("h", "help", "display this help and exit"),
+        getopts::optflag("V", "version", "output version information and exit")
+    ];
+    let matches = match getopts::getopts(args.tail(), opts) {
+        Ok(m) => m,
+        Err(f) => crash!(1, "{}", f)
+    };
+
+    if matches.opt_present("help") {
+        println!("{} {}", NAME, VERSION);
+        println!("");
+        println!("Usage:");
+        // At most two operands are accepted: the input and the output file.
+        println!("  {0:s} [OPTION]... [INPUT [OUTPUT]]", program);
+        println!("");
+        print!("{}", getopts::usage("Filter adjacent matching lines from INPUT (or standard input),\n\
+                                    writing to OUTPUT (or standard output).", opts));
+        println!("");
+        println!("Note: '{0}' does not detect repeated lines unless they are adjacent.\n\
+                  You may want to sort the input first, or use 'sort -u' without '{0}'.\n", program);
+    } else if matches.opt_present("version") {
+        println!("{} {}", NAME, VERSION);
+    } else {
+        let (in_file_name, out_file_name) = match matches.free.len() {
+            0 => ("-".into_string(), "-".into_string()),
+            1 => (matches.free.get(0).clone(), "-".into_string()),
+            2 => (matches.free.get(0).clone(), matches.free.get(1).clone()),
+            _ => {
+                crash!(1, "Extra operand: {}", matches.free.get(2));
+            }
+        };
+        let uniq = Uniq {
+            repeats_only: matches.opt_present("repeated") || matches.opt_present("all-repeated"),
+            uniques_only: matches.opt_present("unique"),
+            all_repeated: matches.opt_present("all-repeated"),
+            delimiters: match matches.opt_default("all-repeated", "none") {
+                Some(ref opt_arg) if opt_arg.as_slice() != "none" => {
+                    let rep_args = ["prepend".to_string(), "separate".to_string()];
+                    if !rep_args.contains(opt_arg) {
+                        crash!(1, "Incorrect argument for all-repeated: {}", opt_arg.clone());
+                    }
+                    opt_arg.clone()
+                },
+                _ => "".to_string()
+            },
+            show_counts: matches.opt_present("count"),
+            slice_start: opt_parsed("skip-chars", &matches),
+            slice_stop: opt_parsed("check-chars", &matches),
+            ignore_case: matches.opt_present("ignore-case"),
+        };
+        uniq.print_uniq(&mut open_input_file(in_file_name),
+                        &mut open_output_file(out_file_name));
+    }
+    0
+}
+
+/// Opens `in_file_name` for buffered reading; "-" selects standard input.
+fn open_input_file(in_file_name: String) -> io::BufferedReader<Box<Reader>> {
+    let in_file = if in_file_name.as_slice() == "-" {
+        box io::stdio::stdin_raw() as Box<Reader>
+    } else {
+        let path = Path::new(in_file_name);
+        let in_file = io::File::open(&path);
+        box crash_if_err!(1, in_file) as Box<Reader>
+    };
+    io::BufferedReader::new(in_file)
+}
+
+/// Creates `out_file_name` for buffered writing; "-" selects standard output.
+fn open_output_file(out_file_name: String) -> io::BufferedWriter<Box<Writer>> {
+    let out_file = if out_file_name.as_slice() == "-" {
+        box io::stdio::stdout_raw() as Box<Writer>
+    } else {
+        let path = Path::new(out_file_name);
+        let out_file = io::File::create(&path);
+        box crash_if_err!(1, out_file) as Box<Writer>
+    };
+    io::BufferedWriter::new(out_file)
+}