From 2ab586459b0a9a34ce461fd9e268fd8b85c4188f Mon Sep 17 00:00:00 2001 From: polyphemus Date: Sun, 8 Jun 2014 23:51:22 +0200 Subject: [PATCH] Add initial cut support, only bytes cutting --- cut/cut.rs | 285 ++++++++++++++++++++++++++++++++++++++++++++++++++ cut/ranges.rs | 108 +++++++++++++++++++ 2 files changed, 393 insertions(+) create mode 100644 cut/cut.rs create mode 100644 cut/ranges.rs diff --git a/cut/cut.rs b/cut/cut.rs new file mode 100644 index 000000000..2055dee1b --- /dev/null +++ b/cut/cut.rs @@ -0,0 +1,285 @@ +#![crate_id(name="cut", vers="1.0.0", author="Rolf Morel")] +#![feature(macro_rules)] + +extern crate getopts; +extern crate libc; + +use std::os; +use std::io::{print,stdin,stdout,File,BufferedWriter,BufferedReader}; +use getopts::{optopt, optflag, getopts, usage}; + +use ranges::Range; + +#[path = "../common/util.rs"] +mod util; +mod ranges; + +static NAME: &'static str = "cut"; +static VERSION: &'static str = "1.0.0"; + +struct Options { + out_delim: Option, +} + +struct FieldOptions { + delimiter: char, + out_delimeter: String, + only_delimited: bool, +} + +enum Mode { + Bytes(Vec, Options), + Characters(Vec, Options), + Fields(Vec, FieldOptions), +} + +fn list_to_ranges(list: &str, complement: bool) -> Result, String> { + use std::uint; + + let mut range_vec = { + try!( + if complement { + Range::from_list(list).map(|r| ranges::complement(&r)) + } else { + Range::from_list(list) + } + ) + }; + + // add sentinel value for increased performance during cutting + range_vec.push(Range{ low: uint::MAX, high: uint::MAX }); + + Ok(range_vec) +} + +fn cut_bytes(files: Vec, ranges: Vec, opts: Options) -> int { + let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); + let (use_delim, out_delim) = match opts.out_delim { + Some(delim) => (true, delim), + None => (false, "".to_string()) + }; + + for filename in files.move_iter() { + let mut file = match open(&filename) { + Some(file) => file, + None => continue + }; + + let mut byte_pos = 0; + let mut print_delim = false; + let mut range_pos = 0; + + loop { + let byte = match file.read_u8() { + Ok(byte) => byte, + Err(std::io::IoError{ kind: std::io::EndOfFile, ..}) => { + if byte_pos > 0 { + out.write_u8('\n' as u8); + } + break + } + _ => fail!(), + }; + + if byte == ('\n' as u8) { + out.write_u8('\n' as u8); + byte_pos = 0; + print_delim = false; + range_pos = 0; + } else { + byte_pos += 1; + + if byte_pos > ranges.get(range_pos).high { + range_pos += 1; + } + + let cur_range = *ranges.get(range_pos); + + if byte_pos >= cur_range.low { + if use_delim { + if print_delim && byte_pos == cur_range.low { + out.write_str(out_delim.as_slice()); + } + + print_delim = true; + } + + out.write_u8(byte); + } + } + } + } + + return 0; +} + +fn cut_charachters(files: Vec, ranges: Vec, + opts: Options) -> int { + return 0; +} + +fn cut_fields(files: Vec, ranges: Vec, + opts: FieldOptions) -> int { + for range in ranges.iter() { + println!("{}-{}", range.low, range.high); + } + + return 0; +} + +#[allow(dead_code)] +fn main() { os::set_exit_status(uumain(os::args())); } + +pub fn uumain(args: Vec) -> int { + let program = args.get(0).clone(); + let opts = [ + optopt("b", "bytes", "select only these bytes", "LIST"), + optopt("c", "characters", "select only these characters", "LIST"), + optopt("d", "delimiter", "use DELIM instead of TAB for field delimiter", "DELIM"), + optopt("f", "fields", "select only these fields; also print any line that contains no delimiter character, unless the -s option is specified", "LIST"), + optflag("n", "", "(ignored)"), + optflag("", "complement", "complement the set of selected bytes, characters or fields"), + optflag("s", "only-delimited", "do not print lines not containing delimiters"), + optopt("", "output-delimiter", "use STRING as the output delimiter the default is to use the input delimiter", "STRING"), + optflag("", "help", "display this help and exit"), + optflag("", "version", "output version information and exit"), + ]; + + let mut matches = match getopts(args.tail(), opts) { + Ok(m) => m, + Err(f) => { + show_error!(1, "Invalid options\n{}", f.to_err_msg()) + return 1; + } + }; + + if matches.opt_present("help") { + println!("Usage:"); + println!(" {0:s} OPTION... [FILE]...", program); + println!(""); + print(usage("Print selected parts of lines from each FILE to standard output.", opts).as_slice()); + println!(""); + println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one"); + println!("range, or many ranges separated by commas. Selected input is written"); + println!("in the same order that it is read, and is written exactly once."); + println!("Each range is one of:"); + println!(""); + println!(" N N'th byte, character or field, counted from 1"); + println!(" N- from N'th byte, character or field, to end of line"); + println!(" N-M from N'th to M'th (included) byte, character or field"); + println!(" -M from first to M'th (included) byte, character or field"); + println!(""); + println!("With no FILE, or when FILE is -, read standard input."); + return 0; + } + + if matches.opt_present("version") { + println!("{} {}", NAME, VERSION); + return 0; + } + + let complement = matches.opt_present("complement"); + let mut out_delim = matches.opt_str("output-delimiter"); + + let mode = match (matches.opt_str("bytes"), matches.opt_str("characters"), + matches.opt_str("fields")) { + (Some(byte_ranges), None, None) => { + match list_to_ranges(byte_ranges.as_slice(), complement) { + Ok(ranges) => Bytes(ranges, Options{ out_delim: out_delim }), + Err(msg) => { + show_error!(1, "{}", msg); + return 1; + } + } + } + (None ,Some(char_ranges), None) => { + match list_to_ranges(char_ranges.as_slice(), complement) { + Ok(ranges) => Characters(ranges, + Options{ out_delim: out_delim }), + Err(msg) => { + show_error!(1, "{}", msg); + return 1; + } + } + } + (None, None ,Some(field_ranges)) => { + match list_to_ranges(field_ranges.as_slice(), complement) { + Ok(ranges) => { + use std::str::from_char; + + let only_delimited = matches.opt_present("only-delimited"); + let delim = matches.opt_str("delimiter") + .filtered(|s| s.len() == 1) + .map(|s| s.as_slice().char_at(0)) + .unwrap_or('\t'); + if out_delim.is_none() { + out_delim = Some(from_char(delim)); + } + + Fields(ranges, + FieldOptions{ delimiter: delim, + out_delimeter: out_delim.unwrap(), + only_delimited: only_delimited }) + } + Err(msg) => { + show_error!(1, "{}", msg); + return 1; + } + } + } + (ref b, ref c, ref f) if b.is_some() || c.is_some() || f.is_some() => { + crash!(1, "only one type of list may be specified"); + } + _ => crash!(1, "you must specify a list of bytes, characters, or fields") + }; + + match mode { + Bytes(..) | Characters(..) => { + if matches.opt_present("delimiter") { + show_error!(1, "an input delimiter may be specified only when operating on fields"); + return 1; + } + if matches.opt_present("only-delimited") { + show_error!(1, "suppressing non-delimited lines makes sense only when operating on fields"); + return 1; + } + } + _ => () + } + + for filename in matches.free.iter() { + if ! (filename.as_slice() == "-" || + Path::new(filename.as_slice()).exists()) { + show_error!(1, "{}: No such file or directory", filename); + return 1; + } + } + + if matches.free.len() == 0 { matches.free.push("-".to_string()); } + + match mode { + Bytes(ranges, opts) => return cut_bytes(matches.free, ranges, opts), + Characters(ranges, opts) => return cut_charachters(matches.free, + ranges, opts), + Fields(ranges, opts) => return cut_fields(matches.free, ranges, opts), + } +} + +fn open(path: &String) -> Option>> { + if "-" == path.as_slice() { + let reader = box stdin() as Box; + return Some(BufferedReader::new(reader)); + } + + match File::open(&std::path::Path::new(path.as_slice())) { + Ok(fd) => { + let reader = box fd as Box; + return Some(BufferedReader::new(reader)); + }, + Err(e) => { + show_error!(1, "{0:s}: {1:s}", *path, e.desc.to_str()); + } + } + + None +} diff --git a/cut/ranges.rs b/cut/ranges.rs new file mode 100644 index 000000000..e0fed0a68 --- /dev/null +++ b/cut/ranges.rs @@ -0,0 +1,108 @@ +/* + * This file is part of the uutils coreutils package. + * + * (c) Rolf Morel + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use std; + +#[deriving(PartialEq,Eq,PartialOrd,Ord,Show)] +pub struct Range { + pub low: uint, + pub high: uint, +} + +impl std::from_str::FromStr for Range { + fn from_str(s: &str) -> Option { + use std::uint::MAX; + + let mut parts = s.splitn('-', 1); + + match (parts.next(), parts.next()) { + (Some(nm), None) => { + from_str::(nm).filtered(|nm| *nm > 0) + .map(|nm| Range{ low: nm, high: nm }) + } + (Some(n), Some(m)) if m.len() == 0 => { + from_str::(n).filtered(|low| *low > 0) + .map(|low| Range{ low: low, high: MAX }) + } + (Some(n), Some(m)) if n.len() == 0 => { + from_str::(m).filtered(|high| *high >= 1) + .map(|high| Range{ low: 1, high: high }) + } + (Some(n), Some(m)) => { + match (from_str::(n), from_str::(m)) { + (Some(low), Some(high)) if low > 0 && low <= high => { + Some(Range{ low: low, high: high }) + } + _ => None + } + } + _ => unreachable!() + } + } +} + +impl Range { + pub fn from_list(list: &str) -> Result, String> { + use std::cmp::max; + + let mut ranges = vec!(); + + for item in list.split(',') { + match from_str::(item) { + Some(range_item) => ranges.push(range_item), + None => return Err(format!("range '{}' was invalid", item)) + } + } + + ranges.sort(); + + // merge overlapping ranges + for i in range(0, ranges.len()) { + let j = i + 1; + + while j < ranges.len() && ranges.get(j).low <= ranges.get(i).high { + let j_high = ranges.remove(j).unwrap().high; + ranges.get_mut(i).high = max(ranges.get(i).high, j_high); + } + } + + Ok(ranges) + } +} + +pub fn complement(ranges: &Vec) -> Vec { + use std::uint; + + let mut complements = Vec::with_capacity(ranges.len() + 1); + + if ranges.len() > 0 && ranges.get(0).low > 1 { + complements.push(Range{ low: 1, high: ranges.get(0).low - 1 }); + } + + let mut ranges_iter = ranges.iter().peekable(); + loop { + match (ranges_iter.next(), ranges_iter.peek()) { + (Some(left), Some(right)) => { + if left.high + 1 != right.low { + complements.push(Range{ low: left.high + 1, + high: right.low - 1 }); + } + } + (Some(last), None) => { + if last.high < uint::MAX { + complements.push(Range{ low: last.high + 1, + high: uint::MAX }); + } + } + _ => break + } + } + + complements +}