diff --git a/Cargo.toml b/Cargo.toml index fc003d5a1..d6db5a2ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -155,6 +155,10 @@ path = "seq/seq.rs" name = "sleep" path = "sleep/sleep.rs" +[[bin]] +name = "split" +path = "split/split.rs" + [[bin]] name = "sum" path = "sum/sum.rs" diff --git a/Makefile b/Makefile index e6e5bec4d..786617d0b 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,7 @@ PROGS := \ rm \ rmdir \ sleep \ + split \ seq \ sum \ sync \ diff --git a/split/README.md b/split/README.md new file mode 100644 index 000000000..aa4de21a9 --- /dev/null +++ b/split/README.md @@ -0,0 +1,10 @@ +# Rudimentary "split" Implementation + +## Missing Features + +### Flags +* `-C N` +* `--verbose` + +## Possible Optimizations +* Use slice (`[u8]`) directly as the `control.current_line`. diff --git a/split/split.rs b/split/split.rs new file mode 100644 index 000000000..500ed267f --- /dev/null +++ b/split/split.rs @@ -0,0 +1,278 @@ +#![crate_id(name="split", vers="1.0.0", author="Akira Hayakawa")] + +/* + * This file is part of the uutils coreutils package. + * + * (c) Akira Hayakawa + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+#![feature(macro_rules)]
+
+extern crate getopts;
+extern crate libc;
+
+use std::io;
+use std::num;
+use std::char;
+
+#[path = "../common/util.rs"]
+mod util;
+
+static NAME: &'static str = "split";
+static VERSION: &'static str = "1.0.0";
+
+// Entry point: parses the command line into a `Settings` value and hands it
+// to `split`. Returns the process exit status (0 on success); unrecoverable
+// problems terminate the process through `crash!`.
+pub fn uumain(args: Vec<String>) -> int {
+    let opts = [
+        getopts::optopt("a", "suffix-length", "use suffixes of length N (default 2)", "N"),
+        getopts::optopt("b", "bytes", "put SIZE bytes per output file", "SIZE"),
+        getopts::optopt("C", "line-bytes", "put at most SIZE bytes of lines per output file", "SIZE"),
+        getopts::optflag("d", "numeric-suffixes", "use numeric suffixes instead of alphabetic"),
+        getopts::optopt("l", "lines", "put NUMBER lines per output file", "NUMBER"),
+        getopts::optflag("", "verbose", "print a diagnostic just before each output file is opened"),
+        getopts::optflag("h", "help", "display help and exit"),
+        getopts::optflag("V", "version", "output version information and exit"),
+    ];
+
+    let matches = match getopts::getopts(args.tail(), opts) {
+        Ok(m) => m,
+        Err(f) => crash!(1, "{}", f)
+    };
+
+    if matches.opt_present("h") {
+        println!("{} v{}", NAME, VERSION);
+        println!("");
+        println!("Usage:");
+        println!("  {0:s} [OPTION]... [INPUT [PREFIX]]", NAME);
+        println!("");
+        io::print(getopts::usage("Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default size is 1000 lines, and default PREFIX is 'x'. With no INPUT, or when INPUT is -, read standard input.", opts).as_slice());
+        return 0;
+    }
+
+    if matches.opt_present("V") {
+        println!("{} v{}", NAME, VERSION);
+        return 0;
+    }
+
+    let numeric_suffix = matches.opt_present("d");
+
+    let suffix_length: uint = match matches.opt_str("a") {
+        Some(n) => match from_str(n.as_slice()) {
+            Some(m) => m,
+            None => crash!(1, "cannot parse num")
+        },
+        None => 2
+    };
+
+    let verbose = matches.opt_present("verbose");
+
+    // At most one of -b / -C / -l may be given. Remember the first one seen
+    // and refuse a second one.
+    let strategies = ["b", "C", "l"];
+    let mut chosen: Option<(String, String)> = None;
+    for e in strategies.iter() {
+        match matches.opt_str(*e) {
+            Some(a) => {
+                if chosen.is_some() {
+                    crash!(1, "{}: cannot split in more than one way", NAME)
+                }
+                chosen = Some((e.to_string(), a));
+            },
+            None => {}
+        }
+    }
+    // With no explicit strategy, split every 1000 lines.
+    let (strategy, strategy_param) = match chosen {
+        Some(pair) => pair,
+        None => ("l".to_string(), "1000".to_string())
+    };
+
+    // Positional operands: [INPUT [PREFIX]]; INPUT defaults to "-" (stdin)
+    // and PREFIX to "x".
+    let mut v = matches.free.iter();
+    let (input, prefix) = match (v.next(), v.next()) {
+        (Some(a), None) => (a.to_string(), "x".to_string()),
+        (Some(a), Some(b)) => (a.to_string(), b.to_string()),
+        (None, _) => ("-".to_string(), "x".to_string()),
+    };
+
+    let settings = Settings {
+        prefix: prefix,
+        numeric_suffix: numeric_suffix,
+        suffix_length: suffix_length,
+        input: input,
+        strategy: strategy,
+        strategy_param: strategy_param,
+        verbose: verbose,
+    };
+
+    split(&settings)
+}
+
+// Everything the command line decided.
+struct Settings {
+    prefix: String,         // Output file name prefix
+    numeric_suffix: bool,   // Use 0-9 instead of a-z in suffixes
+    suffix_length: uint,    // Number of suffix characters
+    input: String,          // Input path, or "-" for standard input
+    strategy: String,       // "b", "C" or "l"
+    strategy_param: String, // Unparsed argument of the chosen strategy
+    verbose: bool,          // Parsed but not acted upon yet (see README)
+}
+
+// State shared between the main loop in `split` and the active Splitter.
+struct SplitControl {
+    current_line: String, // Don't touch
+    request_new_file: bool, // Splitter implementation requests new file
+}
+
+trait Splitter {
+    // Factory pattern
+    fn new(_hint: Option<Self>, &Settings) -> Box<Splitter>;
+
+    // Consume the current_line and return the consumed string
+    fn consume(&mut self, &mut SplitControl) -> String;
+}
+
+// Starts a new output file every `saved_lines_to_write` lines.
+struct LineSplitter {
+    saved_lines_to_write: uint,
+    lines_to_write: uint,
+}
+
+impl Splitter for LineSplitter {
+    fn new(_: Option<LineSplitter>, settings: &Settings) -> Box<Splitter> {
+        // Zero is rejected as well: `consume` decrements before testing, so a
+        // zero count would wrap around instead of ever requesting a new file.
+        let n = match from_str::<uint>(settings.strategy_param.as_slice()) {
+            Some(a) if a > 0 => a,
+            _ => crash!(1, "invalid number of lines")
+        };
+        box LineSplitter {
+            saved_lines_to_write: n,
+            lines_to_write: n,
+        } as Box<Splitter>
+    }
+
+    fn consume(&mut self, control: &mut SplitControl) -> String {
+        self.lines_to_write -= 1;
+        if self.lines_to_write == 0 {
+            self.lines_to_write = self.saved_lines_to_write;
+            control.request_new_file = true;
+        }
+        control.current_line.clone()
+    }
+}
+
+// Starts a new output file every `saved_bytes_to_write` bytes.
+struct ByteSplitter {
+    saved_bytes_to_write: uint,
+    bytes_to_write: uint,
+}
+
+impl Splitter for ByteSplitter {
+    fn new(_: Option<ByteSplitter>, settings: &Settings) -> Box<Splitter> {
+        // Zero is rejected as well: `consume` could then never make progress
+        // and would request a new file on every call.
+        let n = match from_str::<uint>(settings.strategy_param.as_slice()) {
+            Some(a) if a > 0 => a,
+            _ => crash!(1, "invalid number of bytes")
+        };
+        box ByteSplitter {
+            saved_bytes_to_write: n,
+            bytes_to_write: n,
+        } as Box<Splitter>
+    }
+
+    fn consume(&mut self, control: &mut SplitControl) -> String {
+        // `len()` counts bytes: the unit -b is expressed in and the unit
+        // `slice` indexes by. (`char_len()` counts characters and disagrees
+        // with both as soon as the input is not plain ASCII.)
+        let n = std::cmp::min(control.current_line.as_slice().len(), self.bytes_to_write);
+        self.bytes_to_write -= n;
+        if n == 0 {
+            // Budget of the current file is used up: refill it and ask the
+            // main loop for the next file. Nothing is consumed this round.
+            self.bytes_to_write = self.saved_bytes_to_write;
+            control.request_new_file = true;
+        }
+        // NOTE(review): `slice` fails if `n` falls inside a multi-byte
+        // character. Fixing that needs the `[u8]`-based current_line listed
+        // under "Possible Optimizations" in the README.
+        control.current_line.as_slice().slice(0, n).to_string()
+    }
+}
+
+// Renders `i` as exactly `width` digits in base `base`, most significant
+// digit first, using `first` as the character for digit zero. Terminates the
+// program when `i` does not fit into `width` digits.
+fn fixed_width_suffix(i: uint, width: uint, base: uint, first: char) -> String {
+    let mut c = "".to_string();
+    let mut n = i;
+    let mut w = width;
+    while w > 0 {
+        w -= 1;
+        let div = num::pow(base, w);
+        let r = n / div;
+        // Only the leading digit can be out of range; every later remainder
+        // is smaller than its divisor times `base`.
+        if r >= base {
+            crash!(1, "output file suffixes exhausted")
+        }
+        n -= r * div;
+        c.push_char(char::from_u32((first as u32) + (r as u32)).unwrap());
+    }
+    // With width == 0 the loop never runs, so only index 0 is representable.
+    if n != 0 {
+        crash!(1, "output file suffixes exhausted")
+    }
+    c
+}
+
+// (1, 3) -> "aab"
+fn str_prefix(i: uint, width: uint) -> String {
+    fixed_width_suffix(i, width, 26, 'a')
+}
+
+// (1, 3) -> "001"
+fn num_prefix(i: uint, width: uint) -> String {
+    fixed_width_suffix(i, width, 10, '0')
+}
+
+// Reads `settings.input` line by line and distributes it over the files
+// PREFIX<suffix 0>, PREFIX<suffix 1>, ... as directed by the chosen Splitter.
+// Returns 0; every failure terminates the process through `crash!`.
+fn split(settings: &Settings) -> int {
+    let mut reader = io::BufferedReader::new(
+        if settings.input.as_slice() == "-" {
+            box io::stdio::stdin_raw() as Box<Reader>
+        } else {
+            box match io::File::open(&Path::new(settings.input.clone())) {
+                Ok(a) => a,
+                Err(e) => crash!(1, "cannot open '{}' for reading: {}", settings.input, e)
+            } as Box<Reader>
+        }
+    );
+
+    let mut splitter: Box<Splitter> =
+        match settings.strategy.as_slice() {
+            "l" => Splitter::new(None::<LineSplitter>, settings),
+            "b" => Splitter::new(None::<ByteSplitter>, settings),
+            a @ _ => crash!(1, "strategy {} not supported", a)
+        };
+
+    let mut control = SplitControl {
+        current_line: "".to_string(), // Request new line
+        request_new_file: true, // Request new file
+    };
+
+    // Placeholder that is replaced before the first write; the first loop
+    // iteration always opens a real output file.
+    let mut writer = io::BufferedWriter::new(box io::stdio::stdout_raw() as Box<Writer>);
+    let mut fileno = 0;
+    loop {
+        if control.current_line.as_slice().len() == 0 {
+            match reader.read_line() {
+                Ok(a) => { control.current_line = a; }
+                // Only a genuine end of input ends the loop quietly.
+                Err(ref e) if e.kind == io::EndOfFile => { break; }
+                Err(e) => crash!(1, "{}", e)
+            }
+        }
+
+        if control.request_new_file {
+            let mut filename = settings.prefix.to_string();
+            filename.push_str(if settings.numeric_suffix {
+                num_prefix(fileno, settings.suffix_length)
+            } else {
+                str_prefix(fileno, settings.suffix_length)
+            }.as_slice());
+
+            if fileno != 0 {
+                crash_if_err!(1, writer.flush());
+            }
+            fileno += 1;
+            // Truncate, so that a longer file left over from an earlier run
+            // does not keep its old tail.
+            writer = io::BufferedWriter::new(box match io::File::open_mode(&Path::new(filename.as_slice()), io::Truncate, io::Write) {
+                Ok(f) => f,
+                Err(e) => crash!(1, "cannot open '{}' for writing: {}", filename, e)
+            } as Box<Writer>);
+            control.request_new_file = false;
+        }
+
+        let consumed = splitter.consume(&mut control);
+        crash_if_err!(1, writer.write_str(consumed.as_slice()));
+
+        // Drop what was just written from the front of the pending line.
+        // Offsets are byte offsets, matching what `slice` expects.
+        let advance = consumed.as_slice().len();
+        let rest = {
+            let sl = control.current_line.as_slice();
+            sl.slice(advance, sl.len()).to_string()
+        };
+        control.current_line = rest;
+    }
+    // The buffered writer would flush when dropped, but silently; report a
+    // failed final write instead.
+    crash_if_err!(1, writer.flush());
+    0
+}