From 4fe3c54ebd892714afdf2131d5f7a0144d7af54d Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 16:36:09 +0100 Subject: [PATCH 1/8] Added SIZE multiplier suffixes. --- src/split/split.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/split/split.rs b/src/split/split.rs index f6d99fb86..2979d82b1 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -48,6 +48,8 @@ pub fn uumain(args: Vec) -> int { println!(" {0} [OPTION]... [INPUT [PREFIX]]", NAME); println!(""); io::print(getopts::usage("Output fixed-size pieces of INPUT to PREFIXaa, PREFIX ab, ...; default size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is -, read standard input." , &opts).as_slice()); + println!(""); + println!("SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg."); return 0; } @@ -164,13 +166,29 @@ struct ByteSplitter { impl Splitter for ByteSplitter { fn new(_: Option, settings: &Settings) -> Box { - let n = match from_str(settings.strategy_param.as_slice()) { - Some(a) => a, - _ => crash!(1, "invalid number of lines") + let mut strategy_param : Vec = settings.strategy_param.chars().collect(); + let suffix = strategy_param.pop().unwrap(); + let multiplier = match suffix { + '0'...'9' => 1u, + 'b' => 512u, + 'k' => 1024u, + 'm' => 1024u * 1024u, + _ => crash!(1, "invalid number of bytes") + }; + let n = if suffix.is_alphabetic() { + match String::from_chars(strategy_param.as_slice()).as_slice().parse::() { + Some(a) => a, + _ => crash!(1, "invalid number of bytes") + } + } else { + match settings.strategy_param.as_slice().parse::() { + Some(a) => a, + _ => crash!(1, "invalid number of bytes") + } }; box ByteSplitter { - saved_bytes_to_write: n, - bytes_to_write: n, + saved_bytes_to_write: n * multiplier, + bytes_to_write: n * multiplier, } as Box } From 43606188c000233f4a1eb7c13c0ac928fd11d7af Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 16:39:34 +0100 Subject: [PATCH 2/8] Removed deprecated calls. --- src/split/split.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/split/split.rs b/src/split/split.rs index 2979d82b1..8a546c16b 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -71,7 +71,7 @@ pub fn uumain(args: Vec) -> int { settings.numeric_suffix = if matches.opt_present("d") { true } else { false }; settings.suffix_length = match matches.opt_str("a") { - Some(n) => match from_str(n.as_slice()) { + Some(n) => match n.as_slice().parse() { Some(m) => m, None => crash!(1, "cannot parse num") }, @@ -139,7 +139,7 @@ struct LineSplitter { impl Splitter for LineSplitter { fn new(_: Option, settings: &Settings) -> Box { - let n = match from_str(settings.strategy_param.as_slice()) { + let n = match settings.strategy_param.as_slice().parse() { Some(a) => a, _ => crash!(1, "invalid number of lines") }; @@ -194,7 +194,7 @@ impl Splitter for ByteSplitter { fn consume(&mut self, control: &mut SplitControl) -> String { let line = control.current_line.clone(); - let n = std::cmp::min(line.as_slice().char_len(), self.bytes_to_write); + let n = std::cmp::min(line.as_slice().chars().count(), self.bytes_to_write); self.bytes_to_write -= n; if n == 0 { self.bytes_to_write = self.saved_bytes_to_write; @@ -262,7 +262,7 @@ fn split(settings: &Settings) -> int { let mut writer = io::BufferedWriter::new(box io::stdio::stdout_raw() as Box); let mut fileno = 0; loop { - if control.current_line.as_slice().char_len() == 0 { + if control.current_line.as_slice().chars().count() == 0 { match reader.read_line() { Ok(a) => { control.current_line = a; } Err(_) => { break; } @@ -288,10 +288,10 @@ fn split(settings: &Settings) -> int { let consumed = splitter.consume(&mut control); crash_if_err!(1, writer.write_str(consumed.as_slice())); - let advance = consumed.as_slice().char_len(); + let advance = consumed.as_slice().chars().count(); let clone = control.current_line.clone(); let sl = clone.as_slice(); - control.current_line = sl.slice(advance, sl.char_len()).to_string(); + control.current_line = sl.slice(advance, sl.chars().count()).to_string(); } 0 } From e9fce8e7882d0c73dd2f5ecbc93b5a36a6500624 Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 17:39:15 +0100 Subject: [PATCH 3/8] C flag for split --- src/split/split.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/split/split.rs b/src/split/split.rs index 8a546c16b..b202251bf 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -162,6 +162,8 @@ impl Splitter for LineSplitter { struct ByteSplitter { saved_bytes_to_write: uint, bytes_to_write: uint, + break_on_line_end: bool, + require_whole_line: bool, } impl Splitter for ByteSplitter { @@ -189,17 +191,28 @@ impl Splitter for ByteSplitter { box ByteSplitter { saved_bytes_to_write: n * multiplier, bytes_to_write: n * multiplier, + break_on_line_end: if settings.strategy == "b" { false } else { true }, + require_whole_line: false, } as Box } fn consume(&mut self, control: &mut SplitControl) -> String { let line = control.current_line.clone(); let n = std::cmp::min(line.as_slice().chars().count(), self.bytes_to_write); + if self.require_whole_line && n < line.as_slice().chars().count() { + self.bytes_to_write = self.saved_bytes_to_write; + control.request_new_file = true; + self.require_whole_line = false; + return line.as_slice().slice(0, 0).to_string(); + } self.bytes_to_write -= n; if n == 0 { self.bytes_to_write = self.saved_bytes_to_write; control.request_new_file = true; } + if self.break_on_line_end && n == line.as_slice().chars().count() { + self.require_whole_line = self.break_on_line_end; + } line.as_slice().slice(0, n).to_string() } } @@ -250,7 +263,7 @@ fn split(settings: &Settings) -> int { let mut splitter: Box = match settings.strategy.as_slice() { "l" => Splitter::new(None::, settings), - "b" => Splitter::new(None::, settings), + "b" | "C" => Splitter::new(None::, settings), a @ _ => crash!(1, "strategy {} not supported", a) }; From 2dc585813192acc7c0ab4562ea6da5f4f8b0f0d9 Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 17:47:57 +0100 Subject: [PATCH 4/8] Added verbose for split - prints created file name --- src/split/README.md | 3 +-- src/split/split.rs | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/split/README.md b/src/split/README.md index aa4de21a9..582c9970a 100644 --- a/src/split/README.md +++ b/src/split/README.md @@ -3,8 +3,7 @@ ## Missing Features ### Flags -* `-C N` -* `--verbose` +* `--verbose` - created file printing is implemented, don't know if there is anything else ## Possible Optimizations * Use slice (`[u8]`) directly as the `control.current_line`. diff --git a/src/split/split.rs b/src/split/split.rs index b202251bf..81d85db67 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -296,6 +296,9 @@ fn split(settings: &Settings) -> int { fileno += 1; writer = io::BufferedWriter::new(box io::File::open_mode(&Path::new(filename.as_slice()), io::Open, io::Write) as Box); control.request_new_file = false; + if settings.verbose { + println!("creating file '{}'", filename); + } } let consumed = splitter.consume(&mut control); From dbd1d34ba3e45af0ba741e2c7514400bffcc842a Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 16:36:09 +0100 Subject: [PATCH 5/8] Added SIZE multiplier suffixes. --- src/split/split.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/split/split.rs b/src/split/split.rs index 9755f6797..7c3cbd12e 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -48,6 +48,8 @@ pub fn uumain(args: Vec) -> int { println!(" {0} [OPTION]... [INPUT [PREFIX]]", NAME); println!(""); io::print(getopts::usage("Output fixed-size pieces of INPUT to PREFIXaa, PREFIX ab, ...; default size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is -, read standard input." , &opts).as_slice()); + println!(""); + println!("SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg."); return 0; } @@ -164,13 +166,29 @@ struct ByteSplitter { impl Splitter for ByteSplitter { fn new(_: Option, settings: &Settings) -> Box { - let n = match from_str(settings.strategy_param.as_slice()) { - Some(a) => a, - _ => crash!(1, "invalid number of lines") + let mut strategy_param : Vec = settings.strategy_param.chars().collect(); + let suffix = strategy_param.pop().unwrap(); + let multiplier = match suffix { + '0'...'9' => 1u, + 'b' => 512u, + 'k' => 1024u, + 'm' => 1024u * 1024u, + _ => crash!(1, "invalid number of bytes") + }; + let n = if suffix.is_alphabetic() { + match String::from_chars(strategy_param.as_slice()).as_slice().parse::() { + Some(a) => a, + _ => crash!(1, "invalid number of bytes") + } + } else { + match settings.strategy_param.as_slice().parse::() { + Some(a) => a, + _ => crash!(1, "invalid number of bytes") + } }; box ByteSplitter { - saved_bytes_to_write: n, - bytes_to_write: n, + saved_bytes_to_write: n * multiplier, + bytes_to_write: n * multiplier, } as Box } From 7945e3d51c7e1541416b293fc57f52532ea1beca Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 16:39:34 +0100 Subject: [PATCH 6/8] Removed deprecated calls. --- src/split/split.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/split/split.rs b/src/split/split.rs index 7c3cbd12e..8a546c16b 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -71,7 +71,7 @@ pub fn uumain(args: Vec) -> int { settings.numeric_suffix = if matches.opt_present("d") { true } else { false }; settings.suffix_length = match matches.opt_str("a") { - Some(n) => match from_str(n.as_slice()) { + Some(n) => match n.as_slice().parse() { Some(m) => m, None => crash!(1, "cannot parse num") }, @@ -139,7 +139,7 @@ struct LineSplitter { impl Splitter for LineSplitter { fn new(_: Option, settings: &Settings) -> Box { - let n = match from_str(settings.strategy_param.as_slice()) { + let n = match settings.strategy_param.as_slice().parse() { Some(a) => a, _ => crash!(1, "invalid number of lines") }; From 93c3f0260095f3c83f737fb3cf6c4e3235cbb05b Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 17:39:15 +0100 Subject: [PATCH 7/8] C flag for split --- src/split/split.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/split/split.rs b/src/split/split.rs index 8a546c16b..b202251bf 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -162,6 +162,8 @@ impl Splitter for LineSplitter { struct ByteSplitter { saved_bytes_to_write: uint, bytes_to_write: uint, + break_on_line_end: bool, + require_whole_line: bool, } impl Splitter for ByteSplitter { @@ -189,17 +191,28 @@ impl Splitter for ByteSplitter { box ByteSplitter { saved_bytes_to_write: n * multiplier, bytes_to_write: n * multiplier, + break_on_line_end: if settings.strategy == "b" { false } else { true }, + require_whole_line: false, } as Box } fn consume(&mut self, control: &mut SplitControl) -> String { let line = control.current_line.clone(); let n = std::cmp::min(line.as_slice().chars().count(), self.bytes_to_write); + if self.require_whole_line && n < line.as_slice().chars().count() { + self.bytes_to_write = self.saved_bytes_to_write; + control.request_new_file = true; + self.require_whole_line = false; + return line.as_slice().slice(0, 0).to_string(); + } self.bytes_to_write -= n; if n == 0 { self.bytes_to_write = self.saved_bytes_to_write; control.request_new_file = true; } + if self.break_on_line_end && n == line.as_slice().chars().count() { + self.require_whole_line = self.break_on_line_end; + } line.as_slice().slice(0, n).to_string() } } @@ -250,7 +263,7 @@ fn split(settings: &Settings) -> int { let mut splitter: Box = match settings.strategy.as_slice() { "l" => Splitter::new(None::, settings), - "b" => Splitter::new(None::, settings), + "b" | "C" => Splitter::new(None::, settings), a @ _ => crash!(1, "strategy {} not supported", a) }; From 61c20863105e7400a55930433e78c27098db6b69 Mon Sep 17 00:00:00 2001 From: Michal Piekarz Date: Sun, 4 Jan 2015 17:47:57 +0100 Subject: [PATCH 8/8] Added verbose for split - prints created file name --- src/split/README.md | 3 +-- src/split/split.rs | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/split/README.md b/src/split/README.md index aa4de21a9..582c9970a 100644 --- a/src/split/README.md +++ b/src/split/README.md @@ -3,8 +3,7 @@ ## Missing Features ### Flags -* `-C N` -* `--verbose` +* `--verbose` - created file printing is implemented, don't know if there is anything else ## Possible Optimizations * Use slice (`[u8]`) directly as the `control.current_line`. diff --git a/src/split/split.rs b/src/split/split.rs index b202251bf..81d85db67 100644 --- a/src/split/split.rs +++ b/src/split/split.rs @@ -296,6 +296,9 @@ fn split(settings: &Settings) -> int { fileno += 1; writer = io::BufferedWriter::new(box io::File::open_mode(&Path::new(filename.as_slice()), io::Open, io::Write) as Box); control.request_new_file = false; + if settings.verbose { + println!("creating file '{}'", filename); + } } let consumed = splitter.consume(&mut control);