diff --git a/Cargo.toml b/Cargo.toml index 1e6172d49..d937ade3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ feat_common_core = [ "cksum", "comm", "cp", + "csplit", "cut", "date", "df", @@ -241,6 +242,7 @@ chroot = { optional=true, version="0.0.1", package="uu_chroot", path="src/uu/c cksum = { optional=true, version="0.0.1", package="uu_cksum", path="src/uu/cksum" } comm = { optional=true, version="0.0.1", package="uu_comm", path="src/uu/comm" } cp = { optional=true, version="0.0.1", package="uu_cp", path="src/uu/cp" } +csplit = { optional=true, version="0.0.1", package="uu_csplit", path="src/uu/csplit" } cut = { optional=true, version="0.0.1", package="uu_cut", path="src/uu/cut" } date = { optional=true, version="0.0.1", package="uu_date", path="src/uu/date" } df = { optional=true, version="0.0.1", package="uu_df", path="src/uu/df" } @@ -332,6 +334,7 @@ pin_winapi-util = { version="0.1.2, < 0.1.3", package="winapi-util" } ## winapi- [dev-dependencies] conv = "0.3" filetime = "0.2" +glob = "0.3.0" libc = "0.2" rand = "0.7" regex = "1.0" diff --git a/GNUmakefile b/GNUmakefile index ff5a064ca..3eacc8659 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -53,6 +53,7 @@ PROGS := \ cksum \ comm \ cp \ + csplit \ cut \ df \ dircolors \ @@ -160,6 +161,7 @@ TEST_PROGS := \ cksum \ comm \ cp \ + csplit \ cut \ dircolors \ dirname \ diff --git a/src/uu/csplit/Cargo.toml b/src/uu/csplit/Cargo.toml new file mode 100644 index 000000000..f44f80da0 --- /dev/null +++ b/src/uu/csplit/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "uu_csplit" +version = "0.0.1" +authors = ["uutils developers"] +license = "MIT" +description = "csplit ~ (uutils) Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ..., and output byte counts of each piece to standard output" + +homepage = "https://github.com/uutils/coreutils" +repository = "https://github.com/uutils/coreutils/tree/master/src/uu/ls" +keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] 
+categories = ["command-line-utilities"] +edition = "2018" + +[lib] +path = "src/csplit.rs" + +[dependencies] +getopts = "0.2.17" +failure = "0.1.1" +failure_derive = "0.1.1" +regex = "1.0.0" +glob = "0.2.11" +uucore = { version=">=0.0.4", package="uucore", path="../../uucore", features=["entries", "fs"] } + +[[bin]] +name = "csplit" +path = "src/main.rs" diff --git a/src/uu/csplit/src/csplit.rs b/src/uu/csplit/src/csplit.rs new file mode 100644 index 000000000..efdb12738 --- /dev/null +++ b/src/uu/csplit/src/csplit.rs @@ -0,0 +1,760 @@ +#![crate_name = "uu_csplit"] + +#[macro_use] +extern crate failure; +#[macro_use] +extern crate uucore; +extern crate getopts; +extern crate regex; +use std::{fs::{File, remove_file}, io::{BufRead, BufWriter, Write}}; +use std::io::{self, BufReader}; +use getopts::Matches; +use regex::Regex; +/* +mod split_name; +mod patterns; +*/ +mod splitname; +mod patterns; +mod csplitError; + +use crate::splitname::SplitName; +use crate::csplitError::CsplitError; +//mod split_name; + + +//mod csplit; + +static SYNTAX: &'static str = "[OPTION]... FILE PATTERN..."; +static SUMMARY: &'static str = "split a file into sections determined by context lines"; +static LONG_HELP: &'static str = "Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ..., and output byte counts of each piece to standard output."; + +static SUFFIX_FORMAT_OPT: &'static str = "suffix-format"; +static SUPPRESS_MATCHED_OPT: &'static str = "suppress-matched"; +static DIGITS_OPT: &'static str = "digits"; +static PREFIX_OPT: &'static str = "prefix"; +static KEEP_FILES_OPT: &'static str = "keep-files"; +static QUIET_OPT: &'static str = "quiet"; +static ELIDE_EMPTY_FILES_OPT: &'static str = "elide-empty-files"; + +/// Command line options for csplit. 
+pub struct CsplitOptions { + split_name: crate::SplitName, + keep_files: bool, + quiet: bool, + elide_empty_files: bool, + suppress_matched: bool, +} + +impl CsplitOptions { + fn new(matches: &Matches) -> CsplitOptions { + let keep_files = matches.opt_present(KEEP_FILES_OPT); + let quiet = matches.opt_present(QUIET_OPT); + let elide_empty_files = matches.opt_present(ELIDE_EMPTY_FILES_OPT); + let suppress_matched = matches.opt_present(SUPPRESS_MATCHED_OPT); + + CsplitOptions { + split_name: crash_if_err!( + 1, + SplitName::new( + matches.opt_str(PREFIX_OPT), + matches.opt_str(SUFFIX_FORMAT_OPT), + matches.opt_str(DIGITS_OPT) + ) + ), + keep_files, + quiet, + elide_empty_files, + suppress_matched, + } + } +} + +/// Splits a file into severals according to the command line patterns. +/// +/// # Errors +/// +/// - [`io::Error`] if there is some problem reading/writing from/to a file. +/// - [`::CsplitError::LineOutOfRange`] if the linenum pattern is larger than the number of input +/// lines. +/// - [`::CsplitError::LineOutOfRangeOnRepetition`], like previous but after applying the pattern +/// more than once. +/// - [`::CsplitError::MatchNotFound`] if no line matched a regular expression. +/// - [`::CsplitError::MatchNotFoundOnRepetition`], like previous but after applying the pattern +/// more than once. 
+pub fn csplit( + options: &CsplitOptions, + patterns: Vec, + input: T, +) -> Result<(), CsplitError> +where + T: BufRead, +{ + let mut input_iter = InputSplitter::new(input.lines().enumerate()); + let mut split_writer = SplitWriter::new(&options)?; + let ret = do_csplit(&mut split_writer, patterns, &mut input_iter); + + // consume the rest + input_iter.rewind_buffer(); + if let Some((_, line)) = input_iter.next() { + split_writer.new_writer()?; + split_writer.writeln(line?)?; + for (_, line) in input_iter { + split_writer.writeln(line?)?; + } + split_writer.finish_split()?; + } + // delete files on error by default + if ret.is_err() && !options.keep_files { + split_writer.delete_all_splits()?; + } + ret +} + +fn do_csplit( + split_writer: &mut SplitWriter, + patterns: Vec, + input_iter: &mut InputSplitter, +) -> Result<(), CsplitError> +where + I: Iterator)>, +{ + // split the file based on patterns + for pattern in patterns.into_iter() { + let pattern_as_str = pattern.to_string(); + let is_skip = if let patterns::Pattern::SkipToMatch(_, _, _) = pattern { + true + } else { + false + }; + match pattern { + patterns::Pattern::UpToLine(n, ex) => { + let mut up_to_line = n; + for (_, ith) in ex.iter() { + split_writer.new_writer()?; + match split_writer.do_to_line(&pattern_as_str, up_to_line, input_iter) { + // the error happened when applying the pattern more than once + Err(CsplitError::LineOutOfRange(_)) if ith != 1 => { + return Err(CsplitError::LineOutOfRangeOnRepetition( + pattern_as_str.to_string(), + ith - 1, + )); + } + Err(err) => return Err(err), + // continue the splitting process + Ok(()) => (), + } + up_to_line += n; + } + } + patterns::Pattern::UpToMatch(regex, offset, ex) + | patterns::Pattern::SkipToMatch(regex, offset, ex) => { + for (max, ith) in ex.iter() { + if is_skip { + // when skipping a part of the input, no writer is created + split_writer.as_dev_null(); + } else { + split_writer.new_writer()?; + } + match ( + 
split_writer.do_to_match(&pattern_as_str, ®ex, offset, input_iter), + max, + ) { + // in case of ::pattern::ExecutePattern::Always, then it's fine not to find a + // matching line + (Err(CsplitError::MatchNotFound(_)), None) => { + return Ok(()); + } + // the error happened when applying the pattern more than once + (Err(CsplitError::MatchNotFound(_)), Some(m)) if m != 1 && ith != 1 => { + return Err(CsplitError::MatchNotFoundOnRepetition( + pattern_as_str.to_string(), + ith - 1, + )); + } + (Err(err), _) => return Err(err), + // continue the splitting process + (Ok(()), _) => (), + }; + } + } + }; + } + Ok(()) +} + +/// Write a portion of the input file into a split which filename is based on an incrementing +/// counter. +struct SplitWriter<'a> { + /// the options set through the command line + options: &'a CsplitOptions, + /// a split counter + counter: usize, + /// the writer to the current split + current_writer: Option>, + /// the size in bytes of the current split + size: usize, + /// flag to indicate that no content should be written to a split + dev_null: bool, +} + +impl<'a> Drop for SplitWriter<'a> { + fn drop(&mut self) { + if self.options.elide_empty_files && self.size == 0 { + let file_name = self.options.split_name.get(self.counter); + remove_file(file_name).expect("Failed to elide split"); + } + } +} + +impl<'a> SplitWriter<'a> { + fn new(options: &CsplitOptions) -> io::Result { + Ok(SplitWriter { + options, + counter: 0, + current_writer: None, + size: 0, + dev_null: false, + }) + } + + /// Creates a new split and returns its filename. + /// + /// # Errors + /// + /// The creation of the split file may fail with some [`io::Error`]. 
+ fn new_writer(&mut self) -> io::Result<()> { + let file_name = self.options.split_name.get(self.counter); + let file = File::create(&file_name)?; + self.current_writer = Some(BufWriter::new(file)); + self.counter += 1; + self.size = 0; + self.dev_null = false; + Ok(()) + } + + /// The current split will not keep any of the read input lines. + fn as_dev_null(&mut self) { + self.dev_null = true; + } + + /// Writes the line to the current split, appending a newline character. + /// If [`dev_null`] is true, then the line is discarded. + /// + /// # Errors + /// + /// Some [`io::Error`] may occur when attempting to write the line. + fn writeln(&mut self, line: String) -> io::Result<()> { + if !self.dev_null { + match self.current_writer { + Some(ref mut current_writer) => { + let bytes = line.as_bytes(); + current_writer.write_all(bytes)?; + current_writer.write(b"\n")?; + self.size += bytes.len() + 1; + } + None => panic!("trying to write to a split that was not created"), + } + } + Ok(()) + } + + /// Perform some operations after completing a split, i.e., either remove it + /// if the [`::ELIDE_EMPTY_FILES_OPT`] option is enabled, or print how much bytes were written + /// to it if [`::QUIET_OPT`] is disabled. + /// + /// # Errors + /// + /// Some [`io::Error`] if the split could not be removed in case it should be elided. + fn finish_split(&mut self) -> io::Result<()> { + if !self.dev_null { + if self.options.elide_empty_files && self.size == 0 { + self.counter -= 1; + } else if !self.options.quiet { + println!("{}", self.size); + } + } + return Ok(()); + } + + /// Removes all the split files that were created. + /// + /// # Errors + /// + /// Returns an [`io::Error`] if there was a problem removing a split. 
+ fn delete_all_splits(&self) -> io::Result<()> { + let mut ret = Ok(()); + for ith in 0..self.counter { + let file_name = self.options.split_name.get(ith); + if let Err(err) = remove_file(file_name) { + ret = Err(err); + } + } + ret + } + + /// Split the input stream up to the line number `n`. + /// + /// If the line number `n` is smaller than the current position in the input, then an empty + /// split is created. + /// + /// # Errors + /// + /// In addition to errors reading/writing from/to a file, if the line number + /// `n` is greater than the total available lines, then a + /// [`::CsplitError::LineOutOfRange`] error is returned. + fn do_to_line( + &mut self, + pattern_as_str: &str, + n: usize, + input_iter: &mut InputSplitter, + ) -> Result<(), CsplitError> + where + I: Iterator)>, + { + input_iter.rewind_buffer(); + input_iter.set_size_of_buffer(1); + + let mut ret = Err(CsplitError::LineOutOfRange(pattern_as_str.to_string())); + while let Some((ln, line)) = input_iter.next() { + let l = line?; + if ln + 1 > n { + if input_iter.add_line_to_buffer(ln, l).is_some() { + panic!("the buffer is big enough to contain 1 line"); + } + ret = Ok(()); + break; + } else if ln + 1 == n { + if !self.options.suppress_matched { + if input_iter.add_line_to_buffer(ln, l).is_some() { + panic!("the buffer is big enough to contain 1 line"); + } + } + ret = Ok(()); + break; + } + self.writeln(l)?; + } + self.finish_split()?; + ret + } + + /// Read lines up to the line matching a [`Regex`]. With a non-zero offset, + /// the block of relevant lines can be extended (if positive), or reduced + /// (if negative). + /// + /// # Errors + /// + /// In addition to errors reading/writing from/to a file, the following errors may be returned: + /// - if no line matched, an [`::CsplitError::MatchNotFound`]. + /// - if there are not enough lines to accomodate the offset, an + /// [`::CsplitError::LineOutOfRange`]. 
+ fn do_to_match( + &mut self, + pattern_as_str: &str, + regex: &Regex, + mut offset: i32, + input_iter: &mut InputSplitter, + ) -> Result<(), CsplitError> + where + I: Iterator)>, + { + if offset >= 0 { + // The offset is zero or positive, no need for a buffer on the lines read. + // NOTE: drain the buffer of input_iter, no match should be done within. + for line in input_iter.drain_buffer() { + self.writeln(line)?; + } + // retain the matching line + input_iter.set_size_of_buffer(1); + + while let Some((ln, line)) = input_iter.next() { + let l = line?; + if regex.is_match(&l) { + match (self.options.suppress_matched, offset) { + // no offset, add the line to the next split + (false, 0) => { + if input_iter.add_line_to_buffer(ln, l).is_some() { + panic!("the buffer is big enough to contain 1 line"); + } + } + // a positive offset, some more lines need to be added to the current split + (false, _) => self.writeln(l)?, + _ => (), + }; + offset -= 1; + + // write the extra lines required by the offset + while offset > 0 { + match input_iter.next() { + Some((_, line)) => { + self.writeln(line?)?; + } + None => { + self.finish_split()?; + return Err(CsplitError::LineOutOfRange( + pattern_as_str.to_string(), + )); + } + }; + offset -= 1; + } + self.finish_split()?; + return Ok(()); + } + self.writeln(l)?; + } + } else { + // With a negative offset we use a buffer to keep the lines within the offset. + // NOTE: do not drain the buffer of input_iter, in case of an LineOutOfRange error + // but do not rewind it either since no match should be done within. + // The consequence is that the buffer may already be full with lines from a previous + // split, which is taken care of when calling `shrink_buffer_to_size`. 
+ let offset_usize = -offset as usize; + input_iter.set_size_of_buffer(offset_usize); + while let Some((ln, line)) = input_iter.next() { + let l = line?; + if regex.is_match(&l) { + for line in input_iter.shrink_buffer_to_size() { + self.writeln(line)?; + } + if !self.options.suppress_matched { + // add 1 to the buffer size to make place for the matched line + input_iter.set_size_of_buffer(offset_usize + 1); + if input_iter.add_line_to_buffer(ln, l).is_some() { + panic!("should be big enough to hold every lines"); + } + } + self.finish_split()?; + if input_iter.buffer_len() < offset_usize { + return Err(CsplitError::LineOutOfRange(pattern_as_str.to_string())); + } + return Ok(()); + } + if let Some(line) = input_iter.add_line_to_buffer(ln, l) { + self.writeln(line)?; + } + } + // no match, drain the buffer into the current split + for line in input_iter.drain_buffer() { + self.writeln(line)?; + } + } + + self.finish_split()?; + Err(CsplitError::MatchNotFound(pattern_as_str.to_string())) + } +} + +/// An iterator which can output items from a buffer filled externally. +/// This is used to pass matching lines to the next split and to support patterns with a negative offset. +struct InputSplitter +where + I: Iterator)>, +{ + iter: I, + buffer: Vec<::Item>, + /// the number of elements the buffer may hold + size: usize, + /// flag to indicate content off the buffer should be returned instead of off the wrapped + /// iterator + rewind: bool, +} + +impl InputSplitter +where + I: Iterator)>, +{ + fn new(iter: I) -> InputSplitter { + InputSplitter { + iter, + buffer: Vec::new(), + rewind: false, + size: 1, + } + } + + /// Rewind the iteration by outputing the buffer's content. + fn rewind_buffer(&mut self) { + self.rewind = true; + } + + /// Shrink the buffer so that its length is equal to the set size, returning an iterator for + /// the elements that were too much. 
+ fn shrink_buffer_to_size<'a>(&'a mut self) -> impl Iterator + 'a { + let mut shrink_offset = 0; + if self.buffer.len() > self.size { + shrink_offset = self.buffer.len() - self.size; + } + self.buffer + .drain(..shrink_offset) + .map(|(_, line)| line.unwrap()) + } + + /// Drain the content of the buffer. + fn drain_buffer<'a>(&'a mut self) -> impl Iterator + 'a { + self.buffer.drain(..).map(|(_, line)| line.unwrap()) + } + + /// Set the maximum number of lines to keep. + fn set_size_of_buffer(&mut self, size: usize) { + self.size = size; + } + + /// Add a line to the buffer. If the buffer has [`size`] elements, then its head is removed and + /// the new line is pushed to the buffer. The removed head is then available in the returned + /// option. + fn add_line_to_buffer(&mut self, ln: usize, line: String) -> Option { + if self.rewind { + self.buffer.insert(0, (ln, Ok(line))); + None + } else if self.buffer.len() >= self.size { + let (_, head_line) = self.buffer.remove(0); + self.buffer.push((ln, Ok(line))); + Some(head_line.unwrap()) + } else { + self.buffer.push((ln, Ok(line))); + None + } + } + + /// Returns the number of lines stored in the buffer + fn buffer_len(&self) -> usize { + self.buffer.len() + } +} + +impl Iterator for InputSplitter +where + I: Iterator)>, +{ + type Item = ::Item; + + fn next(&mut self) -> Option { + if self.rewind { + if !self.buffer.is_empty() { + return Some(self.buffer.remove(0)); + } + self.rewind = false; + } + self.iter.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn input_splitter() { + let input = vec![ + Ok(String::from("aaa")), + Ok(String::from("bbb")), + Ok(String::from("ccc")), + Ok(String::from("ddd")), + ]; + let mut input_splitter = InputSplitter::new(input.into_iter().enumerate()); + + input_splitter.set_size_of_buffer(2); + assert_eq!(input_splitter.buffer_len(), 0); + + match input_splitter.next() { + Some((0, Ok(line))) => { + assert_eq!(line, String::from("aaa")); + 
assert_eq!(input_splitter.add_line_to_buffer(0, line), None); + assert_eq!(input_splitter.buffer_len(), 1); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((1, Ok(line))) => { + assert_eq!(line, String::from("bbb")); + assert_eq!(input_splitter.add_line_to_buffer(1, line), None); + assert_eq!(input_splitter.buffer_len(), 2); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((2, Ok(line))) => { + assert_eq!(line, String::from("ccc")); + assert_eq!( + input_splitter.add_line_to_buffer(2, line), + Some(String::from("aaa")) + ); + assert_eq!(input_splitter.buffer_len(), 2); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + input_splitter.rewind_buffer(); + + match input_splitter.next() { + Some((1, Ok(line))) => { + assert_eq!(line, String::from("bbb")); + assert_eq!(input_splitter.buffer_len(), 1); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((2, Ok(line))) => { + assert_eq!(line, String::from("ccc")); + assert_eq!(input_splitter.buffer_len(), 0); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((3, Ok(line))) => { + assert_eq!(line, String::from("ddd")); + assert_eq!(input_splitter.buffer_len(), 0); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + assert!(input_splitter.next().is_none()); + } + + #[test] + fn input_splitter_interrupt_rewind() { + let input = vec![ + Ok(String::from("aaa")), + Ok(String::from("bbb")), + Ok(String::from("ccc")), + Ok(String::from("ddd")), + ]; + let mut input_splitter = InputSplitter::new(input.into_iter().enumerate()); + + input_splitter.set_size_of_buffer(3); + assert_eq!(input_splitter.buffer_len(), 0); + + match input_splitter.next() { + Some((0, Ok(line))) => { + assert_eq!(line, String::from("aaa")); + assert_eq!(input_splitter.add_line_to_buffer(0, line), None); + assert_eq!(input_splitter.buffer_len(), 1); + 
} + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((1, Ok(line))) => { + assert_eq!(line, String::from("bbb")); + assert_eq!(input_splitter.add_line_to_buffer(1, line), None); + assert_eq!(input_splitter.buffer_len(), 2); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((2, Ok(line))) => { + assert_eq!(line, String::from("ccc")); + assert_eq!(input_splitter.add_line_to_buffer(2, line), None); + assert_eq!(input_splitter.buffer_len(), 3); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + input_splitter.rewind_buffer(); + + match input_splitter.next() { + Some((0, Ok(line))) => { + assert_eq!(line, String::from("aaa")); + assert_eq!(input_splitter.add_line_to_buffer(0, line), None); + assert_eq!(input_splitter.buffer_len(), 3); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((0, Ok(line))) => { + assert_eq!(line, String::from("aaa")); + assert_eq!(input_splitter.buffer_len(), 2); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((1, Ok(line))) => { + assert_eq!(line, String::from("bbb")); + assert_eq!(input_splitter.buffer_len(), 1); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((2, Ok(line))) => { + assert_eq!(line, String::from("ccc")); + assert_eq!(input_splitter.buffer_len(), 0); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + match input_splitter.next() { + Some((3, Ok(line))) => { + assert_eq!(line, String::from("ddd")); + assert_eq!(input_splitter.buffer_len(), 0); + } + item @ _ => panic!("wrong item: {:?}", item), + }; + + assert!(input_splitter.next().is_none()); + } +} + +pub fn uumain(args: impl uucore::Args) -> i32 { + let args = args.collect_str(); + + let matches = app!(SYNTAX, SUMMARY, LONG_HELP) + .optopt( + "b", + SUFFIX_FORMAT_OPT, + "use sprintf FORMAT instead of %02d", + "FORMAT", + 
) + .optopt("f", PREFIX_OPT, "use PREFIX instead of 'xx'", "PREFIX") + .optflag("k", KEEP_FILES_OPT, "do not remove output files on errors") + .optflag( + "", + SUPPRESS_MATCHED_OPT, + "suppress the lines matching PATTERN", + ) + .optopt( + "n", + DIGITS_OPT, + "use specified number of digits instead of 2", + "DIGITS", + ) + .optflag("s", QUIET_OPT, "do not print counts of output file sizes") + .optflag("z", ELIDE_EMPTY_FILES_OPT, "remove empty output files") + .parse(args); + + // check for mandatory arguments + if matches.free.is_empty() { + show_error!("missing operand"); + exit!(1); + } + if matches.free.len() == 1 { + show_error!("missing operand after '{}'", matches.free[0]); + exit!(1); + } + // get the patterns to split on + let patterns = return_if_err!(1, patterns::get_patterns(&matches.free[1..])); + // get the file to split + let file_name: &str = &matches.free[0]; + let options = CsplitOptions::new(&matches); + if file_name == "-" { + let stdin = io::stdin(); + crash_if_err!(1, csplit(&options, patterns, stdin.lock())); + } else { + let file = return_if_err!(1, File::open(file_name)); + let file_metadata = return_if_err!(1, file.metadata()); + if !file_metadata.is_file() { + crash!(1, "'{}' is not a regular file", file_name); + } + crash_if_err!(1, csplit(&options, patterns, BufReader::new(file))); + }; + 0 +} diff --git a/src/uu/csplit/src/csplitError.rs b/src/uu/csplit/src/csplitError.rs new file mode 100644 index 000000000..dfbff3cb1 --- /dev/null +++ b/src/uu/csplit/src/csplitError.rs @@ -0,0 +1,34 @@ +use std::io; + +/// Errors thrown by the csplit command +#[derive(Debug, Fail)] +pub enum CsplitError { + #[fail(display = "IO error: {}", _0)] + IoError(io::Error), + #[fail(display = "'{}': line number out of range", _0)] + LineOutOfRange(String), + #[fail(display = "'{}': line number out of range on repetition {}", _0, _1)] + LineOutOfRangeOnRepetition(String, usize), + #[fail(display = "'{}': match not found", _0)] + MatchNotFound(String), + 
#[fail(display = "'{}': match not found on repetition {}", _0, _1)] + MatchNotFoundOnRepetition(String, usize), + #[fail(display = "line number must be greater than zero")] + LineNumberIsZero, + #[fail(display = "line number '{}' is smaller than preceding line number, {}", _0, _1)] + LineNumberSmallerThanPrevious(usize, usize), + #[fail(display = "invalid pattern: {}", _0)] + InvalidPattern(String), + #[fail(display = "invalid number: '{}'", _0)] + InvalidNumber(String), + #[fail(display = "incorrect conversion specification in suffix")] + SuffixFormatIncorrect, + #[fail(display = "too many % conversion specifications in suffix")] + SuffixFormatTooManyPercents, +} + +impl From for CsplitError { + fn from(error: io::Error) -> Self { + CsplitError::IoError(error) + } +} \ No newline at end of file diff --git a/src/uu/csplit/src/main.rs b/src/uu/csplit/src/main.rs new file mode 100644 index 000000000..5ebe43a18 --- /dev/null +++ b/src/uu/csplit/src/main.rs @@ -0,0 +1,2 @@ + +uucore_procs::main!(uu_csplit); // spell-checker:ignore procs uucore diff --git a/src/uu/csplit/src/patterns.rs b/src/uu/csplit/src/patterns.rs new file mode 100644 index 000000000..1fd1c8b8c --- /dev/null +++ b/src/uu/csplit/src/patterns.rs @@ -0,0 +1,353 @@ +use regex::Regex; +use crate::csplitError::CsplitError; + +/// The definition of a pattern to match on a line. +#[derive(Debug)] +pub enum Pattern { + /// Copy the file's content to a split up to, not including, the given line number. The number + /// of times the pattern is executed is detailed in [`ExecutePattern`]. + UpToLine(usize, ExecutePattern), + /// Copy the file's content to a split up to, not including, the line matching the regex. The + /// integer is an offset relative to the matched line of what to include (if positive) or + /// to exclude (if negative). The number of times the pattern is executed is detailed in + /// [`ExecutePattern`]. 
+ UpToMatch(Regex, i32, ExecutePattern), + /// Skip the file's content up to, not including, the line matching the regex. The integer + /// is an offset relative to the matched line of what to include (if positive) or to exclude + /// (if negative). The number of times the pattern is executed is detailed in [`ExecutePattern`]. + SkipToMatch(Regex, i32, ExecutePattern), +} + +impl ToString for Pattern { + fn to_string(&self) -> String { + match self { + Pattern::UpToLine(n, _) => n.to_string(), + Pattern::UpToMatch(regex, 0, _) => format!("/{}/", regex.as_str()), + Pattern::UpToMatch(regex, offset, _) => format!("/{}/{:+}", regex.as_str(), offset), + Pattern::SkipToMatch(regex, 0, _) => format!("%{}%", regex.as_str()), + Pattern::SkipToMatch(regex, offset, _) => format!("%{}%{:+}", regex.as_str(), offset), + } + } +} + +/// The number of times a pattern can be used. +#[derive(Debug)] +pub enum ExecutePattern { + /// Execute the pattern as many times as possible + Always, + /// Execute the pattern a fixed number of times + Times(usize), +} + +impl ExecutePattern { + pub fn iter(&self) -> ExecutePatternIter { + match self { + ExecutePattern::Times(n) => ExecutePatternIter::new(Some(*n)), + ExecutePattern::Always => ExecutePatternIter::new(None), + } + } +} + +pub struct ExecutePatternIter { + max: Option, + cur: usize, +} + +impl ExecutePatternIter { + fn new(max: Option) -> ExecutePatternIter { + ExecutePatternIter { max, cur: 0 } + } +} + +impl Iterator for ExecutePatternIter { + type Item = (Option, usize); + + fn next(&mut self) -> Option<(Option, usize)> { + match self.max { + // iterate until m is reached + Some(m) => { + if self.cur == m { + None + } else { + self.cur += 1; + Some((self.max, self.cur)) + } + } + // no limit, just increment a counter + None => { + self.cur += 1; + Some((None, self.cur)) + } + } + } +} + +/// Parses the definitions of patterns given on the command line into a list of [`Pattern`]s. 
+/// +/// # Errors +/// +/// If a pattern is incorrect, a [`::CsplitError::InvalidPattern`] error is returned, which may be +/// due to, e.g.,: +/// - an invalid regular expression; +/// - an invalid number for, e.g., the offset. +pub fn get_patterns(args: &[String]) -> Result, CsplitError> { + let patterns = extract_patterns(args)?; + validate_line_numbers(&patterns)?; + Ok(patterns) +} + +fn extract_patterns(args: &[String]) -> Result, CsplitError> { + let mut patterns = Vec::with_capacity(args.len()); + let to_match_reg = + Regex::new(r"^(/(?P.+)/|%(?P.+)%)(?P[\+-]\d+)?$").unwrap(); + let execute_ntimes_reg = Regex::new(r"^\{(?P\d+)|\*\}$").unwrap(); + let mut iter = args.iter().peekable(); + + while let Some(arg) = iter.next() { + // get the number of times a pattern is repeated, which is at least once plus whatever is + // in the quantifier. + let execute_ntimes = match iter.peek() { + None => ExecutePattern::Times(1), + Some(&next_item) => { + match execute_ntimes_reg.captures(next_item) { + None => ExecutePattern::Times(1), + Some(r) => { + // skip the next item + iter.next(); + if let Some(times) = r.name("TIMES") { + ExecutePattern::Times(times.as_str().parse::().unwrap() + 1) + } else { + ExecutePattern::Always + } + } + } + } + }; + + // get the pattern definition + if let Some(captures) = to_match_reg.captures(arg) { + let offset = match captures.name("OFFSET") { + None => 0, + Some(m) => m.as_str().parse().unwrap(), + }; + if let Some(up_to_match) = captures.name("UPTO") { + let pattern = match Regex::new(up_to_match.as_str()) { + Err(_) => { + return Err(CsplitError::InvalidPattern(arg.to_string())); + } + Ok(reg) => reg, + }; + patterns.push(Pattern::UpToMatch(pattern, offset, execute_ntimes)); + } else if let Some(skip_to_match) = captures.name("SKIPTO") { + let pattern = match Regex::new(skip_to_match.as_str()) { + Err(_) => { + return Err(CsplitError::InvalidPattern(arg.to_string())); + } + Ok(reg) => reg, + }; + 
patterns.push(Pattern::SkipToMatch(pattern, offset, execute_ntimes)); + } + } else if let Some(line_number) = arg.parse::().ok() { + patterns.push(Pattern::UpToLine(line_number, execute_ntimes)); + } else { + return Err(CsplitError::InvalidPattern(arg.to_string())); + } + } + Ok(patterns) +} + +/// Asserts the line numbers are in increasing order, starting at 1. +fn validate_line_numbers(patterns: &[Pattern]) -> Result<(), CsplitError> { + patterns + .iter() + .filter_map(|pattern| match pattern { + Pattern::UpToLine(line_number, _) => Some(line_number), + _ => None, + }) + .try_fold(0, |prev_ln, ¤t_ln| match (prev_ln, current_ln) { + // a line number cannot be zero + (_, 0) => Err(CsplitError::LineNumberIsZero), + // two consecutifs numbers should not be equal + (n, m) if n == m => { + show_warning!("line number '{}' is the same as preceding line number", n); + Ok(n) + } + // a number cannot be greater than the one that follows + (n, m) if n > m => Err(CsplitError::LineNumberSmallerThanPrevious(m, n)), + (_, m) => Ok(m), + })?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bad_pattern() { + let input = vec!["bad".to_string()]; + assert!(get_patterns(input.as_slice()).is_err()); + } + + #[test] + fn up_to_line_pattern() { + let input: Vec = vec!["24", "42", "{*}", "50", "{4}"] + .into_iter() + .map(|v| v.to_string()) + .collect(); + let patterns = get_patterns(input.as_slice()).unwrap(); + assert_eq!(patterns.len(), 3); + match patterns.get(0) { + Some(Pattern::UpToLine(24, ExecutePattern::Times(1))) => (), + _ => panic!("expected UpToLine pattern"), + }; + match patterns.get(1) { + Some(Pattern::UpToLine(42, ExecutePattern::Always)) => (), + _ => panic!("expected UpToLine pattern"), + }; + match patterns.get(2) { + Some(Pattern::UpToLine(50, ExecutePattern::Times(5))) => (), + _ => panic!("expected UpToLine pattern"), + }; + } + + #[test] + fn up_to_match_pattern() { + let input: Vec = vec![ + "/test1.*end$/", + "/test2.*end$/", + 
"{*}", + "/test3.*end$/", + "{4}", + "/test4.*end$/+3", + "/test5.*end$/-3", + ].into_iter() + .map(|v| v.to_string()) + .collect(); + let patterns = get_patterns(input.as_slice()).unwrap(); + assert_eq!(patterns.len(), 5); + match patterns.get(0) { + Some(Pattern::UpToMatch(reg, 0, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test1.*end$"); + } + _ => panic!("expected UpToMatch pattern"), + }; + match patterns.get(1) { + Some(Pattern::UpToMatch(reg, 0, ExecutePattern::Always)) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test2.*end$"); + } + _ => panic!("expected UpToMatch pattern"), + }; + match patterns.get(2) { + Some(Pattern::UpToMatch(reg, 0, ExecutePattern::Times(5))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test3.*end$"); + } + _ => panic!("expected UpToMatch pattern"), + }; + match patterns.get(3) { + Some(Pattern::UpToMatch(reg, 3, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test4.*end$"); + } + _ => panic!("expected UpToMatch pattern"), + }; + match patterns.get(4) { + Some(Pattern::UpToMatch(reg, -3, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test5.*end$"); + } + _ => panic!("expected UpToMatch pattern"), + }; + } + + #[test] + fn skip_to_match_pattern() { + let input: Vec = vec![ + "%test1.*end$%", + "%test2.*end$%", + "{*}", + "%test3.*end$%", + "{4}", + "%test4.*end$%+3", + "%test5.*end$%-3", + ].into_iter() + .map(|v| v.to_string()) + .collect(); + let patterns = get_patterns(input.as_slice()).unwrap(); + assert_eq!(patterns.len(), 5); + match patterns.get(0) { + Some(Pattern::SkipToMatch(reg, 0, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test1.*end$"); + } + _ => panic!("expected SkipToMatch pattern"), + }; + match patterns.get(1) { + Some(Pattern::SkipToMatch(reg, 0, 
ExecutePattern::Always)) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test2.*end$"); + } + _ => panic!("expected SkipToMatch pattern"), + }; + match patterns.get(2) { + Some(Pattern::SkipToMatch(reg, 0, ExecutePattern::Times(5))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test3.*end$"); + } + _ => panic!("expected SkipToMatch pattern"), + }; + match patterns.get(3) { + Some(Pattern::SkipToMatch(reg, 3, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test4.*end$"); + } + _ => panic!("expected SkipToMatch pattern"), + }; + match patterns.get(4) { + Some(Pattern::SkipToMatch(reg, -3, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{}", reg); + assert_eq!(parsed_reg, "test5.*end$"); + } + _ => panic!("expected SkipToMatch pattern"), + }; + } + + #[test] + fn line_number_zero() { + let patterns = vec![Pattern::UpToLine(0, ExecutePattern::Times(1))]; + match validate_line_numbers(&patterns) { + Err(::CsplitError::LineNumberIsZero) => (), + _ => panic!("expected LineNumberIsZero error"), + } + } + + #[test] + fn line_number_smaller_than_previous() { + let input: Vec = vec!["10".to_string(), "5".to_string()]; + match get_patterns(input.as_slice()) { + Err(::CsplitError::LineNumberSmallerThanPrevious(5, 10)) => (), + _ => panic!("expected LineNumberSmallerThanPrevious error"), + } + } + + #[test] + fn line_number_smaller_than_previous_separate() { + let input: Vec = vec!["10".to_string(), "/20/".to_string(), "5".to_string()]; + match get_patterns(input.as_slice()) { + Err(::CsplitError::LineNumberSmallerThanPrevious(5, 10)) => (), + _ => panic!("expected LineNumberSmallerThanPrevious error"), + } + } + + #[test] + fn line_number_zero_separate() { + let input: Vec = vec!["10".to_string(), "/20/".to_string(), "0".to_string()]; + match get_patterns(input.as_slice()) { + Err(::CsplitError::LineNumberIsZero) => (), + _ => panic!("expected LineNumberIsZero error"), + } 
+ } +} diff --git a/src/uu/csplit/src/splitname.rs b/src/uu/csplit/src/splitname.rs new file mode 100644 index 000000000..1082d38ec --- /dev/null +++ b/src/uu/csplit/src/splitname.rs @@ -0,0 +1,397 @@ +use regex::Regex; + +//mod csplit; +use crate::CsplitError; + +/// Computes the filename of a split, taking into consideration a possible user-defined suffix +/// format. +pub struct SplitName { + fn_split_name: Box String>, +} + +impl SplitName { + /// Creates a new SplitName with the given user-defined options: + /// - `prefix_opt` specifies a prefix for all splits. + /// - `format_opt` specifies a custom format for the suffix part of the filename, using the + /// `sprintf` format notation. + /// - `n_digits_opt` defines the width of the split number. + /// + /// # Caveats + /// + /// If `prefix_opt` and `format_opt` are defined, and the `format_opt` has some string appearing + /// before the conversion pattern (e.g., "here-%05d"), then it is appended to the passed prefix + /// via `prefix_opt`. + /// + /// If `n_digits_opt` and `format_opt` are defined, then width defined in `format_opt` is + /// taken. 
+ pub fn new( + prefix_opt: Option, + format_opt: Option, + n_digits_opt: Option, + ) -> Result { + // get the prefix + let prefix = prefix_opt.unwrap_or("xx".to_string()); + // the width for the split offset + let n_digits = match n_digits_opt { + None => 2, + Some(opt) => match opt.parse::() { + Ok(digits) => digits, + Err(_) => return Err(CsplitError::InvalidNumber(opt)), + }, + }; + // translate the custom format into a function + let fn_split_name: Box String> = match format_opt { + None => Box::new(move |n: usize| -> String { + format!("{}{:0width$}", prefix, n, width = n_digits) + }), + Some(custom) => { + let spec = Regex::new( + r"(?P%(?P[0#-])(?P\d+)?(?P[diuoxX]))", + ).unwrap(); + let mut captures_iter = spec.captures_iter(&custom); + let custom_fn: Box String> = match captures_iter.next() { + Some(captures) => { + let all = captures.name("ALL").unwrap(); + let before = custom[0..all.start()].to_owned(); + let after = custom[all.end()..].to_owned(); + let n_digits = match captures.name("WIDTH") { + None => 0, + Some(m) => m.as_str().parse::().unwrap(), + }; + match (captures.name("FLAG"), captures.name("TYPE")) { + (Some(ref f), Some(ref t)) => { + match (f.as_str(), t.as_str()) { + /* + * zero padding + */ + + // decimal + ("0", "d") | ("0", "i") | ("0", "u") => { + Box::new(move |n: usize| -> String { + format!( + "{}{}{:0width$}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }) + } + // octal + ("0", "o") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:0width$o}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + // lower hexadecimal + ("0", "x") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:0width$x}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + // upper hexadecimal + ("0", "X") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:0width$X}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + + /* + * Alternate form + */ + + // octal + ("#", "o") 
=> Box::new(move |n: usize| -> String { + format!( + "{}{}{:>#width$o}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + // lower hexadecimal + ("#", "x") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:>#width$x}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + // upper hexadecimal + ("#", "X") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:>#width$X}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + + /* + * Left adjusted + */ + + // decimal + ("-", "d") | ("-", "i") | ("-", "u") => { + Box::new(move |n: usize| -> String { + format!( + "{}{}{:<#width$}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }) + } + // octal + ("-", "o") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:<#width$o}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + // lower hexadecimal + ("-", "x") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:<#width$x}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + // upper hexadecimal + ("-", "X") => Box::new(move |n: usize| -> String { + format!( + "{}{}{:<#width$X}{}", + prefix, + before, + n, + after, + width = n_digits + ) + }), + + _ => return Err(CsplitError::SuffixFormatIncorrect), + } + } + _ => return Err(CsplitError::SuffixFormatIncorrect), + } + } + None => return Err(CsplitError::SuffixFormatIncorrect), + }; + + // there cannot be more than one format pattern + if captures_iter.next().is_some() { + return Err(CsplitError::SuffixFormatTooManyPercents); + } + custom_fn + } + }; + + Ok(SplitName { fn_split_name }) + } + + /// Returns the filename of the i-th split. 
+ pub fn get(&self, n: usize) -> String { + (self.fn_split_name)(n) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn invalid_number() { + let split_name = SplitName::new(None, None, Some(String::from("bad"))); + match split_name { + Err(CsplitError::InvalidNumber(_)) => (), + _ => panic!("should fail with InvalidNumber"), + }; + } + + #[test] + fn invalid_suffix_format1() { + let split_name = SplitName::new(None, Some(String::from("no conversion string")), None); + match split_name { + Err(CsplitError::SuffixFormatIncorrect) => (), + _ => panic!("should fail with SuffixFormatIncorrect"), + }; + } + + #[test] + fn invalid_suffix_format2() { + let split_name = SplitName::new(None, Some(String::from("%042a")), None); + match split_name { + Err(CsplitError::SuffixFormatIncorrect) => (), + _ => panic!("should fail with SuffixFormatIncorrect"), + }; + } + + #[test] + fn default_formatter() { + let split_name = SplitName::new(None, None, None).unwrap(); + assert_eq!(split_name.get(2), "xx02"); + } + + #[test] + fn default_formatter_with_prefix() { + let split_name = SplitName::new(Some(String::from("aaa")), None, None).unwrap(); + assert_eq!(split_name.get(2), "aaa02"); + } + + #[test] + fn default_formatter_with_width() { + let split_name = SplitName::new(None, None, Some(String::from("5"))).unwrap(); + assert_eq!(split_name.get(2), "xx00002"); + } + + #[test] + fn zero_padding_decimal1() { + let split_name = SplitName::new(None, Some(String::from("cst-%03d-")), None).unwrap(); + assert_eq!(split_name.get(2), "xxcst-002-"); + } + + #[test] + fn zero_padding_decimal2() { + let split_name = SplitName::new( + Some(String::from("pre-")), + Some(String::from("cst-%03d-post")), + None, + ).unwrap(); + assert_eq!(split_name.get(2), "pre-cst-002-post"); + } + + #[test] + fn zero_padding_decimal3() { + let split_name = SplitName::new( + None, + Some(String::from("cst-%03d-")), + Some(String::from("42")), + ).unwrap(); + assert_eq!(split_name.get(2), 
"xxcst-002-"); + } + + #[test] + fn zero_padding_decimal4() { + let split_name = SplitName::new(None, Some(String::from("cst-%03i-")), None).unwrap(); + assert_eq!(split_name.get(2), "xxcst-002-"); + } + + #[test] + fn zero_padding_decimal5() { + let split_name = SplitName::new(None, Some(String::from("cst-%03u-")), None).unwrap(); + assert_eq!(split_name.get(2), "xxcst-002-"); + } + + #[test] + fn zero_padding_octal() { + let split_name = SplitName::new(None, Some(String::from("cst-%03o-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-052-"); + } + + #[test] + fn zero_padding_lower_hexa() { + let split_name = SplitName::new(None, Some(String::from("cst-%03x-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-02a-"); + } + + #[test] + fn zero_padding_upper_hexa() { + let split_name = SplitName::new(None, Some(String::from("cst-%03X-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-02A-"); + } + + #[test] + fn alternate_form_octal() { + let split_name = SplitName::new(None, Some(String::from("cst-%#10o-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst- 0o52-"); + } + + #[test] + fn alternate_form_lower_hexa() { + let split_name = SplitName::new(None, Some(String::from("cst-%#10x-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst- 0x2a-"); + } + + #[test] + fn alternate_form_upper_hexa() { + let split_name = SplitName::new(None, Some(String::from("cst-%#10X-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst- 0x2A-"); + } + + #[test] + fn left_adjusted_decimal1() { + let split_name = SplitName::new(None, Some(String::from("cst-%-10d-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-42 -"); + } + + #[test] + fn left_adjusted_decimal2() { + let split_name = SplitName::new(None, Some(String::from("cst-%-10i-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-42 -"); + } + + #[test] + fn left_adjusted_decimal3() { + let split_name = SplitName::new(None, Some(String::from("cst-%-10u-")), 
None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-42 -"); + } + + #[test] + fn left_adjusted_octal() { + let split_name = SplitName::new(None, Some(String::from("cst-%-10o-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-0o52 -"); + } + + #[test] + fn left_adjusted_lower_hexa() { + let split_name = SplitName::new(None, Some(String::from("cst-%-10x-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-0x2a -"); + } + + #[test] + fn left_adjusted_upper_hexa() { + let split_name = SplitName::new(None, Some(String::from("cst-%-10X-")), None).unwrap(); + assert_eq!(split_name.get(42), "xxcst-0x2A -"); + } + + #[test] + fn too_many_percent() { + let split_name = SplitName::new(None, Some(String::from("%02d-%-3x")), None); + match split_name { + Err(CsplitError::SuffixFormatTooManyPercents) => (), + _ => panic!("should fail with SuffixFormatTooManyPercents"), + }; + } +} diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs new file mode 100644 index 000000000..51cab483c --- /dev/null +++ b/tests/by-util/test_csplit.rs @@ -0,0 +1,1335 @@ +use crate::common::util::*; +use glob::glob; + +/// Returns a string of numbers with the given range, each on a new line. +/// The upper bound is not included. 
/// Returns a string of the numbers within the given range, each followed by a newline.
/// The upper bound is not included; an empty range (`from >= to`) yields an empty string.
fn generate(from: u32, to: u32) -> String {
    let mut numbers = String::new();
    for value in from..to {
        // append in place: rebuilding the whole string with `format!` at every step would
        // copy the accumulated text again and again
        numbers.push_str(&value.to_string());
        numbers.push('\n');
    }
    numbers
}
/// Runs `/9$/+3 {2}` on `numbers50.txt` and expects four splits holding the lines
/// 1-11, 12-21, 22-31 and 32-50.
#[test]
fn test_up_to_match_offset_repeat_twice() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let arguments = ["numbers50.txt", "/9$/+3", "{2}"];
    cmd.args(&arguments)
        .succeeds()
        .stdout_only("24\n30\n30\n57\n");

    let split_paths =
        glob(&fixtures.plus_as_string("xx*")).expect("there should be splits created");
    assert_eq!(split_paths.count(), 4);

    // (split name, first number, number after the last one)
    let expected = [
        ("xx00", 1, 12),
        ("xx01", 12, 22),
        ("xx02", 22, 32),
        ("xx03", 32, 51),
    ];
    for &(name, from, to) in expected.iter() {
        assert_eq!(fixtures.read(name), generate(from, to));
    }
}
/// Runs `%23%` on `numbers50.txt` and expects a single split holding the lines 23-50.
#[test]
fn test_skip_to_match() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let arguments = ["numbers50.txt", "%23%"];
    cmd.args(&arguments).succeeds().stdout_only("84\n");

    let split_paths =
        glob(&fixtures.plus_as_string("xx*")).expect("there should be splits created");
    assert_eq!(split_paths.count(), 1);
    assert_eq!(fixtures.read("xx00"), generate(23, 51));
}
/// With `-k`, a failing run (`/nope/` never matches) is expected to leave the two splits
/// written so far: lines 1-19 and lines 20-50.
#[test]
fn test_option_keep() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let arguments = ["-k", "numbers50.txt", "/20/", "/nope/"];
    cmd.args(&arguments)
        .fails()
        .stderr_is("csplit: error: '/nope/': match not found")
        .stdout_is("48\n93\n");

    let split_paths =
        glob(&fixtures.plus_as_string("xx*")).expect("there should be splits created");
    assert_eq!(split_paths.count(), 2);
    assert_eq!(fixtures.read("xx00"), generate(1, 20));
    assert_eq!(fixtures.read("xx01"), generate(20, 51));
}
/// Runs `--suppress-matched %0$%` on `numbers50.txt` and expects a single split holding the
/// lines 11-50.
#[test]
fn test_skip_to_match_option_suppress_matched() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let arguments = ["numbers50.txt", "--suppress-matched", "%0$%"];
    cmd.args(&arguments).succeeds().stdout_only("120\n");

    let split_paths =
        glob(&fixtures.plus_as_string("xx*")).expect("there should be splits created");
    assert_eq!(split_paths.count(), 1);
    assert_eq!(fixtures.read("xx00"), generate(11, 51));
}
/// Runs `/4/ /nope/ {50}` on `numbers50.txt`, which is expected to fail because `/nope/`
/// never matches: without `-k` no split is left behind, with `-k` the two splits written
/// so far (lines 1-3 and lines 4-50) are kept.
#[test]
fn test_up_to_no_match2() {
    // without --keep-files
    let (discarded_at, mut discarding_cmd) = at_and_ucmd!();
    let arguments = ["numbers50.txt", "/4/", "/nope/", "{50}"];
    discarding_cmd
        .args(&arguments)
        .fails()
        .stdout_is("6\n135\n")
        .stderr_is("csplit: error: '/nope/': match not found");

    let leftovers = glob(&discarded_at.plus_as_string("xx*")).expect("counting splits");
    assert_eq!(leftovers.count(), 0);

    // with --keep-files
    let (kept_at, mut keeping_cmd) = at_and_ucmd!();
    let arguments = ["numbers50.txt", "/4/", "/nope/", "{50}", "-k"];
    keeping_cmd
        .args(&arguments)
        .fails()
        .stdout_is("6\n135\n")
        .stderr_is("csplit: error: '/nope/': match not found");

    let kept = glob(&kept_at.plus_as_string("xx*")).expect("counting splits");
    assert_eq!(kept.count(), 2);
    assert_eq!(kept_at.read("xx00"), generate(1, 4));
    assert_eq!(kept_at.read("xx01"), generate(4, 51));
}
/// Runs `/nope/ {*}` on `numbers50.txt` and expects success with a single split holding
/// the whole input (lines 1-50).
#[test]
fn test_up_to_no_match5() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let arguments = ["numbers50.txt", "/nope/", "{*}"];
    cmd.args(&arguments).succeeds().stdout_only("141\n");

    let split_paths = glob(&fixtures.plus_as_string("xx*")).expect("counting splits");
    assert_eq!(split_paths.count(), 1);
    assert_eq!(fixtures.read("xx00"), generate(1, 51));
}
+#[test]
+fn test_skip_to_no_match1() { // '%nope%' never matches: the run fails and no xx* file exists
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%"])
+        .fails()
+        .stderr_only("csplit: error: '%nope%': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_no_match2() { // '%nope%' with '{50}': the error carries no "on repetition" suffix
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%", "{50}"])
+        .fails()
+        .stderr_only("csplit: error: '%nope%': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_no_match3() { // '%0$%' with '{50}': the error is reported "on repetition 5", no file exists
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%0$%", "{50}"])
+        .fails()
+        .stderr_only("csplit: error: '%0$%': match not found on repetition 5");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_no_match4() { // a never-matching '%nope%' placed before '/4/': the error names '%nope%'
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%", "/4/"])
+        .fails()
+        .stderr_only("csplit: error: '%nope%': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_no_match5() { // '%nope%' with '{*}': succeeds with no stdout, no stderr and no file
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%", "{*}"])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_no_match6() { // never-matching skip pattern with a negative offset: the error quotes '%nope%-5'
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%-5"])
+        .fails()
+        .stderr_only("csplit: error: '%nope%-5': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_no_match7() { // never-matching skip pattern with a positive offset: the error quotes '%nope%+5'
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%+5"])
+        .fails()
+        .stderr_only("csplit: error: '%nope%+5': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_no_match() { // a never-matching pattern fails in both the %...% and the /.../ form
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "%nope%"])
+        .fails()
+        .stderr_only("csplit: error: '%nope%': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: the /.../ form
+    ucmd.args(&["numbers50.txt", "/nope/"])
+        .fails()
+        .stdout_is("141\n") // 141 bytes (the whole fixture) are reported, yet no file remains
+        .stderr_is("csplit: error: '/nope/': match not found");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_too_small_linenum() { // line number 10 lies before the '/20/' split point: xx01 is empty (0 bytes)
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/", "10", "/40/"])
+        .succeeds()
+        .stdout_only("48\n0\n60\n33\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 4);
+    assert_eq!(at.read("xx00"), generate(1, 20)); // generate() is defined outside this chunk; presumably fixture lines a..b, b excluded - TODO confirm
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), generate(20, 40));
+    assert_eq!(at.read("xx03"), generate(40, 51));
+}
+
+#[test]
+fn test_too_small_linenum_equal() { // line number equal to the previous split point (20): xx01 is empty
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/", "20"])
+        .succeeds()
+        .stdout_only("48\n0\n93\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3);
+    assert_eq!(at.read("xx00"), generate(1, 20));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), generate(20, 51));
+}
+
+#[test]
+fn test_too_small_linenum_elided() { // '/20/', '10', '/40/' with -z: no empty split, 3 files, no "0" on stdout
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "-z", "/20/", "10", "/40/"])
+        .succeeds()
+        .stdout_only("48\n60\n33\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3);
+    assert_eq!(at.read("xx00"), generate(1, 20));
+    assert_eq!(at.read("xx01"), generate(20, 40));
+    assert_eq!(at.read("xx02"), generate(40, 51));
+}
+
+#[test]
+fn test_too_small_linenum_negative_offset() { // '/20/-5' splits before line 15; the following '10' yields an empty xx01
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/-5", "10", "/40/"])
+        .succeeds()
+        .stdout_only("33\n0\n75\n33\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 4);
+    assert_eq!(at.read("xx00"), generate(1, 15));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), generate(15, 40));
+    assert_eq!(at.read("xx03"), generate(40, 51));
+}
+
+#[test]
+fn test_too_small_linenum_twice() { // two too-small line numbers (10, 15) after '/20/': two empty splits
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/", "10", "15", "/40/"])
+        .succeeds()
+        .stdout_only("48\n0\n0\n60\n33\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 5);
+    assert_eq!(at.read("xx00"), generate(1, 20));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), "");
+    assert_eq!(at.read("xx03"), generate(20, 40));
+    assert_eq!(at.read("xx04"), generate(40, 51));
+}
+
+#[test]
+fn test_too_small_linenum_repeat() { // '10' with '{*}' after '/20/': fails "on repetition 5" after 7 reported splits
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/", "10", "{*}"])
+        .fails()
+        .stderr_is("csplit: error: '10': line number out of range on repetition 5")
+        .stdout_is("48\n0\n0\n30\n30\n30\n3\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0); // without -k no xx* file is left after the failure
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same patterns plus -k
+    ucmd.args(&["numbers50.txt", "/20/", "10", "{*}", "-k"])
+        .fails()
+        .stderr_is("csplit: error: '10': line number out of range on repetition 5")
+        .stdout_is("48\n0\n0\n30\n30\n30\n3\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 7); // with -k all seven reported splits remain, empty ones included
+    assert_eq!(at.read("xx00"), generate(1, 20));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), "");
+    assert_eq!(at.read("xx03"), generate(20, 30));
+    assert_eq!(at.read("xx04"), generate(30, 40));
+    assert_eq!(at.read("xx05"), generate(40, 50));
+    assert_eq!(at.read("xx06"), "50\n");
+}
+
+#[test]
+fn test_linenum_out_of_range1() { // line number 100 on the 50-line fixture: "line number out of range"
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "100"])
+        .fails()
+        .stdout_is("141\n")
+        .stderr_is("csplit: error: '100': line number out of range");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same pattern plus -k
+    ucmd.args(&["numbers50.txt", "100", "-k"])
+        .fails()
+        .stdout_is("141\n")
+        .stderr_is("csplit: error: '100': line number out of range");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 1); // with -k the whole input is kept as xx00
+    assert_eq!(at.read("xx00"), generate(1, 51));
+}
+
+#[test]
+fn test_linenum_out_of_range2() { // '10' is honoured, then '100' is out of range
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "10", "100"])
+        .fails()
+        .stdout_is("18\n123\n")
+        .stderr_is("csplit: error: '100': line number out of range");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same patterns plus -k
+    ucmd.args(&["numbers50.txt", "10", "100", "-k"])
+        .fails()
+        .stdout_is("18\n123\n")
+        .stderr_is("csplit: error: '100': line number out of range");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 2); // with -k both reported splits remain
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), generate(10, 51));
+}
+
+#[test]
+fn test_linenum_out_of_range3() { // '40' with '{2}': fails "on repetition 1" after two reported splits
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "40", "{2}"])
+        .fails()
+        .stdout_is("108\n33\n")
+        .stderr_is("csplit: error: '40': line number out of range on repetition 1");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same patterns plus -k
+    ucmd.args(&["numbers50.txt", "40", "{2}", "-k"])
+        .fails()
+        .stdout_is("108\n33\n")
+        .stderr_is("csplit: error: '40': line number out of range on repetition 1");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 2); // with -k both reported splits remain
+    assert_eq!(at.read("xx00"), generate(1, 40));
+    assert_eq!(at.read("xx01"), generate(40, 51));
+}
+
+#[test]
+fn test_linenum_out_of_range4() { // '40' with '{*}': fails "on repetition 1" after two reported splits
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "40", "{*}"])
+        .fails()
+        .stdout_is("108\n33\n")
+        .stderr_is("csplit: error: '40': line number out of range on repetition 1");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same patterns plus -k
+    ucmd.args(&["numbers50.txt", "40", "{*}", "-k"])
+        .fails()
+        .stdout_is("108\n33\n")
+        .stderr_is("csplit: error: '40': line number out of range on repetition 1");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 2); // with -k both reported splits remain
+    assert_eq!(at.read("xx00"), generate(1, 40));
+    assert_eq!(at.read("xx01"), generate(40, 51));
+}
+
+#[test]
+fn test_skip_to_match_negative_offset_before_a_match() { // NOTE(review): named skip_to, but both patterns use the /.../ form
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/-10", "/15/"])
+        .fails()
+        .stdout_is("18\n123\n")
+        .stderr_is("csplit: error: '/15/': match not found"); // presumably the search resumes after the line that matched /20/ - TODO confirm
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_skip_to_match_negative_offset_before_a_linenum() { // after '/20/-10' (split before line 10), line number 15 still splits
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/-10", "15"])
+        .succeeds()
+        .stdout_only("18\n15\n108\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3);
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), generate(10, 15));
+    assert_eq!(at.read("xx02"), generate(15, 51));
+}
+
+#[test]
+fn test_corner_case1() { // '/10/' then line number 11: xx01 holds only "10\n"
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/10/", "11"])
+        .succeeds()
+        .stdout_only("18\n3\n120\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3);
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), "10\n");
+    assert_eq!(at.read("xx02"), generate(11, 51));
+}
+
+#[test]
+fn test_corner_case2() { // '/10/-5' then '/10/' again: the second '/10/' is asserted "match not found"
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/10/-5", "/10/"])
+        .fails()
+        .stderr_is("csplit: error: '/10/': match not found")
+        .stdout_is("8\n133\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_corner_case3() { // '/15/-3', '14', then '/15/': the final '/15/' is asserted "match not found"
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/15/-3", "14", "/15/"])
+        .fails()
+        .stderr_is("csplit: error: '/15/': match not found")
+        .stdout_is("24\n6\n111\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+}
+
+#[test]
+fn test_corner_case4() { // two /.../ patterns with negative offsets: splits before lines 10 and 26
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/20/-10", "/30/-4"])
+        .succeeds()
+        .stdout_only("18\n48\n75\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3);
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), generate(10, 26));
+    assert_eq!(at.read("xx02"), generate(26, 51));
+}
+
+// NOTE: differs from gnu's output: the empty split is not written
+#[test]
+fn test_up_to_match_context_underflow() { // asserted here: an empty split IS reported ("0") and, with -k, kept; the NOTE presumably describes gnu - TODO confirm
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "/5/-10"])
+        .fails()
+        .stdout_is("0\n141\n")
+        .stderr_is("csplit: error: '/5/-10': line number out of range");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same pattern plus -k
+    ucmd.args(&["numbers50.txt", "/5/-10", "-k"])
+        .fails()
+        .stdout_is("0\n141\n")
+        .stderr_is("csplit: error: '/5/-10': line number out of range");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("counting splits")
+        .count();
+    assert_eq!(count, 2); // with -k: the empty xx00 plus xx01 holding the whole input
+    assert_eq!(at.read("xx00"), "");
+    assert_eq!(at.read("xx01"), generate(1, 51));
+}
+
+// the offset is out of range because of the first pattern
+// NOTE: output different than gnu's: the empty split is written but the rest of the input file is not
+#[test]
+fn test_linenum_range_with_up_to_match1() { // asserted here: the empty split AND the remaining input are both reported; the NOTE presumably describes gnu - TODO confirm
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "10", "/12/-5"])
+        .fails()
+        .stderr_is("csplit: error: '/12/-5': line number out of range")
+        .stdout_is("18\n0\n123\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same patterns plus -k
+    ucmd.args(&["numbers50.txt", "10", "/12/-5", "-k"])
+        .fails()
+        .stderr_is("csplit: error: '/12/-5': line number out of range")
+        .stdout_is("18\n0\n123\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3); // with -k all three reported splits remain
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), generate(10, 51));
+}
+
+// the offset is out of range because more lines are needed than physically available
+// NOTE: output different than gnu's: the empty split is not written but the rest of the input file is
+#[test]
+fn test_linenum_range_with_up_to_match2() { // asserted here: the empty split AND the remaining input are both reported; the NOTE presumably describes gnu - TODO confirm
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "10", "/12/-15"])
+        .fails()
+        .stderr_is("csplit: error: '/12/-15': line number out of range")
+        .stdout_is("18\n0\n123\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 0);
+
+    let (at, mut ucmd) = at_and_ucmd!(); // second run: same patterns plus -k
+    ucmd.args(&["numbers50.txt", "10", "/12/-15", "-k"])
+        .fails()
+        .stderr_is("csplit: error: '/12/-15': line number out of range")
+        .stdout_is("18\n0\n123\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3); // with -k all three reported splits remain
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), generate(10, 51));
+}
+
+// NOTE: output different than gnu's: the pattern /10/ is matched but should not
+#[test]
+fn test_linenum_range_with_up_to_match3() { // asserted here: '/10/' after line number 10 is "match not found"; which side the NOTE describes is unclear - TODO confirm
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["numbers50.txt", "10", "/10/", "-k"])
+        .fails()
+        .stderr_is("csplit: error: '/10/': match not found")
+        .stdout_is("18\n123\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 2);
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), generate(10, 51));
+
+    let (at, mut ucmd) = at_and_ucmd!(); // reversed order: '/10/' first, then line number 10 yields an empty xx01
+    ucmd.args(&["numbers50.txt", "/10/", "10"])
+        .succeeds()
+        .stdout_only("18\n0\n123\n");
+
+    let count = glob(&at.plus_as_string("xx*"))
+        .expect("there should be splits created")
+        .count();
+    assert_eq!(count, 3);
+    assert_eq!(at.read("xx00"), generate(1, 10));
+    assert_eq!(at.read("xx01"), "");
+    assert_eq!(at.read("xx02"), generate(10, 51));
+}
diff --git a/tests/fixtures/csplit/numbers50.txt b/tests/fixtures/csplit/numbers50.txt
new file mode 100644
index 000000000..96cc55885
--- /dev/null
+++ b/tests/fixtures/csplit/numbers50.txt
@@ -0,0 +1,50 @@
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50