From 6718d97f97e092b6d6a0ec5edb72497ba1b85be1 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Wed, 9 Feb 2022 21:41:33 -0500 Subject: [PATCH] split: add support for -e argument Add the `-e` flag, which indicates whether to elide (that is, remove) empty files that would have been created by the `-n` option. The `-n` command-line argument gives a specific number of chunks into which the input files will be split. If the number of chunks is greater than the number of bytes, then empty files will be created for the excess chunks. But if `-e` is given, then empty files will not be created. For example, contrast $ printf 'a\n' > f && split -e -n 3 f && cat xaa xab xac a cat: xac: No such file or directory with $ printf 'a\n' > f && split -n 3 f && cat xaa xab xac a --- src/uu/split/src/split.rs | 36 ++++++++++++++++++++++++++++- tests/by-util/test_split.rs | 28 +++++++++++++++++++++- tests/fixtures/split/threebytes.txt | 1 + 3 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 tests/fixtures/split/threebytes.txt diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 70de45ce2..7ff08d37d 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -39,6 +39,7 @@ static OPT_VERBOSE: &str = "verbose"; //The ---io-blksize parameter is consumed and ignored. //The parameter is included to make GNU coreutils tests pass. 
static OPT_IO_BLKSIZE: &str = "-io-blksize"; +static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files"; static ARG_INPUT: &str = "input"; static ARG_PREFIX: &str = "prefix"; @@ -128,6 +129,13 @@ pub fn uu_app<'a>() -> App<'a> { "write to shell COMMAND file name is $FILE (Currently not implemented for Windows)", ), ) + .arg( + Arg::new(OPT_ELIDE_EMPTY_FILES) + .long(OPT_ELIDE_EMPTY_FILES) + .short('e') + .takes_value(false) + .help("do not generate empty output files with '-n'"), + ) .arg( Arg::new(OPT_NUMERIC_SUFFIXES) .short('d') @@ -285,6 +293,16 @@ struct Settings { filter: Option<String>, strategy: Strategy, verbose: bool, + + /// Whether to *not* produce empty files when using `-n`. + /// + /// The `-n` command-line argument gives a specific number of + /// chunks into which the input files will be split. If the number + /// of chunks is greater than the number of bytes, and this is + /// `false`, then empty files will be created for the excess + /// chunks. If this is `true`, then empty files will not be + /// created. + elide_empty_files: bool, } /// An error when parsing settings from command-line arguments. @@ -352,6 +370,7 @@ impl Settings { input: matches.value_of(ARG_INPUT).unwrap().to_owned(), prefix: matches.value_of(ARG_PREFIX).unwrap().to_owned(), filter: matches.value_of(OPT_FILTER).map(|s| s.to_owned()), + elide_empty_files: matches.is_present(OPT_ELIDE_EMPTY_FILES), }; #[cfg(windows)] if result.filter.is_some() { @@ -616,9 +635,24 @@ where { // Get the size of the input file in bytes and compute the number // of bytes per chunk. + // + // If the requested number of chunks exceeds the number of bytes + // in the file *and* the `elide_empty_files` parameter is enabled, + // then behave as if the number of chunks was set to the number of + // bytes in the file. This ensures that we don't write empty + // files. Otherwise, just write the `num_chunks - num_bytes` empty + // files. 
let metadata = metadata(&settings.input).unwrap(); let num_bytes = metadata.len(); - let chunk_size = (num_bytes / (num_chunks as u64)) as usize; + let will_have_empty_files = settings.elide_empty_files && num_chunks as u64 > num_bytes; + let (num_chunks, chunk_size) = if will_have_empty_files { + let num_chunks = num_bytes as usize; + let chunk_size = 1; + (num_chunks, chunk_size) + } else { + let chunk_size = ((num_bytes / (num_chunks as u64)) as usize).max(1); + (num_chunks, chunk_size) + }; // This object is responsible for creating the filename for each chunk. let mut filename_iterator = FilenameIterator::new( diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 846d483b2..9454687ac 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2,7 +2,7 @@ // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes +// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes extern crate rand; extern crate regex; @@ -526,3 +526,29 @@ fn test_include_newlines() { at.open("xac").read_to_string(&mut s).unwrap(); assert_eq!(s, "5\n"); } + +#[test] +fn test_allow_empty_files() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n", "4", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "a"); + assert_eq!(at.read("xab"), "b"); + assert_eq!(at.read("xac"), "c"); + assert_eq!(at.read("xad"), ""); +} + +#[test] +fn test_elide_empty_files() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-e", "-n", "4", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "a"); + assert_eq!(at.read("xab"), "b"); + assert_eq!(at.read("xac"), 
"c"); + assert!(!at.plus("xad").exists()); +} diff --git a/tests/fixtures/split/threebytes.txt b/tests/fixtures/split/threebytes.txt new file mode 100644 index 000000000..f2ba8f84a --- /dev/null +++ b/tests/fixtures/split/threebytes.txt @@ -0,0 +1 @@ +abc \ No newline at end of file