diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 70de45ce2..7ff08d37d 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -39,6 +39,7 @@ static OPT_VERBOSE: &str = "verbose"; //The ---io-blksize parameter is consumed and ignored. //The parameter is included to make GNU coreutils tests pass. static OPT_IO_BLKSIZE: &str = "-io-blksize"; +static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files"; static ARG_INPUT: &str = "input"; static ARG_PREFIX: &str = "prefix"; @@ -128,6 +129,13 @@ pub fn uu_app<'a>() -> App<'a> { "write to shell COMMAND file name is $FILE (Currently not implemented for Windows)", ), ) + .arg( + Arg::new(OPT_ELIDE_EMPTY_FILES) + .long(OPT_ELIDE_EMPTY_FILES) + .short('e') + .takes_value(false) + .help("do not generate empty output files with '-n'"), + ) .arg( Arg::new(OPT_NUMERIC_SUFFIXES) .short('d') @@ -285,6 +293,16 @@ struct Settings { filter: Option, strategy: Strategy, verbose: bool, + + /// Whether to *not* produce empty files when using `-n`. + /// + /// The `-n` command-line argument gives a specific number of + /// chunks into which the input files will be split. If the number + /// of chunks is greater than the number of bytes, and this is + /// `false`, then empty files will be created for the excess + /// chunks. If this is `true`, then empty files will not be + /// created. + elide_empty_files: bool, } /// An error when parsing settings from command-line arguments. @@ -352,6 +370,7 @@ impl Settings { input: matches.value_of(ARG_INPUT).unwrap().to_owned(), prefix: matches.value_of(ARG_PREFIX).unwrap().to_owned(), filter: matches.value_of(OPT_FILTER).map(|s| s.to_owned()), + elide_empty_files: matches.is_present(OPT_ELIDE_EMPTY_FILES), }; #[cfg(windows)] if result.filter.is_some() { @@ -616,9 +635,24 @@ where { // Get the size of the input file in bytes and compute the number // of bytes per chunk. 
+ // + // If the requested number of chunks exceeds the number of bytes + // in the file *and* the `elide_empty_files` parameter is enabled, + // then behave as if the number of chunks was set to the number of + // bytes in the file. This ensures that we don't write empty + // files. Otherwise, just write the `num_chunks - num_bytes` empty + // files. let metadata = metadata(&settings.input).unwrap(); let num_bytes = metadata.len(); - let chunk_size = (num_bytes / (num_chunks as u64)) as usize; + let will_have_empty_files = settings.elide_empty_files && num_chunks as u64 > num_bytes; + let (num_chunks, chunk_size) = if will_have_empty_files { + let num_chunks = num_bytes as usize; + let chunk_size = 1; + (num_chunks, chunk_size) + } else { + let chunk_size = ((num_bytes / (num_chunks as u64)) as usize).max(1); + (num_chunks, chunk_size) + }; // This object is responsible for creating the filename for each chunk. let mut filename_iterator = FilenameIterator::new( diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 846d483b2..9454687ac 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2,7 +2,7 @@ // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. 
-// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes +// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes extern crate rand; extern crate regex; @@ -526,3 +526,29 @@ fn test_include_newlines() { at.open("xac").read_to_string(&mut s).unwrap(); assert_eq!(s, "5\n"); } + +#[test] +fn test_allow_empty_files() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n", "4", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "a"); + assert_eq!(at.read("xab"), "b"); + assert_eq!(at.read("xac"), "c"); + assert_eq!(at.read("xad"), ""); +} + +#[test] +fn test_elide_empty_files() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-e", "-n", "4", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "a"); + assert_eq!(at.read("xab"), "b"); + assert_eq!(at.read("xac"), "c"); + assert!(!at.plus("xad").exists()); +} diff --git a/tests/fixtures/split/threebytes.txt b/tests/fixtures/split/threebytes.txt new file mode 100644 index 000000000..f2ba8f84a --- /dev/null +++ b/tests/fixtures/split/threebytes.txt @@ -0,0 +1 @@ +abc \ No newline at end of file