split: pass GNU test l-chunk

2025-09-15 11:36:16 +00:00 · 2023-11-21 11:04:38 -05:00 · 2023-11-21 11:04:38 -05:00 · 4dc46f10e9
commit 4dc46f10e9
parent 97d30bd486
3 changed files with 264 additions and 176 deletions
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@ -1130,14 +1130,68 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
    }
 }

+/// An output file for one chunk: its name plus a writer that may be created lazily.
+struct OutFile {
+    /// Name of the output file, passed to `Settings::instantiate_current_writer`.
+    filename: String,
+    /// Writer for the file; `None` until it has been instantiated.
+    maybe_writer: Option<BufWriter<Box<dyn Write>>>,
+}
+
+impl OutFile {
+    /// Get the writer for the output file, instantiating it on first use
+    /// if it was not instantiated upfront.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from `settings.instantiate_current_writer` if the
+    /// writer has to be created and that fails; no writer is recorded then.
+    fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter<Box<dyn Write>>> {
+        // Move any existing writer out, or create one now. `Option::insert`
+        // stores it back and hands out the mutable reference without `unwrap`.
+        let writer = match self.maybe_writer.take() {
+            Some(writer) => writer,
+            None => settings.instantiate_current_writer(self.filename.as_str())?,
+        };
+        Ok(self.maybe_writer.insert(writer))
+    }
+}
+
+/// Build the list of output files for a split into `num_files` chunks.
+/// This is a helper function to [`n_chunks_by_byte`], [`n_chunks_by_line`]
+/// and [`n_chunks_by_line_round_robin`].
+/// Every returned [`OutFile`] carries its filename. When `is_writer_optional`
+/// is `true` no writer is created here and the caller instantiates it later
+/// as needed; otherwise the writer is instantiated right away.
+/// Optional writers are used by [`n_chunks_by_line`]
+/// when the `elide_empty_files` setting is `true`.
+fn get_out_files(
+    num_files: u64,
+    settings: &Settings,
+    is_writer_optional: bool,
+) -> UResult<Vec<OutFile>> {
+    // Source of the per-chunk filenames
+    let mut names = FilenameIterator::new(&settings.prefix, &settings.suffix)
+        .map_err(|e| io::Error::new(ErrorKind::Other, e.to_string()))?;
+    // Collecting into `UResult` stops at the first failure and returns that error
+    (0..num_files)
+        .map(|_| -> UResult<OutFile> {
+            let filename = names
+                .next()
+                .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
+            let maybe_writer = if is_writer_optional {
+                None
+            } else {
+                Some(settings.instantiate_current_writer(filename.as_str())?)
+            };
+            Ok(OutFile {
+                filename,
+                maybe_writer,
+            })
+        })
+        .collect()
+}
+
 /// Split a file or STDIN into a specific number of chunks by byte.
-/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
 ///
 /// When file size cannot be evenly divided into the number of chunks of the same size,
 /// the first X chunks are 1 byte longer than the rest,
 /// where X is a modulus reminder of (file size % number of chunks)
 ///
-/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
+/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk`
 ///
 /// In N chunks mode - this function always creates one output file for each chunk, even
 /// if there is an error reading or writing one of the chunks or if
@ -1207,7 +1261,7 @@ where
    // In Kth chunk of N mode - we will write to stdout instead of to a file.
    let mut stdout_writer = std::io::stdout().lock();
    // In N chunks mode - we will write to `num_chunks` files
-    let mut writers = vec![];
+    let mut out_files: Vec<OutFile> = Vec::new();

    // Calculate chunk size base and modulo reminder
    // to be used in calculating chunk_size later on
@ -1219,16 +1273,7 @@ where
    // This will create each of the underlying files
    // or stdin pipes to child shell/command processes if in `--filter` mode
    if kth_chunk.is_none() {
-        // This object is responsible for creating the filename for each chunk.
-        let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
-            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
-        for _ in 0..num_chunks {
-            let filename = filename_iterator
-                .next()
-                .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
-            let writer = settings.instantiate_current_writer(filename.as_str())?;
-            writers.push(writer);
-        }
+        out_files = get_out_files(num_chunks, settings, false)?;
    }

    for i in 1_u64..=num_chunks {
@ -1272,7 +1317,7 @@ where
                }
                None => {
                    let idx = (i - 1) as usize;
-                    let writer = writers.get_mut(idx).unwrap();
+                    let writer = out_files[idx].get_writer(settings)?;
                    writer.write_all(buf)?;
                }
            }
@ -1284,9 +1329,14 @@ where
 }

 /// Split a file or STDIN into a specific number of chunks by line.
-/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
 ///
-/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
+/// It is most likely that input cannot be evenly divided into the number of chunks
+/// of the same size in bytes or number of lines, since we cannot break lines.
+/// It is also likely that there could be empty files (when `elide_empty_files` is disabled)
+/// when a long line overlaps one or more chunks.
+///
+/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk`
+/// Note: the `elide_empty_files` flag is ignored in this mode
 ///
 /// In N chunks mode - this function always creates one output file for each chunk, even
 /// if there is an error reading or writing one of the chunks or if
@ -1322,76 +1372,97 @@ where
    let initial_buf = &mut Vec::new();
    let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
    let reader = initial_buf.chain(reader);
-    let chunk_size = (num_bytes / num_chunks) as usize;

    // If input file is empty and we would not have determined the Kth chunk
    // in the Kth chunk of N chunk mode, then terminate immediately.
    // This happens on `split -n l/3/10 /dev/null`, for example.
-    if kth_chunk.is_some() && num_bytes == 0 {
+    // Similarly, if input file is empty and `elide_empty_files` parameter is enabled,
+    // then we would have written zero chunks of output,
+    // so terminate immediately as well.
+    // This happens on `split -e -n l/3 /dev/null`, for example.
+    if num_bytes == 0 && (kth_chunk.is_some() || settings.elide_empty_files) {
        return Ok(());
    }

    // In Kth chunk of N mode - we will write to stdout instead of to a file.
    let mut stdout_writer = std::io::stdout().lock();
    // In N chunks mode - we will write to `num_chunks` files
-    let mut writers = vec![];
+    let mut out_files: Vec<OutFile> = Vec::new();
+
+    // Calculate chunk size base and modulo remainder
+    // to be used in calculating `num_bytes_should_be_written` later on
+    let chunk_size_base = num_bytes / num_chunks;
+    let chunk_size_reminder = num_bytes % num_chunks;

    // If in N chunks mode
-    // Create one writer for each chunk.
-    // This will create each of the underlying files
-    // or stdin pipes to child shell/command processes if in `--filter` mode
+    // Generate filenames for each file and
+    // if `elide_empty_files` parameter is NOT enabled - instantiate the writer
+    // which will create each of the underlying files or stdin pipes
+    // to child shell/command processes if in `--filter` mode.
+    // Otherwise keep writer optional, to be instantiated later if there is data
+    // to write for the associated chunk.
    if kth_chunk.is_none() {
-        // This object is responsible for creating the filename for each chunk.
-        let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
-            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
-        for _ in 0..num_chunks {
-            let filename = filename_iterator
-                .next()
-                .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
-            let writer = settings.instantiate_current_writer(filename.as_str())?;
-            writers.push(writer);
-        }
+        out_files = get_out_files(num_chunks, settings, settings.elide_empty_files)?;
    }

-    let mut num_bytes_remaining_in_current_chunk = chunk_size;
-    let mut i = 1;
+    let mut chunk_number = 1;
    let sep = settings.separator;
+    let mut num_bytes_should_be_written = chunk_size_base + (chunk_size_reminder > 0) as u64;
+    let mut num_bytes_written = 0;

    for line_result in reader.split(sep) {
-        // add separator back in at the end of the line
        let mut line = line_result?;
-        line.push(sep);
+        // add separator back in at the end of the line,
+        // since `reader.split(sep)` removes it,
+        // except if the last line did not end with separator character
+        if (num_bytes_written + line.len() as u64) < num_bytes {
+            line.push(sep);
+        }
        let bytes = line.as_slice();

        match kth_chunk {
-            Some(chunk_number) => {
-                if i == chunk_number {
+            Some(kth) => {
+                if chunk_number == kth {
                    stdout_writer.write_all(bytes)?;
                }
            }
            None => {
-                let idx = (i - 1) as usize;
-                let maybe_writer = writers.get_mut(idx);
-                let writer = maybe_writer.unwrap();
+                // Should write into a file
+                let idx = (chunk_number - 1) as usize;
+                let writer = out_files[idx].get_writer(settings)?;
                custom_write_all(bytes, writer, settings)?;
            }
        }

-        let num_bytes = bytes.len();
-        if num_bytes >= num_bytes_remaining_in_current_chunk {
-            num_bytes_remaining_in_current_chunk = chunk_size;
-            i += 1;
-        } else {
-            num_bytes_remaining_in_current_chunk -= num_bytes;
+        // Advance to the next chunk if the current one is filled.
+        // There could be a situation when a long line, which started in current chunk,
+        // would overlap the next chunk (or even several next chunks),
+        // and since we cannot break lines for this split strategy, we could end up with
+        // empty files in place(s) of skipped chunk(s)
+        let num_line_bytes = bytes.len() as u64;
+        num_bytes_written += num_line_bytes;
+        let mut skipped = -1;
+        while num_bytes_should_be_written <= num_bytes_written {
+            num_bytes_should_be_written +=
+                chunk_size_base + (chunk_size_reminder > chunk_number) as u64;
+            chunk_number += 1;
+            skipped += 1;
        }

-        if let Some(chunk_number) = kth_chunk {
-            if i > chunk_number {
+        // If a chunk was skipped and `elide_empty_files` flag is set,
+        // roll chunk_number back to preserve sequential continuity
+        // of file names for files written to,
+        // except for Kth chunk of N mode
+        if settings.elide_empty_files && skipped > 0 && kth_chunk.is_none() {
+            chunk_number -= skipped as u64;
+        }
+
+        if let Some(kth) = kth_chunk {
+            if chunk_number > kth {
                break;
            }
        }
    }
-
    Ok(())
 }

@ -1432,23 +1503,14 @@ where
    // In Kth chunk of N mode - we will write to stdout instead of to a file.
    let mut stdout_writer = std::io::stdout().lock();
    // In N chunks mode - we will write to `num_chunks` files
-    let mut writers = vec![];
+    let mut out_files: Vec<OutFile> = Vec::new();

    // If in N chunks mode
    // Create one writer for each chunk.
    // This will create each of the underlying files
    // or stdin pipes to child shell/command processes if in `--filter` mode
    if kth_chunk.is_none() {
-        // This object is responsible for creating the filename for each chunk.
-        let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
-            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
-        for _ in 0..num_chunks {
-            let filename = filename_iterator
-                .next()
-                .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
-            let writer = settings.instantiate_current_writer(filename.as_str())?;
-            writers.push(writer);
-        }
+        out_files = get_out_files(num_chunks, settings, false)?;
    }

    let num_chunks: usize = num_chunks.try_into().unwrap();
@ -1470,9 +1532,7 @@ where
                }
            }
            None => {
-                let maybe_writer = writers.get_mut(i % num_chunks);
-                let writer = maybe_writer.unwrap();
-
+                let writer = out_files[i % num_chunks].get_writer(settings)?;
                let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
                if !writer_stdin_open {
                    closed_writers += 1;
--- a/src/uu/split/src/strategy.rs
+++ b/src/uu/split/src/strategy.rs
@ -8,7 +8,10 @@
 use crate::{OPT_BYTES, OPT_LINES, OPT_LINE_BYTES, OPT_NUMBER};
 use clap::{parser::ValueSource, ArgMatches};
 use std::fmt;
-use uucore::parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError};
+use uucore::{
+    display::Quotable,
+    parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError},
+};

 /// Sub-strategy of the [`Strategy::Number`]
 /// Splitting a file into a specific number of chunks.
@ -208,10 +211,10 @@ impl fmt::Display for StrategyError {
            Self::Lines(e) => write!(f, "invalid number of lines: {e}"),
            Self::Bytes(e) => write!(f, "invalid number of bytes: {e}"),
            Self::NumberType(NumberTypeError::NumberOfChunks(s)) => {
-                write!(f, "invalid number of chunks: {s}")
+                write!(f, "invalid number of chunks: {}", s.quote())
            }
            Self::NumberType(NumberTypeError::ChunkNumber(s)) => {
-                write!(f, "invalid chunk number: {s}")
+                write!(f, "invalid chunk number: {}", s.quote())
            }
            Self::MultipleWays => write!(f, "cannot split in more than one way"),
        }
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@ -606,13 +606,13 @@ fn test_split_obs_lines_as_other_option_value() {
        .args(&["-n", "-200", "file"])
        .fails()
        .code_is(1)
-        .stderr_contains("split: invalid number of chunks: -200\n");
+        .stderr_contains("split: invalid number of chunks: '-200'\n");
    scene
        .ucmd()
        .args(&["--number", "-e200", "file"])
        .fails()
        .code_is(1)
-        .stderr_contains("split: invalid number of chunks: -e200\n");
+        .stderr_contains("split: invalid number of chunks: '-e200'\n");
 }

 /// Test for using more than one obsolete lines option (standalone)
@ -708,7 +708,7 @@ fn test_split_overflow_bytes_size() {
 fn test_split_stdin_num_chunks() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=1"]).pipe_in("").succeeds();
-    assert_eq!(file_read(&at, "xaa"), "");
+    assert_eq!(at.read("xaa"), "");
    assert!(!at.plus("xab").exists());
 }

@ -727,8 +727,8 @@ fn test_split_stdin_num_line_chunks() {
    ucmd.args(&["--number=l/2"])
        .pipe_in("1\n2\n3\n4\n5\n")
        .succeeds();
-    assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n");
-    assert_eq!(file_read(&at, "xab"), "4\n5\n");
+    assert_eq!(at.read("xaa"), "1\n2\n3\n");
+    assert_eq!(at.read("xab"), "4\n5\n");
    assert!(!at.plus("xac").exists());
 }

@ -741,12 +741,6 @@ fn test_split_stdin_num_kth_line_chunk() {
        .stdout_only("2\n");
 }

-fn file_read(at: &AtPath, filename: &str) -> String {
-    let mut s = String::new();
-    at.open(filename).read_to_string(&mut s).unwrap();
-    s
-}
-
 /// Test for the default suffix length behavior: dynamically increasing size.
 #[test]
 fn test_alphabetic_dynamic_suffix_length() {
@ -766,11 +760,11 @@ fn test_alphabetic_dynamic_suffix_length() {
    for i in b'a'..=b'y' {
        for j in b'a'..=b'z' {
            let filename = format!("x{}{}", i as char, j as char);
-            let contents = file_read(&at, &filename);
+            let contents = at.read(&filename);
            assert_eq!(contents, "a");
        }
    }
-    assert_eq!(file_read(&at, "xzaaa"), "a");
+    assert_eq!(at.read("xzaaa"), "a");
 }

 /// Test for the default suffix length behavior: dynamically increasing size.
@ -790,10 +784,10 @@ fn test_numeric_dynamic_suffix_length() {
        .succeeds();
    for i in 0..90 {
        let filename = format!("x{i:02}");
-        let contents = file_read(&at, &filename);
+        let contents = at.read(&filename);
        assert_eq!(contents, "a");
    }
-    assert_eq!(file_read(&at, "x9000"), "a");
+    assert_eq!(at.read("x9000"), "a");
 }

 #[test]
@ -812,10 +806,10 @@ fn test_hex_dynamic_suffix_length() {
        .succeeds();
    for i in 0..240 {
        let filename = format!("x{i:02x}");
-        let contents = file_read(&at, &filename);
+        let contents = at.read(&filename);
        assert_eq!(contents, "a");
    }
-    assert_eq!(file_read(&at, "xf000"), "a");
+    assert_eq!(at.read("xf000"), "a");
 }

 /// Test for dynamic suffix length (auto-widening) disabled when suffix start number is specified
@ -833,7 +827,7 @@ fn test_dynamic_suffix_length_on_with_suffix_start_no_value() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-b", "1", "--numeric-suffixes", "ninetyonebytes.txt"])
        .succeeds();
-    assert_eq!(file_read(&at, "x9000"), "a");
+    assert_eq!(at.read("x9000"), "a");
 }

 /// Test for suffix auto-width with --number strategy and suffix start number
@ -845,8 +839,8 @@ fn test_suffix_auto_width_with_number() {
    let glob = Glob::new(&at, ".", r"x\d\d\d$");
    assert_eq!(glob.count(), 100);
    assert_eq!(glob.collate(), at.read_bytes("fivelines.txt"));
-    assert_eq!(file_read(&at, "x001"), "1\n");
-    assert_eq!(file_read(&at, "x100"), "");
+    assert_eq!(at.read("x001"), "1\n");
+    assert_eq!(at.read("x100"), "");

    new_ucmd!()
        .args(&["--numeric-suffixes=100", "--number=r/100", "fivelines.txt"])
@ -926,17 +920,12 @@ creating file 'xaf'
 #[test]
 fn test_number_n() {
    let (at, mut ucmd) = at_and_ucmd!();
-    let file_read = |f| {
-        let mut s = String::new();
-        at.open(f).read_to_string(&mut s).unwrap();
-        s
-    };
    ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();
-    assert_eq!(file_read("xaa"), "abcdef");
-    assert_eq!(file_read("xab"), "ghijkl");
-    assert_eq!(file_read("xac"), "mnopq");
-    assert_eq!(file_read("xad"), "rstuv");
-    assert_eq!(file_read("xae"), "wxyz\n");
+    assert_eq!(at.read("xaa"), "abcdef");
+    assert_eq!(at.read("xab"), "ghijkl");
+    assert_eq!(at.read("xac"), "mnopq");
+    assert_eq!(at.read("xad"), "rstuv");
+    assert_eq!(at.read("xae"), "wxyz\n");
    #[cfg(unix)]
    new_ucmd!()
        .args(&["--number=100", "/dev/null"])
@ -974,11 +963,11 @@ fn test_number_kth_of_n() {
    new_ucmd!()
        .args(&["--number=0/5", "asciilowercase.txt"])
        .fails()
-        .stderr_contains("split: invalid chunk number: 0");
+        .stderr_contains("split: invalid chunk number: '0'");
    new_ucmd!()
        .args(&["--number=10/5", "asciilowercase.txt"])
        .fails()
-        .stderr_contains("split: invalid chunk number: 10");
+        .stderr_contains("split: invalid chunk number: '10'");
    #[cfg(target_pointer_width = "64")]
    new_ucmd!()
        .args(&[
@ -986,7 +975,7 @@ fn test_number_kth_of_n() {
            "asciilowercase.txt",
        ])
        .fails()
-        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+        .stderr_contains("split: invalid number of chunks: '18446744073709551616'");
 }

 #[test]
@ -1020,32 +1009,27 @@ fn test_number_kth_of_n_round_robin() {
            "fivelines.txt",
        ])
        .fails()
-        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+        .stderr_contains("split: invalid number of chunks: '18446744073709551616'");
    new_ucmd!()
        .args(&["--number", "r/0/3", "fivelines.txt"])
        .fails()
-        .stderr_contains("split: invalid chunk number: 0");
+        .stderr_contains("split: invalid chunk number: '0'");
    new_ucmd!()
        .args(&["--number", "r/10/3", "fivelines.txt"])
        .fails()
-        .stderr_contains("split: invalid chunk number: 10");
+        .stderr_contains("split: invalid chunk number: '10'");
 }

 #[test]
 fn test_split_number_with_io_blksize() {
    let (at, mut ucmd) = at_and_ucmd!();
-    let file_read = |f| {
-        let mut s = String::new();
-        at.open(f).read_to_string(&mut s).unwrap();
-        s
-    };
    ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"])
        .succeeds();
-    assert_eq!(file_read("xaa"), "abcdef");
-    assert_eq!(file_read("xab"), "ghijkl");
-    assert_eq!(file_read("xac"), "mnopq");
-    assert_eq!(file_read("xad"), "rstuv");
-    assert_eq!(file_read("xae"), "wxyz\n");
+    assert_eq!(at.read("xaa"), "abcdef");
+    assert_eq!(at.read("xab"), "ghijkl");
+    assert_eq!(at.read("xac"), "mnopq");
+    assert_eq!(at.read("xad"), "rstuv");
+    assert_eq!(at.read("xae"), "wxyz\n");
 }

 #[test]
@ -1153,7 +1137,7 @@ fn test_allow_empty_files() {
 }

 #[test]
-fn test_elide_empty_files() {
+fn test_elide_empty_files_n_chunks() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-e", "-n", "4", "threebytes.txt"])
        .succeeds()
@ -1167,7 +1151,7 @@ fn test_elide_empty_files() {

 #[test]
 #[cfg(unix)]
-fn test_elide_dev_null() {
+fn test_elide_dev_null_n_chunks() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-e", "-n", "3", "/dev/null"])
        .succeeds()
@ -1191,24 +1175,58 @@ fn test_dev_zero() {
 }

 #[test]
-fn test_lines() {
+fn test_elide_empty_files_l_chunks() {
    let (at, mut ucmd) = at_and_ucmd!();
-
-    let file_read = |f| {
-        let mut s = String::new();
-        at.open(f).read_to_string(&mut s).unwrap();
-        s
-    };
-
-    // Split into two files without splitting up lines.
-    ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds();
-
-    assert_eq!(file_read("xaa"), "1\n2\n3\n");
-    assert_eq!(file_read("xab"), "4\n5\n");
+    ucmd.args(&["-e", "-n", "l/7", "fivelines.txt"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    assert_eq!(at.read("xaa"), "1\n");
+    assert_eq!(at.read("xab"), "2\n");
+    assert_eq!(at.read("xac"), "3\n");
+    assert_eq!(at.read("xad"), "4\n");
+    assert_eq!(at.read("xae"), "5\n");
+    assert!(!at.plus("xaf").exists());
+    assert!(!at.plus("xag").exists());
 }

 #[test]
-fn test_lines_kth() {
+#[cfg(unix)]
+fn test_elide_dev_null_l_chunks() {
+    // Empty input with `-e`: the command succeeds silently and writes no chunk files.
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-e", "-n", "l/3", "/dev/null"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    for name in ["xaa", "xab", "xac"] {
+        assert!(!at.plus(name).exists());
+    }
+}
+
+#[test]
+#[cfg(unix)]
+fn test_number_by_bytes_dev_zero() {
+    // The command must fail with the size error and leave no chunk files behind.
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n", "3", "/dev/zero"])
+        .fails()
+        .stderr_only("split: /dev/zero: cannot determine file size\n");
+    for name in ["xaa", "xab", "xac"] {
+        assert!(!at.plus(name).exists());
+    }
+}
+
+#[test]
+fn test_number_by_lines() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    // Split into two files without splitting up lines.
+    ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds();
+
+    for (name, expected) in [("xaa", "1\n2\n3\n"), ("xab", "4\n5\n")] {
+        assert_eq!(at.read(name), expected);
+    }
+}
+
+#[test]
+fn test_number_by_lines_kth() {
    new_ucmd!()
        .args(&["-n", "l/3/10", "onehundredlines.txt"])
        .succeeds()
@ -1217,13 +1235,27 @@ fn test_lines_kth() {

 #[test]
 #[cfg(unix)]
-fn test_lines_kth_dev_null() {
+fn test_number_by_lines_kth_dev_null() {
    new_ucmd!()
        .args(&["-n", "l/3/10", "/dev/null"])
        .succeeds()
        .stdout_only("");
 }

+#[test]
+fn test_number_by_lines_kth_no_end_sep() {
+    // The piped input deliberately has no separator after its last line.
+    let input = "1\n2222\n3\n4";
+    // (command-line arguments, expected stdout)
+    let cases: [(&[&str], &str); 2] = [
+        (&["-n", "l/3/10"], "2222\n"),
+        (&["-e", "-n", "l/8/10"], "3\n"),
+    ];
+    for (args, expected) in cases {
+        new_ucmd!()
+            .args(args)
+            .pipe_in(input)
+            .succeeds()
+            .stdout_only(expected);
+    }
+}
+
 #[test]
 fn test_line_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -1588,17 +1620,10 @@ fn test_effective_suffix_hex_last() {
 #[test]
 fn test_round_robin() {
    let (at, mut ucmd) = at_and_ucmd!();
-
-    let file_read = |f| {
-        let mut s = String::new();
-        at.open(f).read_to_string(&mut s).unwrap();
-        s
-    };
-
    ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();

-    assert_eq!(file_read("xaa"), "1\n3\n5\n");
-    assert_eq!(file_read("xab"), "2\n4\n");
+    assert_eq!(at.read("xaa"), "1\n3\n5\n");
+    assert_eq!(at.read("xab"), "2\n4\n");
 }

 #[test]
@ -1631,7 +1656,7 @@ fn test_split_invalid_input() {
        .args(&["-n", "0", "file"])
        .fails()
        .no_stdout()
-        .stderr_contains("split: invalid number of chunks: 0");
+        .stderr_contains("split: invalid number of chunks: '0'");
 }

 /// Test if there are invalid (non UTF-8) in the arguments - unix
@ -1690,9 +1715,9 @@ fn test_split_separator_nl_lines() {
        .pipe_in("1\n2\n3\n4\n5\n")
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\n2\n");
-    assert_eq!(file_read(&at, "xab"), "3\n4\n");
-    assert_eq!(file_read(&at, "xac"), "5\n");
+    assert_eq!(at.read("xaa"), "1\n2\n");
+    assert_eq!(at.read("xab"), "3\n4\n");
+    assert_eq!(at.read("xac"), "5\n");
    assert!(!at.plus("xad").exists());
 }

@ -1703,9 +1728,9 @@ fn test_split_separator_nl_line_bytes() {
        .pipe_in("1\n2\n3\n4\n5\n")
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\n2\n");
-    assert_eq!(file_read(&at, "xab"), "3\n4\n");
-    assert_eq!(file_read(&at, "xac"), "5\n");
+    assert_eq!(at.read("xaa"), "1\n2\n");
+    assert_eq!(at.read("xab"), "3\n4\n");
+    assert_eq!(at.read("xac"), "5\n");
    assert!(!at.plus("xad").exists());
 }

@ -1715,9 +1740,9 @@ fn test_split_separator_nl_number_l() {
    ucmd.args(&["--number=l/3", "--separator=\n", "fivelines.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\n2\n");
-    assert_eq!(file_read(&at, "xab"), "3\n4\n");
-    assert_eq!(file_read(&at, "xac"), "5\n");
+    assert_eq!(at.read("xaa"), "1\n2\n");
+    assert_eq!(at.read("xab"), "3\n4\n");
+    assert_eq!(at.read("xac"), "5\n");
    assert!(!at.plus("xad").exists());
 }

@ -1727,9 +1752,9 @@ fn test_split_separator_nl_number_r() {
    ucmd.args(&["--number=r/3", "--separator", "\n", "fivelines.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\n4\n");
-    assert_eq!(file_read(&at, "xab"), "2\n5\n");
-    assert_eq!(file_read(&at, "xac"), "3\n");
+    assert_eq!(at.read("xaa"), "1\n4\n");
+    assert_eq!(at.read("xab"), "2\n5\n");
+    assert_eq!(at.read("xac"), "3\n");
    assert!(!at.plus("xad").exists());
 }

@ -1739,9 +1764,9 @@ fn test_split_separator_nul_lines() {
    ucmd.args(&["--lines=2", "-t", "\\0", "separator_nul.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\x002\0");
-    assert_eq!(file_read(&at, "xab"), "3\x004\0");
-    assert_eq!(file_read(&at, "xac"), "5\0");
+    assert_eq!(at.read("xaa"), "1\x002\0");
+    assert_eq!(at.read("xab"), "3\x004\0");
+    assert_eq!(at.read("xac"), "5\0");
    assert!(!at.plus("xad").exists());
 }

@ -1751,9 +1776,9 @@ fn test_split_separator_nul_line_bytes() {
    ucmd.args(&["--line-bytes=4", "-t", "\\0", "separator_nul.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\x002\0");
-    assert_eq!(file_read(&at, "xab"), "3\x004\0");
-    assert_eq!(file_read(&at, "xac"), "5\0");
+    assert_eq!(at.read("xaa"), "1\x002\0");
+    assert_eq!(at.read("xab"), "3\x004\0");
+    assert_eq!(at.read("xac"), "5\0");
    assert!(!at.plus("xad").exists());
 }

@ -1763,9 +1788,9 @@ fn test_split_separator_nul_number_l() {
    ucmd.args(&["--number=l/3", "--separator=\\0", "separator_nul.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\x002\0");
-    assert_eq!(file_read(&at, "xab"), "3\x004\0");
-    assert_eq!(file_read(&at, "xac"), "5\0");
+    assert_eq!(at.read("xaa"), "1\x002\0");
+    assert_eq!(at.read("xab"), "3\x004\0");
+    assert_eq!(at.read("xac"), "5\0");
    assert!(!at.plus("xad").exists());
 }

@ -1775,9 +1800,9 @@ fn test_split_separator_nul_number_r() {
    ucmd.args(&["--number=r/3", "--separator=\\0", "separator_nul.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1\x004\0");
-    assert_eq!(file_read(&at, "xab"), "2\x005\0");
-    assert_eq!(file_read(&at, "xac"), "3\0");
+    assert_eq!(at.read("xaa"), "1\x004\0");
+    assert_eq!(at.read("xab"), "2\x005\0");
+    assert_eq!(at.read("xac"), "3\0");
    assert!(!at.plus("xad").exists());
 }

@ -1787,9 +1812,9 @@ fn test_split_separator_semicolon_lines() {
    ucmd.args(&["--lines=2", "-t", ";", "separator_semicolon.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1;2;");
-    assert_eq!(file_read(&at, "xab"), "3;4;");
-    assert_eq!(file_read(&at, "xac"), "5;");
+    assert_eq!(at.read("xaa"), "1;2;");
+    assert_eq!(at.read("xab"), "3;4;");
+    assert_eq!(at.read("xac"), "5;");
    assert!(!at.plus("xad").exists());
 }

@ -1799,9 +1824,9 @@ fn test_split_separator_semicolon_line_bytes() {
    ucmd.args(&["--line-bytes=4", "-t", ";", "separator_semicolon.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1;2;");
-    assert_eq!(file_read(&at, "xab"), "3;4;");
-    assert_eq!(file_read(&at, "xac"), "5;");
+    assert_eq!(at.read("xaa"), "1;2;");
+    assert_eq!(at.read("xab"), "3;4;");
+    assert_eq!(at.read("xac"), "5;");
    assert!(!at.plus("xad").exists());
 }

@ -1811,9 +1836,9 @@ fn test_split_separator_semicolon_number_l() {
    ucmd.args(&["--number=l/3", "--separator=;", "separator_semicolon.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1;2;");
-    assert_eq!(file_read(&at, "xab"), "3;4;");
-    assert_eq!(file_read(&at, "xac"), "5;");
+    assert_eq!(at.read("xaa"), "1;2;");
+    assert_eq!(at.read("xab"), "3;4;");
+    assert_eq!(at.read("xac"), "5;");
    assert!(!at.plus("xad").exists());
 }

@ -1823,9 +1848,9 @@ fn test_split_separator_semicolon_number_r() {
    ucmd.args(&["--number=r/3", "--separator=;", "separator_semicolon.txt"])
        .succeeds();

-    assert_eq!(file_read(&at, "xaa"), "1;4;");
-    assert_eq!(file_read(&at, "xab"), "2;5;");
-    assert_eq!(file_read(&at, "xac"), "3;");
+    assert_eq!(at.read("xaa"), "1;4;");
+    assert_eq!(at.read("xab"), "2;5;");
+    assert_eq!(at.read("xac"), "3;");
    assert!(!at.plus("xad").exists());
 }