
split: pass GNU tests/b-chunk.sh (#5475)

---------

Co-authored-by: Terts Diepraam <terts.diepraam@gmail.com>
Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>
Co-authored-by: Brandon Elam Barker <brandon.barker@gmail.com>
Co-authored-by: Kostiantyn Hryshchuk <statheres@gmail.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Yury Zhytkou 2023-11-17 11:19:10 -05:00 committed by GitHub
parent a7e5af4770
commit eb00c195c6
2 changed files with 446 additions and 375 deletions


@@ -18,11 +18,12 @@ use std::ffi::OsString;
use std::fmt; use std::fmt;
use std::fs::{metadata, File}; use std::fs::{metadata, File};
use std::io; use std::io;
use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write}; use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write};
use std::path::Path; use std::path::Path;
use std::u64; use std::u64;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError}; use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
use uucore::parse_size::parse_size_u64;
use uucore::uio_error; use uucore::uio_error;
use uucore::{format_usage, help_about, help_section, help_usage}; use uucore::{format_usage, help_about, help_section, help_usage};
@@ -40,11 +41,20 @@ static OPT_HEX_SUFFIXES_SHORT: &str = "-x";
static OPT_SUFFIX_LENGTH: &str = "suffix-length"; static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_VERBOSE: &str = "verbose"; static OPT_VERBOSE: &str = "verbose";
static OPT_SEPARATOR: &str = "separator"; static OPT_SEPARATOR: &str = "separator";
// The ---io and ---io-blksize parameters are consumed and ignored.
// They are included only to make the GNU coreutils tests pass.
static OPT_IO: &str = "-io";
static OPT_IO_BLKSIZE: &str = "-io-blksize";
static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files"; static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files";
static OPT_IO_BLKSIZE: &str = "-io-blksize";
// Cap the ---io-blksize value.
// On 64-bit systems the maximum value is the same as GNU's
// and is equivalent to the `i32::MAX >> 20 << 20` operation.
// On 32-bit systems, however, even though that value fits within `u32` and `i32`,
// it causes rust-lang `library/alloc/src/raw_vec.rs` to panic with a 'capacity overflow' error,
// possibly due to how `std::io::BufReader` handles its internal buffers,
// so a much smaller value is used there.
static OPT_IO_BLKSIZE_MAX: usize = if usize::BITS >= 64 {
2_146_435_072
} else {
1_000_000_000
};
static ARG_INPUT: &str = "input"; static ARG_INPUT: &str = "input";
static ARG_PREFIX: &str = "prefix"; static ARG_PREFIX: &str = "prefix";
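As a quick sanity check of the 64-bit cap introduced above: the constant is simply `i32::MAX` rounded down to a 1 MiB boundary (2047 MiB). A purely illustrative test, not part of this commit, would be:

#[test]
fn illustrative_io_blksize_cap_value() {
    // 2047 MiB == (i32::MAX >> 20) << 20 == 2_146_435_072 bytes
    assert_eq!((i32::MAX >> 20 << 20) as usize, 2_146_435_072);
    assert_eq!(2047 * 1024 * 1024, 2_146_435_072_usize);
}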
@@ -311,7 +321,6 @@ pub fn uu_app() -> Command {
.arg( .arg(
Arg::new(OPT_NUMERIC_SUFFIXES) Arg::new(OPT_NUMERIC_SUFFIXES)
.long(OPT_NUMERIC_SUFFIXES) .long(OPT_NUMERIC_SUFFIXES)
.alias("numeric")
.require_equals(true) .require_equals(true)
.num_args(0..=1) .num_args(0..=1)
.overrides_with_all([ .overrides_with_all([
@@ -338,7 +347,6 @@ pub fn uu_app() -> Command {
.arg( .arg(
Arg::new(OPT_HEX_SUFFIXES) Arg::new(OPT_HEX_SUFFIXES)
.long(OPT_HEX_SUFFIXES) .long(OPT_HEX_SUFFIXES)
.alias("hex")
.require_equals(true) .require_equals(true)
.num_args(0..=1) .num_args(0..=1)
.overrides_with_all([ .overrides_with_all([
@@ -373,12 +381,6 @@ pub fn uu_app() -> Command {
.action(ArgAction::Append) .action(ArgAction::Append)
.help("use SEP instead of newline as the record separator; '\\0' (zero) specifies the NUL character"), .help("use SEP instead of newline as the record separator; '\\0' (zero) specifies the NUL character"),
) )
.arg(
Arg::new(OPT_IO)
.long("io")
.alias(OPT_IO)
.hide(true),
)
.arg( .arg(
Arg::new(OPT_IO_BLKSIZE) Arg::new(OPT_IO_BLKSIZE)
.long("io-blksize") .long("io-blksize")
@@ -419,6 +421,7 @@ struct Settings {
/// chunks. If this is `false`, then empty files will not be /// chunks. If this is `false`, then empty files will not be
/// created. /// created.
elide_empty_files: bool, elide_empty_files: bool,
io_blksize: Option<usize>,
} }
/// An error when parsing settings from command-line arguments. /// An error when parsing settings from command-line arguments.
@@ -441,6 +444,9 @@ enum SettingsError {
/// r/K/N /// r/K/N
FilterWithKthChunkNumber, FilterWithKthChunkNumber,
/// Invalid IO block size
InvalidIOBlockSize(String),
/// The `--filter` option is not supported on Windows. /// The `--filter` option is not supported on Windows.
#[cfg(windows)] #[cfg(windows)]
NotSupported, NotSupported,
@@ -471,6 +477,7 @@ impl fmt::Display for SettingsError {
Self::FilterWithKthChunkNumber => { Self::FilterWithKthChunkNumber => {
write!(f, "--filter does not process a chunk extracted to stdout") write!(f, "--filter does not process a chunk extracted to stdout")
} }
Self::InvalidIOBlockSize(s) => write!(f, "invalid IO block size: {}", s.quote()),
#[cfg(windows)] #[cfg(windows)]
Self::NotSupported => write!( Self::NotSupported => write!(
f, f,
@@ -499,12 +506,29 @@ impl Settings {
match first.as_str() { match first.as_str() {
"\\0" => b'\0', "\\0" => b'\0',
s if s.as_bytes().len() == 1 => s.as_bytes()[0], s if s.as_bytes().len() == 1 => s.as_bytes()[0],
s => return Err(SettingsError::MultiCharacterSeparator(s.to_owned())), s => return Err(SettingsError::MultiCharacterSeparator(s.to_string())),
} }
} }
None => b'\n', None => b'\n',
}; };
let io_blksize: Option<usize> = if let Some(s) = matches.get_one::<String>(OPT_IO_BLKSIZE) {
match parse_size_u64(s) {
Ok(n) => {
let n: usize = n
.try_into()
.map_err(|_| SettingsError::InvalidIOBlockSize(s.to_string()))?;
if n > OPT_IO_BLKSIZE_MAX {
return Err(SettingsError::InvalidIOBlockSize(s.to_string()));
}
Some(n)
}
_ => return Err(SettingsError::InvalidIOBlockSize(s.to_string())),
}
} else {
None
};
let result = Self { let result = Self {
prefix: matches.get_one::<String>(ARG_PREFIX).unwrap().clone(), prefix: matches.get_one::<String>(ARG_PREFIX).unwrap().clone(),
suffix, suffix,
@@ -514,6 +538,7 @@ impl Settings {
verbose: matches.value_source(OPT_VERBOSE) == Some(ValueSource::CommandLine), verbose: matches.value_source(OPT_VERBOSE) == Some(ValueSource::CommandLine),
separator, separator,
elide_empty_files: matches.get_flag(OPT_ELIDE_EMPTY_FILES), elide_empty_files: matches.get_flag(OPT_ELIDE_EMPTY_FILES),
io_blksize,
}; };
#[cfg(windows)] #[cfg(windows)]
@@ -591,6 +616,93 @@ fn custom_write_all<T: Write>(
} }
} }
/// Get the size of the input in bytes.
/// Used only for the subset of `--number=CHUNKS` strategies that need
/// to determine the input size upfront in order to know how many bytes
/// to write into each of the N files/chunks:
/// * N split into N files based on size of input
/// * K/N output Kth of N to stdout
/// * l/N split into N files without splitting lines/records
/// * l/K/N output Kth of N to stdout without splitting lines/records
///
/// For most files the size is determined either by reading the entire file content into a buffer
/// or by the `len()` function of [`std::fs::metadata`].
///
/// However, for some files whose filesystem metadata size does not match
/// their actual content size, we need to attempt to find the end of the file
/// with a direct `seek()` on [`std::fs::File`].
///
/// For a STDIN stream, read into a buffer up to a limit;
/// if the input stream does not reach EOF before that, return an error
/// (e.g. "infinite" input as in `cat /dev/zero | split ...`, `yes | split ...`, etc.).
///
/// Note: `buf` might end up holding either part or all of the input content.
fn get_input_size<R>(
input: &String,
reader: &mut R,
buf: &mut Vec<u8>,
io_blksize: &Option<usize>,
) -> std::io::Result<u64>
where
R: BufRead,
{
// Set read limit to io_blksize if specified
// Otherwise to OPT_IO_BLKSIZE_MAX
let read_limit = io_blksize.unwrap_or(OPT_IO_BLKSIZE_MAX) as u64;
// Try to read into buffer up to a limit
let num_bytes = reader
.by_ref()
.take(read_limit)
.read_to_end(buf)
.map(|n| n as u64)?;
if num_bytes < read_limit {
// Finite file or STDIN stream that fits entirely
// into a buffer within the limit
// Note: files like /dev/null or similar,
// empty STDIN stream,
// and files with true file size 0
// will also fit here
Ok(num_bytes)
} else if input == "-" {
// STDIN stream that did not fit all content into a buffer
// Most likely continuous/infinite input stream
return Err(io::Error::new(
ErrorKind::Other,
format!("{}: cannot determine input size", input),
));
} else {
// Could be that file size is larger than set read limit
// Get the file size from filesystem metadata
let metadata = metadata(input)?;
let metadata_size = metadata.len();
if num_bytes <= metadata_size {
Ok(metadata_size)
} else {
// Could be a file from locations like /dev, /sys, /proc or similar
// which report filesystem metadata size that does not match
// their actual content size
// Attempt direct `seek()` for the end of a file
let mut tmp_fd = File::open(Path::new(input))?;
let end = tmp_fd.seek(SeekFrom::End(0))?;
if end > 0 {
Ok(end)
} else {
// Edge case of either an "infinite" file (e.g. /dev/zero)
// or some other "special" non-standard file type
// Give up and return an error
// TODO It might be possible to do more here
// to address all possible file types and edge cases
return Err(io::Error::new(
ErrorKind::Other,
format!("{}: cannot determine file size", input),
));
}
}
}
}
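A minimal usage sketch of the helper above (illustrative only, not part of this commit; the path is hypothetical and the imports are the ones already at the top of this file):

fn example_input_size() -> std::io::Result<u64> {
    let input = String::from("some_input.txt"); // hypothetical file name
    let mut reader = BufReader::new(File::open(Path::new(&input))?);
    // `buf` may end up holding part or all of the input; the chunking
    // strategies chain it back in front of `reader` so nothing is lost.
    let mut buf = Vec::new();
    get_input_size(&input, &mut reader, &mut buf, &None)
}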
/// Write a certain number of bytes to one file, then move on to another one. /// Write a certain number of bytes to one file, then move on to another one.
/// ///
/// This struct maintains an underlying writer representing the /// This struct maintains an underlying writer representing the
@@ -1018,155 +1130,110 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
} }
} }
/// Split a file into a specific number of chunks by byte. /// Split a file or STDIN into a specific number of chunks by byte.
/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
/// ///
/// This function always creates one output file for each chunk, even /// When file size cannot be evenly divided into the number of chunks of the same size,
/// the first X chunks are 1 byte longer than the rest,
/// where X is the remainder of (file size % number of chunks)
///
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
///
/// In N chunks mode - this function always creates one output file for each chunk, even
/// if there is an error reading or writing one of the chunks or if /// if there is an error reading or writing one of the chunks or if
/// the input file is truncated. However, if the `filter` option is /// the input file is truncated. However, if the `--filter` option is
/// being used, then no files are created. /// being used, then files will only be created if `$FILE` variable was used
/// in filter command,
/// i.e. `split -n 10 --filter='head -c1 > $FILE' in`
/// ///
/// # Errors /// # Errors
/// ///
/// This function returns an error if there is a problem reading from /// This function returns an error if there is a problem reading from
/// `reader` or writing to one of the output files. /// `reader` or writing to one of the output files or stdout.
///
/// # See also
///
/// * [`n_chunks_by_line`], which splits its input into a specific number of chunks by line.
/// ///
/// Implements `--number=CHUNKS` /// Implements `--number=CHUNKS`
/// Where CHUNKS /// Where CHUNKS
/// * N /// * N
fn split_into_n_chunks_by_byte<R>( /// * K/N
fn n_chunks_by_byte<R>(
settings: &Settings, settings: &Settings,
reader: &mut R, reader: &mut R,
num_chunks: u64, num_chunks: u64,
kth_chunk: Option<u64>,
) -> UResult<()> ) -> UResult<()>
where where
R: Read, R: BufRead,
{ {
// Get the size of the input file in bytes and compute the number // Get the size of the input in bytes
// of bytes per chunk. let initial_buf = &mut Vec::new();
// let mut num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
// If the requested number of chunks exceeds the number of bytes let mut reader = initial_buf.chain(reader);
// in the file *and* the `elide_empty_files` parameter is enabled,
// then behave as if the number of chunks was set to the number of
// bytes in the file. This ensures that we don't write empty
// files. Otherwise, just write the `num_chunks - num_bytes` empty
// files.
let metadata = metadata(&settings.input).map_err(|_| {
USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
})?;
let num_bytes = metadata.len(); // If input file is empty and we would not have determined the Kth chunk
let will_have_empty_files = settings.elide_empty_files && num_chunks > num_bytes; // in the Kth chunk of N chunk mode, then terminate immediately.
let (num_chunks, chunk_size) = if will_have_empty_files { // This happens on `split -n 3/10 /dev/null`, for example.
let num_chunks = num_bytes; if kth_chunk.is_some() && num_bytes == 0 {
let chunk_size = 1; return Ok(());
(num_chunks, chunk_size) }
// If the requested number of chunks exceeds the number of bytes
// in the input:
// * in Kth chunk of N mode - just write empty byte string to stdout
// NOTE: the `elide_empty_files` parameter is ignored here
// as we do not generate any files
// and instead writing to stdout
// * In N chunks mode - if the `elide_empty_files` parameter is enabled,
// then behave as if the number of chunks was set to the number of
// bytes in the file. This ensures that we don't write empty
// files. Otherwise, just write the `num_chunks - num_bytes` empty files.
let num_chunks = if kth_chunk.is_none() && settings.elide_empty_files && num_chunks > num_bytes
{
num_bytes
} else { } else {
let chunk_size = (num_bytes / (num_chunks)).max(1); num_chunks
(num_chunks, chunk_size)
}; };
// If we would have written zero chunks of output, then terminate // If we would have written zero chunks of output, then terminate
// immediately. This happens on `split -e -n 3 /dev/null`, for // immediately. This happens on `split -e -n 3 /dev/null`, for
// example. // example.
if num_chunks == 0 || num_bytes == 0 { if num_chunks == 0 {
return Ok(()); return Ok(());
} }
let num_chunks: usize = num_chunks // In Kth chunk of N mode - we will write to stdout instead of to a file.
.try_into() let mut stdout_writer = std::io::stdout().lock();
.map_err(|_| USimpleError::new(1, "Number of chunks too big"))?; // In N chunks mode - we will write to `num_chunks` files
// This object is responsible for creating the filename for each chunk.
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?;
// Create one writer for each chunk. This will create each
// of the underlying files (if not in `--filter` mode).
let mut writers = vec![]; let mut writers = vec![];
for _ in 0..num_chunks {
let filename = filename_iterator // Calculate chunk size base and modulo reminder
.next() // to be used in calculating chunk_size later on
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; let chunk_size_base = num_bytes / num_chunks;
let writer = settings.instantiate_current_writer(filename.as_str())?; let chunk_size_reminder = num_bytes % num_chunks;
writers.push(writer);
// If in N chunks mode
// Create one writer for each chunk.
// This will create each of the underlying files
// or stdin pipes to child shell/command processes if in `--filter` mode
if kth_chunk.is_none() {
// This object is responsible for creating the filename for each chunk.
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
}
} }
// Write `chunk_size` bytes from the reader into each writer for i in 1_u64..=num_chunks {
// except the last. let chunk_size = chunk_size_base + (chunk_size_reminder > i - 1) as u64;
// let buf = &mut Vec::new();
// The last writer gets all remaining bytes so that if the number
// of bytes in the input file was not evenly divisible by
// `num_chunks`, we don't leave any bytes behind.
for writer in writers.iter_mut().take(num_chunks - 1) {
match io::copy(&mut reader.by_ref().take(chunk_size), writer) {
Ok(_) => continue,
Err(e) if ignorable_io_error(&e, settings) => continue,
Err(e) => return Err(uio_error!(e, "input/output error")),
};
}
// Write all the remaining bytes to the last chunk.
let i = num_chunks - 1;
let last_chunk_size = num_bytes - (chunk_size * (num_chunks as u64 - 1));
match io::copy(&mut reader.by_ref().take(last_chunk_size), &mut writers[i]) {
Ok(_) => Ok(()),
Err(e) if ignorable_io_error(&e, settings) => Ok(()),
Err(e) => Err(uio_error!(e, "input/output error")),
}
}
/// Print the k-th chunk of a file to stdout, splitting by byte.
///
/// This function is like [`split_into_n_chunks_by_byte`], but instead
/// of writing each chunk to its own file, it only writes to stdout
/// the contents of the chunk identified by `chunk_number`
///
/// # Errors
///
/// This function returns an error if there is a problem reading from
/// `reader` or writing to stdout.
///
/// Implements `--number=CHUNKS`
/// Where CHUNKS
/// * K/N
fn kth_chunks_by_byte<R>(
settings: &Settings,
reader: &mut R,
chunk_number: u64,
num_chunks: u64,
) -> UResult<()>
where
R: BufRead,
{
// Get the size of the input file in bytes and compute the number
// of bytes per chunk.
//
// If the requested number of chunks exceeds the number of bytes
// in the file - just write empty byte string to stdout
// NOTE: the `elide_empty_files` parameter is ignored here
// as we do not generate any files
// and instead writing to stdout
let metadata = metadata(&settings.input).map_err(|_| {
USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
})?;
let num_bytes = metadata.len();
// If input file is empty and we would have written zero chunks of output,
// then terminate immediately.
// This happens on `split -e -n 3 /dev/null`, for example.
if num_bytes == 0 {
return Ok(());
}
// Write to stdout instead of to a file.
let stdout = std::io::stdout();
let mut writer = stdout.lock();
let chunk_size = (num_bytes / (num_chunks)).max(1);
let mut num_bytes: usize = num_bytes.try_into().unwrap();
let mut i = 1;
loop {
let buf: &mut Vec<u8> = &mut vec![];
if num_bytes > 0 { if num_bytes > 0 {
// Read `chunk_size` bytes from the reader into `buf` // Read `chunk_size` bytes from the reader into `buf`
// except the last. // except the last.
@@ -1176,15 +1243,17 @@ where
// `num_chunks`, we don't leave any bytes behind. // `num_chunks`, we don't leave any bytes behind.
let limit = { let limit = {
if i == num_chunks { if i == num_chunks {
num_bytes.try_into().unwrap() num_bytes
} else { } else {
chunk_size chunk_size
} }
}; };
let n_bytes_read = reader.by_ref().take(limit).read_to_end(buf); let n_bytes_read = reader.by_ref().take(limit).read_to_end(buf);
match n_bytes_read { match n_bytes_read {
Ok(n_bytes) => { Ok(n_bytes) => {
num_bytes -= n_bytes; num_bytes -= n_bytes as u64;
} }
Err(error) => { Err(error) => {
return Err(USimpleError::new( return Err(USimpleError::new(
@@ -1193,11 +1262,20 @@ where
)); ));
} }
} }
if i == chunk_number {
writer.write_all(buf)?; match kth_chunk {
break; Some(chunk_number) => {
if i == chunk_number {
stdout_writer.write_all(buf)?;
break;
}
}
None => {
let idx = (i - 1) as usize;
let writer = writers.get_mut(idx).unwrap();
writer.write_all(buf)?;
}
} }
i += 1;
} else { } else {
break; break;
} }
@@ -1205,12 +1283,17 @@ where
Ok(()) Ok(())
} }
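To make the chunk sizing above concrete: with `chunk_size_base = num_bytes / num_chunks` and a remainder of `num_bytes % num_chunks`, the first `remainder` chunks are one byte longer than the rest. An illustrative stand-alone sketch (not part of this commit), e.g. 11 bytes into 3 chunks giving sizes 4, 4 and 3:

fn example_chunk_sizes(num_bytes: u64, num_chunks: u64) -> Vec<u64> {
    let base = num_bytes / num_chunks;
    let remainder = num_bytes % num_chunks;
    // The first `remainder` chunks each receive one extra byte.
    (1..=num_chunks).map(|i| base + (remainder > i - 1) as u64).collect()
}
// example_chunk_sizes(11, 3) yields [4, 4, 3], which sums back to 11.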
/// Split a file into a specific number of chunks by line. /// Split a file or STDIN into a specific number of chunks by line.
/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
/// ///
/// This function always creates one output file for each chunk, even /// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
///
/// In N chunks mode - this function always creates one output file for each chunk, even
/// if there is an error reading or writing one of the chunks or if /// if there is an error reading or writing one of the chunks or if
/// the input file is truncated. However, if the `filter` option is /// the input file is truncated. However, if the `--filter` option is
/// being used, then no files are created. /// being used, then files will only be created if `$FILE` variable was used
/// in filter command,
/// i.e. `split -n l/10 --filter='head -c1 > $FILE' in`
/// ///
/// # Errors /// # Errors
/// ///
@@ -1219,119 +1302,82 @@ where
/// ///
/// # See also /// # See also
/// ///
/// * [`kth_chunk_by_line`], which splits its input in the same way, /// * [`n_chunks_by_byte`], which splits its input into a specific number of chunks by byte.
/// but writes only one specified chunk to stdout.
/// ///
/// Implements `--number=CHUNKS` /// Implements `--number=CHUNKS`
/// Where CHUNKS /// Where CHUNKS
/// * l/N /// * l/N
fn split_into_n_chunks_by_line<R>( /// * l/K/N
fn n_chunks_by_line<R>(
settings: &Settings, settings: &Settings,
reader: &mut R, reader: &mut R,
num_chunks: u64, num_chunks: u64,
kth_chunk: Option<u64>,
) -> UResult<()> ) -> UResult<()>
where where
R: BufRead, R: BufRead,
{ {
// Get the size of the input file in bytes and compute the number // Get the size of the input in bytes and compute the number
// of bytes per chunk. // of bytes per chunk.
let metadata = metadata(&settings.input).map_err(|_| { let initial_buf = &mut Vec::new();
USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
})?; let reader = initial_buf.chain(reader);
let num_bytes = metadata.len();
let chunk_size = (num_bytes / num_chunks) as usize; let chunk_size = (num_bytes / num_chunks) as usize;
// This object is responsible for creating the filename for each chunk. // If input file is empty and we would not have determined the Kth chunk
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?; // in the Kth chunk of N chunk mode, then terminate immediately.
// This happens on `split -n l/3/10 /dev/null`, for example.
// Create one writer for each chunk. This will create each if kth_chunk.is_some() && num_bytes == 0 {
// of the underlying files (if not in `--filter` mode). return Ok(());
let mut writers = vec![];
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
} }
let mut num_bytes_remaining_in_current_chunk = chunk_size; // In Kth chunk of N mode - we will write to stdout instead of to a file.
let mut i = 0; let mut stdout_writer = std::io::stdout().lock();
let sep = settings.separator; // In N chunks mode - we will write to `num_chunks` files
for line_result in reader.split(sep) { let mut writers = vec![];
let line = line_result.unwrap();
let maybe_writer = writers.get_mut(i);
let writer = maybe_writer.unwrap();
let bytes = line.as_slice();
custom_write_all(bytes, writer, settings)?;
custom_write_all(&[sep], writer, settings)?;
// Add one byte for the separator character. // If in N chunks mode
let num_bytes = bytes.len() + 1; // Create one writer for each chunk.
if num_bytes > num_bytes_remaining_in_current_chunk { // This will create each of the underlying files
num_bytes_remaining_in_current_chunk = chunk_size; // or stdin pipes to child shell/command processes if in `--filter` mode
i += 1; if kth_chunk.is_none() {
} else { // This object is responsible for creating the filename for each chunk.
num_bytes_remaining_in_current_chunk -= num_bytes; let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
} }
} }
Ok(())
}
/// Print the k-th chunk of a file, splitting by line.
///
/// This function is like [`split_into_n_chunks_by_line`], but instead
/// of writing each chunk to its own file, it only writes to stdout
/// the contents of the chunk identified by `chunk_number`.
///
/// # Errors
///
/// This function returns an error if there is a problem reading from
/// `reader` or writing to one of the output files.
///
/// # See also
///
/// * [`split_into_n_chunks_by_line`], which splits its input in the
/// same way, but writes each chunk to its own file.
///
/// Implements `--number=CHUNKS`
/// Where CHUNKS
/// * l/K/N
fn kth_chunk_by_line<R>(
settings: &Settings,
reader: &mut R,
chunk_number: u64,
num_chunks: u64,
) -> UResult<()>
where
R: BufRead,
{
// Get the size of the input file in bytes and compute the number
// of bytes per chunk.
let metadata = metadata(&settings.input).map_err(|_| {
USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
})?;
let num_bytes = metadata.len();
let chunk_size = (num_bytes / num_chunks) as usize;
// Write to stdout instead of to a file.
let stdout = std::io::stdout();
let mut writer = stdout.lock();
let mut num_bytes_remaining_in_current_chunk = chunk_size; let mut num_bytes_remaining_in_current_chunk = chunk_size;
let mut i = 1; let mut i = 1;
let sep = settings.separator; let sep = settings.separator;
for line_result in reader.split(sep) { for line_result in reader.split(sep) {
let line = line_result?; // add separator back in at the end of the line
let mut line = line_result?;
line.push(sep);
let bytes = line.as_slice(); let bytes = line.as_slice();
if i == chunk_number {
writer.write_all(bytes)?; match kth_chunk {
writer.write_all(&[sep])?; Some(chunk_number) => {
if i == chunk_number {
stdout_writer.write_all(bytes)?;
}
}
None => {
let idx = (i - 1) as usize;
let maybe_writer = writers.get_mut(idx);
let writer = maybe_writer.unwrap();
custom_write_all(bytes, writer, settings)?;
}
} }
// Add one byte for the separator character. let num_bytes = bytes.len();
let num_bytes = bytes.len() + 1;
if num_bytes >= num_bytes_remaining_in_current_chunk { if num_bytes >= num_bytes_remaining_in_current_chunk {
num_bytes_remaining_in_current_chunk = chunk_size; num_bytes_remaining_in_current_chunk = chunk_size;
i += 1; i += 1;
@@ -1339,72 +1385,8 @@ where
num_bytes_remaining_in_current_chunk -= num_bytes; num_bytes_remaining_in_current_chunk -= num_bytes;
} }
if i > chunk_number { if let Some(chunk_number) = kth_chunk {
break; if i > chunk_number {
}
}
Ok(())
}
/// Split a file into a specific number of chunks by line, but
/// assign lines via round-robin
///
/// This function always creates one output file for each chunk, even
/// if there is an error reading or writing one of the chunks or if
/// the input file is truncated. However, if the `filter` option is
/// being used, then no files are created.
///
/// # Errors
///
/// This function returns an error if there is a problem reading from
/// `reader` or writing to one of the output files.
///
/// # See also
///
/// * [`split_into_n_chunks_by_line`], which splits its input in the same way,
/// but without round robin distribution.
///
/// Implements `--number=CHUNKS`
/// Where CHUNKS
/// * r/N
fn split_into_n_chunks_by_line_round_robin<R>(
settings: &Settings,
reader: &mut R,
num_chunks: u64,
) -> UResult<()>
where
R: BufRead,
{
// This object is responsible for creating the filename for each chunk.
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
// Create one writer for each chunk. This will create each
// of the underlying files (if not in `--filter` mode).
let mut writers = vec![];
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| io::Error::new(ErrorKind::Other, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
}
let num_chunks: usize = num_chunks.try_into().unwrap();
let sep = settings.separator;
let mut closed_writers = 0;
for (i, line_result) in reader.split(sep).enumerate() {
let maybe_writer = writers.get_mut(i % num_chunks);
let writer = maybe_writer.unwrap();
let mut line = line_result.unwrap();
line.push(sep);
let bytes = line.as_slice();
let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
if !writer_stdin_open {
closed_writers += 1;
if closed_writers == num_chunks {
// all writers are closed - stop reading
break; break;
} }
} }
@@ -1413,14 +1395,17 @@ where
Ok(()) Ok(())
} }
/// Print the k-th chunk of a file, splitting by line, but /// Split a file or STDIN into a specific number of chunks by line, but
/// assign lines via round-robin to the specified number of output /// assign lines via round-robin
/// chunks, but output only the *k*th chunk.
/// ///
/// This function is like [`kth_chunk_by_line`], as it only writes to stdout and /// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
/// prints out only *k*th chunk ///
/// It is also like [`split_into_n_chunks_by_line_round_robin`], as it is assigning chunks /// In N chunks mode - this function always creates one output file for each chunk, even
/// using round robin distribution /// if there is an error reading or writing one of the chunks or if
/// the input file is truncated. However, if the `--filter` option is
/// being used, then files will only be created if `$FILE` variable was used
/// in filter command,
/// i.e. `split -n r/10 --filter='head -c1 > $FILE' in`
/// ///
/// # Errors /// # Errors
/// ///
@@ -1429,46 +1414,83 @@ where
/// ///
/// # See also /// # See also
/// ///
/// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the /// * [`n_chunks_by_line`], which splits its input into a specific number of chunks by line.
/// same way, but writes each chunk to its own file.
/// ///
/// Implements `--number=CHUNKS` /// Implements `--number=CHUNKS`
/// Where CHUNKS /// Where CHUNKS
/// * r/N
/// * r/K/N /// * r/K/N
fn kth_chunk_by_line_round_robin<R>( fn n_chunks_by_line_round_robin<R>(
settings: &Settings, settings: &Settings,
reader: &mut R, reader: &mut R,
chunk_number: u64,
num_chunks: u64, num_chunks: u64,
kth_chunk: Option<u64>,
) -> UResult<()> ) -> UResult<()>
where where
R: BufRead, R: BufRead,
{ {
// Write to stdout instead of to a file. // In Kth chunk of N mode - we will write to stdout instead of to a file.
let stdout = std::io::stdout(); let mut stdout_writer = std::io::stdout().lock();
let mut writer = stdout.lock(); // In N chunks mode - we will write to `num_chunks` files
let mut writers = vec![];
let num_chunks: usize = num_chunks.try_into().unwrap(); // If in N chunks mode
let chunk_number: usize = chunk_number.try_into().unwrap(); // Create one writer for each chunk.
let sep = settings.separator; // This will create each of the underlying files
// The chunk number is given as a 1-indexed number, but it // or stdin pipes to child shell/command processes if in `--filter` mode
// is a little easier to deal with a 0-indexed number if kth_chunk.is_none() {
// since `.enumerate()` returns index `i` starting with 0 // This object is responsible for creating the filename for each chunk.
let chunk_number = chunk_number - 1; let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
for (i, line_result) in reader.split(sep).enumerate() { .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
let line = line_result?; for _ in 0..num_chunks {
let bytes = line.as_slice(); let filename = filename_iterator
if (i % num_chunks) == chunk_number { .next()
writer.write_all(bytes)?; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
writer.write_all(&[sep])?; let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
} }
} }
let num_chunks: usize = num_chunks.try_into().unwrap();
let sep = settings.separator;
let mut closed_writers = 0;
for (i, line_result) in reader.split(sep).enumerate() {
// add separator back in at the end of the line
let mut line = line_result?;
line.push(sep);
let bytes = line.as_slice();
match kth_chunk {
Some(chunk_number) => {
// The `.enumerate()` method returns index `i` starting with 0,
// but chunk number is given as a 1-indexed number,
// so compare to `chunk_number - 1`
if (i % num_chunks) == (chunk_number - 1) as usize {
stdout_writer.write_all(bytes)?;
}
}
None => {
let maybe_writer = writers.get_mut(i % num_chunks);
let writer = maybe_writer.unwrap();
let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
if !writer_stdin_open {
closed_writers += 1;
if closed_writers == num_chunks {
// all writers are closed - stop reading
break;
}
}
}
}
}
Ok(()) Ok(())
} }
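The round-robin distribution above can be pictured with a small stand-alone sketch (illustrative only, not part of this commit): record index `i` (0-based) always lands in chunk `i % num_chunks`, so five records split as `r/2` end up as ["1", "3", "5"] and ["2", "4"]:

fn example_round_robin(records: &[&str], num_chunks: usize) -> Vec<Vec<String>> {
    let mut chunks = vec![Vec::new(); num_chunks];
    for (i, record) in records.iter().enumerate() {
        chunks[i % num_chunks].push((*record).to_string());
    }
    chunks
}
// example_round_robin(&["1", "2", "3", "4", "5"], 2)
// yields [["1", "3", "5"], ["2", "4"]].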
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
fn split(settings: &Settings) -> UResult<()> { fn split(settings: &Settings) -> UResult<()> {
let mut reader = BufReader::new(if settings.input == "-" { let r_box = if settings.input == "-" {
Box::new(stdin()) as Box<dyn Read> Box::new(stdin()) as Box<dyn Read>
} else { } else {
let r = File::open(Path::new(&settings.input)).map_err_context(|| { let r = File::open(Path::new(&settings.input)).map_err_context(|| {
@@ -1478,26 +1500,33 @@ fn split(settings: &Settings) -> UResult<()> {
) )
})?; })?;
Box::new(r) as Box<dyn Read> Box::new(r) as Box<dyn Read>
}); };
let mut reader = if let Some(c) = settings.io_blksize {
BufReader::with_capacity(c, r_box)
} else {
BufReader::new(r_box)
};
match settings.strategy { match settings.strategy {
Strategy::Number(NumberType::Bytes(num_chunks)) => { Strategy::Number(NumberType::Bytes(num_chunks)) => {
split_into_n_chunks_by_byte(settings, &mut reader, num_chunks) // split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
n_chunks_by_byte(settings, &mut reader, num_chunks, None)
} }
Strategy::Number(NumberType::KthBytes(chunk_number, num_chunks)) => { Strategy::Number(NumberType::KthBytes(chunk_number, num_chunks)) => {
kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks) // kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks)
n_chunks_by_byte(settings, &mut reader, num_chunks, Some(chunk_number))
} }
Strategy::Number(NumberType::Lines(num_chunks)) => { Strategy::Number(NumberType::Lines(num_chunks)) => {
split_into_n_chunks_by_line(settings, &mut reader, num_chunks) n_chunks_by_line(settings, &mut reader, num_chunks, None)
} }
Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => { Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => {
kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks) n_chunks_by_line(settings, &mut reader, num_chunks, Some(chunk_number))
} }
Strategy::Number(NumberType::RoundRobin(num_chunks)) => { Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks) n_chunks_by_line_round_robin(settings, &mut reader, num_chunks, None)
} }
Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => { Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => {
kth_chunk_by_line_round_robin(settings, &mut reader, chunk_number, num_chunks) n_chunks_by_line_round_robin(settings, &mut reader, num_chunks, Some(chunk_number))
} }
Strategy::Lines(chunk_size) => { Strategy::Lines(chunk_size) => {
let mut writer = LineChunkWriter::new(chunk_size, settings)?; let mut writer = LineChunkWriter::new(chunk_size, settings)?;


@@ -2,7 +2,7 @@
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc // spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc
use crate::common::util::{AtPath, TestScenario}; use crate::common::util::{AtPath, TestScenario};
use rand::{thread_rng, Rng, SeedableRng}; use rand::{thread_rng, Rng, SeedableRng};
@@ -704,54 +704,41 @@ fn test_split_overflow_bytes_size() {
assert_eq!(glob.collate(), at.read_bytes(name)); assert_eq!(glob.collate(), at.read_bytes(name));
} }
#[test]
#[cfg(target_pointer_width = "32")]
fn test_split_chunks_num_chunks_oversized_32() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("file");
scene
.ucmd()
.args(&["--number", "5000000000", "sixhundredfiftyonebytes.txt"])
.fails()
.code_is(1)
.stderr_only("split: Number of chunks too big\n");
}
#[test] #[test]
fn test_split_stdin_num_chunks() { fn test_split_stdin_num_chunks() {
new_ucmd!() let (at, mut ucmd) = at_and_ucmd!();
.args(&["--number=1"]) ucmd.args(&["--number=1"]).pipe_in("").succeeds();
.fails() assert_eq!(file_read(&at, "xaa"), "");
.code_is(1) assert!(!at.plus("xab").exists());
.stderr_only("split: -: cannot determine file size\n");
} }
#[test] #[test]
fn test_split_stdin_num_kth_chunk() { fn test_split_stdin_num_kth_chunk() {
new_ucmd!() new_ucmd!()
.args(&["--number=1/2"]) .args(&["--number=1/2"])
.fails() .pipe_in("1\n2\n3\n4\n5\n")
.code_is(1) .succeeds()
.stderr_only("split: -: cannot determine file size\n"); .stdout_only("1\n2\n3");
} }
#[test] #[test]
fn test_split_stdin_num_line_chunks() { fn test_split_stdin_num_line_chunks() {
new_ucmd!() let (at, mut ucmd) = at_and_ucmd!();
.args(&["--number=l/2"]) ucmd.args(&["--number=l/2"])
.fails() .pipe_in("1\n2\n3\n4\n5\n")
.code_is(1) .succeeds();
.stderr_only("split: -: cannot determine file size\n"); assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n");
assert_eq!(file_read(&at, "xab"), "4\n5\n");
assert!(!at.plus("xac").exists());
} }
#[test] #[test]
fn test_split_stdin_num_kth_line_chunk() { fn test_split_stdin_num_kth_line_chunk() {
new_ucmd!() new_ucmd!()
.args(&["--number=l/2/5"]) .args(&["--number=l/2/5"])
.fails() .pipe_in("1\n2\n3\n4\n5\n")
.code_is(1) .succeeds()
.stderr_only("split: -: cannot determine file size\n"); .stdout_only("2\n");
} }
fn file_read(at: &AtPath, filename: &str) -> String { fn file_read(at: &AtPath, filename: &str) -> String {
@@ -912,6 +899,14 @@ fn test_suffixes_exhausted() {
.stderr_only("split: output file suffixes exhausted\n"); .stderr_only("split: output file suffixes exhausted\n");
} }
#[test]
fn test_suffix_length_req() {
new_ucmd!()
.args(&["-n", "100", "-a", "1", "asciilowercase.txt"])
.fails()
.stderr_only("split: the suffix length needs to be at least 2\n");
}
#[test] #[test]
fn test_verbose() { fn test_verbose() {
new_ucmd!() new_ucmd!()
@@ -937,11 +932,11 @@ fn test_number_n() {
s s
}; };
ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds(); ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();
assert_eq!(file_read("xaa"), "abcde"); assert_eq!(file_read("xaa"), "abcdef");
assert_eq!(file_read("xab"), "fghij"); assert_eq!(file_read("xab"), "ghijkl");
assert_eq!(file_read("xac"), "klmno"); assert_eq!(file_read("xac"), "mnopq");
assert_eq!(file_read("xad"), "pqrst"); assert_eq!(file_read("xad"), "rstuv");
assert_eq!(file_read("xae"), "uvwxyz\n"); assert_eq!(file_read("xae"), "wxyz\n");
#[cfg(unix)] #[cfg(unix)]
new_ucmd!() new_ucmd!()
.args(&["--number=100", "/dev/null"]) .args(&["--number=100", "/dev/null"])
@@ -954,11 +949,11 @@ fn test_number_kth_of_n() {
new_ucmd!() new_ucmd!()
.args(&["--number=3/5", "asciilowercase.txt"]) .args(&["--number=3/5", "asciilowercase.txt"])
.succeeds() .succeeds()
.stdout_only("klmno"); .stdout_only("mnopq");
new_ucmd!() new_ucmd!()
.args(&["--number=5/5", "asciilowercase.txt"]) .args(&["--number=5/5", "asciilowercase.txt"])
.succeeds() .succeeds()
.stdout_only("uvwxyz\n"); .stdout_only("wxyz\n");
new_ucmd!() new_ucmd!()
.args(&["-e", "--number=99/100", "asciilowercase.txt"]) .args(&["-e", "--number=99/100", "asciilowercase.txt"])
.succeeds() .succeeds()
@@ -1046,11 +1041,11 @@ fn test_split_number_with_io_blksize() {
}; };
ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"]) ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"])
.succeeds(); .succeeds();
assert_eq!(file_read("xaa"), "abcde"); assert_eq!(file_read("xaa"), "abcdef");
assert_eq!(file_read("xab"), "fghij"); assert_eq!(file_read("xab"), "ghijkl");
assert_eq!(file_read("xac"), "klmno"); assert_eq!(file_read("xac"), "mnopq");
assert_eq!(file_read("xad"), "pqrst"); assert_eq!(file_read("xad"), "rstuv");
assert_eq!(file_read("xae"), "uvwxyz\n"); assert_eq!(file_read("xae"), "wxyz\n");
} }
#[test] #[test]
@@ -1065,6 +1060,32 @@ fn test_split_default_with_io_blksize() {
assert_eq!(glob.collate(), at.read_bytes(name)); assert_eq!(glob.collate(), at.read_bytes(name));
} }
#[test]
fn test_split_invalid_io_blksize() {
new_ucmd!()
.args(&["---io-blksize=XYZ", "threebytes.txt"])
.fails()
.stderr_only("split: invalid IO block size: 'XYZ'\n");
new_ucmd!()
.args(&["---io-blksize=5000000000", "threebytes.txt"])
.fails()
.stderr_only("split: invalid IO block size: '5000000000'\n");
#[cfg(target_pointer_width = "32")]
new_ucmd!()
.args(&["---io-blksize=2146435072", "threebytes.txt"])
.fails()
.stderr_only("split: invalid IO block size: '2146435072'\n");
}
#[test]
fn test_split_number_oversized_stdin() {
new_ucmd!()
.args(&["--number=3", "---io-blksize=600"])
.pipe_in_fixture("sixhundredfiftyonebytes.txt")
.fails()
.stderr_only("split: -: cannot determine input size\n");
}
#[test] #[test]
fn test_invalid_suffix_length() { fn test_invalid_suffix_length() {
new_ucmd!() new_ucmd!()
@@ -1157,6 +1178,18 @@ fn test_elide_dev_null() {
assert!(!at.plus("xac").exists()); assert!(!at.plus("xac").exists());
} }
#[test]
#[cfg(unix)]
fn test_dev_zero() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "3", "/dev/zero"])
.fails()
.stderr_only("split: /dev/zero: cannot determine file size\n");
assert!(!at.plus("xaa").exists());
assert!(!at.plus("xab").exists());
assert!(!at.plus("xac").exists());
}
#[test] #[test]
fn test_lines() { fn test_lines() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
@@ -1182,6 +1215,15 @@ fn test_lines_kth() {
.stdout_only("20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n"); .stdout_only("20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n");
} }
#[test]
#[cfg(unix)]
fn test_lines_kth_dev_null() {
new_ucmd!()
.args(&["-n", "l/3/10", "/dev/null"])
.succeeds()
.stdout_only("");
}
#[test] #[test]
fn test_line_bytes() { fn test_line_bytes() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
@@ -1321,7 +1363,7 @@ fn test_numeric_suffix() {
} }
#[test] #[test]
fn test_numeric_suffix_alias() { fn test_numeric_suffix_inferred() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "4", "--numeric=9", "threebytes.txt"]) ucmd.args(&["-n", "4", "--numeric=9", "threebytes.txt"])
.succeeds() .succeeds()