split: pass GNU tests/b-chunk.sh (#5475)

--------- Co-authored-by: Terts Diepraam <terts.diepraam@gmail.com> Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com> Co-authored-by: Brandon Elam Barker <brandon.barker@gmail.com> Co-authored-by: Kostiantyn Hryshchuk <statheres@gmail.com> Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-09-14 19:16:17 +00:00 · 2023-11-17 11:19:10 -05:00 · 2023-11-17 11:19:10 -05:00 · eb00c195c6
commit eb00c195c6
parent a7e5af4770
2 changed files with 446 additions and 375 deletions
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@ -18,11 +18,12 @@ use std::ffi::OsString;
 use std::fmt;
 use std::fs::{metadata, File};
 use std::io;
-use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
+use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write};
 use std::path::Path;
 use std::u64;
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
+use uucore::parse_size::parse_size_u64;

 use uucore::uio_error;
 use uucore::{format_usage, help_about, help_section, help_usage};
@ -40,11 +41,20 @@ static OPT_HEX_SUFFIXES_SHORT: &str = "-x";
 static OPT_SUFFIX_LENGTH: &str = "suffix-length";
 static OPT_VERBOSE: &str = "verbose";
 static OPT_SEPARATOR: &str = "separator";
-//The ---io and ---io-blksize parameters are consumed and ignored.
-//The parameter is included to make GNU coreutils tests pass.
-static OPT_IO: &str = "-io";
-static OPT_IO_BLKSIZE: &str = "-io-blksize";
 static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files";
+static OPT_IO_BLKSIZE: &str = "-io-blksize";
+// Cap on the `---io-blksize` value.
+// On 64-bit systems the maximum is the same as in GNU
+// and is equivalent to `i32::MAX >> 20 << 20` (2_146_435_072).
+// On 32-bit systems, however, even though that value fits within `u32` and `i32`,
+// it causes rust-lang `library/alloc/src/raw_vec.rs` to panic with a 'capacity overflow' error.
+// This could be due to how `std::io::BufReader` handles its internal buffers,
+// so a much smaller value is used on those systems.
+static OPT_IO_BLKSIZE_MAX: usize = if usize::BITS >= 64 {
+    2_146_435_072
+} else {
+    1_000_000_000
+};

 static ARG_INPUT: &str = "input";
 static ARG_PREFIX: &str = "prefix";
@ -311,7 +321,6 @@ pub fn uu_app() -> Command {
        .arg(
            Arg::new(OPT_NUMERIC_SUFFIXES)
                .long(OPT_NUMERIC_SUFFIXES)
-                .alias("numeric")
                .require_equals(true)
                .num_args(0..=1)
                .overrides_with_all([
@ -338,7 +347,6 @@ pub fn uu_app() -> Command {
        .arg(
            Arg::new(OPT_HEX_SUFFIXES)
                .long(OPT_HEX_SUFFIXES)
-                .alias("hex")
                .require_equals(true)
                .num_args(0..=1)
                .overrides_with_all([
@ -373,12 +381,6 @@ pub fn uu_app() -> Command {
                .action(ArgAction::Append)
                .help("use SEP instead of newline as the record separator; '\\0' (zero) specifies the NUL character"),
        )
-        .arg(
-            Arg::new(OPT_IO)
-                .long("io")
-                .alias(OPT_IO)
-                .hide(true),
-        )
        .arg(
            Arg::new(OPT_IO_BLKSIZE)
                .long("io-blksize")
@ -419,6 +421,7 @@ struct Settings {
    /// chunks. If this is `false`, then empty files will not be
    /// created.
    elide_empty_files: bool,
+    io_blksize: Option<usize>,
 }

 /// An error when parsing settings from command-line arguments.
@ -441,6 +444,9 @@ enum SettingsError {
    /// r/K/N
    FilterWithKthChunkNumber,

+    /// Invalid IO block size
+    InvalidIOBlockSize(String),
+
    /// The `--filter` option is not supported on Windows.
    #[cfg(windows)]
    NotSupported,
@ -471,6 +477,7 @@ impl fmt::Display for SettingsError {
            Self::FilterWithKthChunkNumber => {
                write!(f, "--filter does not process a chunk extracted to stdout")
            }
+            Self::InvalidIOBlockSize(s) => write!(f, "invalid IO block size: {}", s.quote()),
            #[cfg(windows)]
            Self::NotSupported => write!(
                f,
@ -499,12 +506,29 @@ impl Settings {
                match first.as_str() {
                    "\\0" => b'\0',
                    s if s.as_bytes().len() == 1 => s.as_bytes()[0],
-                    s => return Err(SettingsError::MultiCharacterSeparator(s.to_owned())),
+                    s => return Err(SettingsError::MultiCharacterSeparator(s.to_string())),
                }
            }
            None => b'\n',
        };

+        let io_blksize: Option<usize> = if let Some(s) = matches.get_one::<String>(OPT_IO_BLKSIZE) {
+            match parse_size_u64(s) {
+                Ok(n) => {
+                    let n: usize = n
+                        .try_into()
+                        .map_err(|_| SettingsError::InvalidIOBlockSize(s.to_string()))?;
+                    if n > OPT_IO_BLKSIZE_MAX {
+                        return Err(SettingsError::InvalidIOBlockSize(s.to_string()));
+                    }
+                    Some(n)
+                }
+                _ => return Err(SettingsError::InvalidIOBlockSize(s.to_string())),
+            }
+        } else {
+            None
+        };
+
        let result = Self {
            prefix: matches.get_one::<String>(ARG_PREFIX).unwrap().clone(),
            suffix,
@ -514,6 +538,7 @@ impl Settings {
            verbose: matches.value_source(OPT_VERBOSE) == Some(ValueSource::CommandLine),
            separator,
            elide_empty_files: matches.get_flag(OPT_ELIDE_EMPTY_FILES),
+            io_blksize,
        };

        #[cfg(windows)]
@ -591,6 +616,93 @@ fn custom_write_all<T: Write>(
    }
 }

+/// Get the size of the input file in bytes
+/// Used only for a subset of the `--number=CHUNKS` strategies, as the input
+/// size must be determined upfront in order to know the chunk size
+/// to be written into each of the N files/chunks:
+/// * N       split into N files based on size of input
+/// * K/N     output Kth of N to stdout
+/// * l/N     split into N files without splitting lines/records
+/// * l/K/N   output Kth of N to stdout without splitting lines/records
+///
+/// For most files the size will be determined by either reading entire file content into a buffer
+/// or by `len()` function of [`std::fs::metadata`].
+///
+/// However, for some files which report filesystem metadata size that does not match
+/// their actual content size, we will need to attempt to find the end of file
+/// with direct `seek()` on [`std::fs::File`].
+///
+/// For a STDIN stream - read into a buffer up to a limit.
+/// If the input stream does not reach EOF before that - return an error
+/// (i.e. "infinite" input as in `cat /dev/zero | split ...`, `yes | split ...` etc.).
+///
+/// Note: The `buf` might end up with either partial or entire input content.
+fn get_input_size<R>(
+    input: &String,
+    reader: &mut R,
+    buf: &mut Vec<u8>,
+    io_blksize: &Option<usize>,
+) -> std::io::Result<u64>
+where
+    R: BufRead,
+{
+    // Set read limit to io_blksize if specified
+    // Otherwise to OPT_IO_BLKSIZE_MAX
+    let read_limit = io_blksize.unwrap_or(OPT_IO_BLKSIZE_MAX) as u64;
+
+    // Try to read into buffer up to a limit
+    let num_bytes = reader
+        .by_ref()
+        .take(read_limit)
+        .read_to_end(buf)
+        .map(|n| n as u64)?;
+
+    if num_bytes < read_limit {
+        // Finite file or STDIN stream that fits entirely
+        // into a buffer within the limit
+        // Note: files like /dev/null or similar,
+        // empty STDIN stream,
+        // and files with true file size 0
+        // will also fit here
+        Ok(num_bytes)
+    } else if input == "-" {
+        // STDIN stream that did not fit all content into a buffer
+        // Most likely continuous/infinite input stream
+        return Err(io::Error::new(
+            ErrorKind::Other,
+            format!("{}: cannot determine input size", input),
+        ));
+    } else {
+        // Could be that file size is larger than set read limit
+        // Get the file size from filesystem metadata
+        let metadata = metadata(input)?;
+        let metadata_size = metadata.len();
+        if num_bytes <= metadata_size {
+            Ok(metadata_size)
+        } else {
+            // Could be a file from locations like /dev, /sys, /proc or similar
+            // which report filesystem metadata size that does not match
+            // their actual content size
+            // Attempt direct `seek()` for the end of a file
+            let mut tmp_fd = File::open(Path::new(input))?;
+            let end = tmp_fd.seek(SeekFrom::End(0))?;
+            if end > 0 {
+                Ok(end)
+            } else {
+                // Edge case of either "infinite" file (i.e. /dev/zero)
+                // or some other "special" non-standard file type
+                // Give up and return an error
+                // TODO It might be possible to do more here
+                // to address all possible file types and edge cases
+                return Err(io::Error::new(
+                    ErrorKind::Other,
+                    format!("{}: cannot determine file size", input),
+                ));
+            }
+        }
+    }
+}
+
 /// Write a certain number of bytes to one file, then move on to another one.
 ///
 /// This struct maintains an underlying writer representing the
@ -1018,70 +1130,98 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
    }
 }

-/// Split a file into a specific number of chunks by byte.
+/// Split a file or STDIN into a specific number of chunks by byte.
+/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
 ///
-/// This function always creates one output file for each chunk, even
+/// When the file size cannot be evenly divided into chunks of the same size,
+/// the first X chunks are 1 byte longer than the rest,
+/// where X is the remainder of (file size % number of chunks).
+///
+/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
+///
+/// In N chunks mode - this function always creates one output file for each chunk, even
 /// if there is an error reading or writing one of the chunks or if
-/// the input file is truncated. However, if the `filter` option is
-/// being used, then no files are created.
+/// the input file is truncated. However, if the `--filter` option is
+/// being used, then files will only be created if `$FILE` variable was used
+/// in filter command,
+/// i.e. `split -n 10 --filter='head -c1 > $FILE' in`
 ///
 /// # Errors
 ///
 /// This function returns an error if there is a problem reading from
-/// `reader` or writing to one of the output files.
+/// `reader` or writing to one of the output files or stdout.
+///
+/// # See also
+///
+/// * [`n_chunks_by_line`], which splits its input into a specific number of chunks by line.
 ///
 /// Implements `--number=CHUNKS`
 /// Where CHUNKS
 /// * N
-fn split_into_n_chunks_by_byte<R>(
+/// * K/N
+fn n_chunks_by_byte<R>(
    settings: &Settings,
    reader: &mut R,
    num_chunks: u64,
+    kth_chunk: Option<u64>,
 ) -> UResult<()>
 where
-    R: Read,
+    R: BufRead,
 {
-    // Get the size of the input file in bytes and compute the number
-    // of bytes per chunk.
-    //
+    // Get the size of the input in bytes
+    let initial_buf = &mut Vec::new();
+    let mut num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
+    let mut reader = initial_buf.chain(reader);
+
+    // If the input is empty, there is no Kth chunk to output
+    // in Kth chunk of N mode, so terminate immediately.
+    // This happens on `split -n 3/10 /dev/null`, for example.
+    if kth_chunk.is_some() && num_bytes == 0 {
+        return Ok(());
+    }
+
    // If the requested number of chunks exceeds the number of bytes
-    // in the file *and* the `elide_empty_files` parameter is enabled,
+    // in the input:
+    // * in Kth chunk of N mode - just write an empty byte string to stdout
+    //   NOTE: the `elide_empty_files` parameter is ignored here
+    //   as we do not generate any files
+    //   and instead write to stdout
+    // * In N chunks mode - if the `elide_empty_files` parameter is enabled,
    //   then behave as if the number of chunks was set to the number of
    //   bytes in the file. This ensures that we don't write empty
-    // files. Otherwise, just write the `num_chunks - num_bytes` empty
-    // files.
-    let metadata = metadata(&settings.input).map_err(|_| {
-        USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
-    })?;
-
-    let num_bytes = metadata.len();
-    let will_have_empty_files = settings.elide_empty_files && num_chunks > num_bytes;
-    let (num_chunks, chunk_size) = if will_have_empty_files {
-        let num_chunks = num_bytes;
-        let chunk_size = 1;
-        (num_chunks, chunk_size)
+    //   files. Otherwise, just write the `num_chunks - num_bytes` empty files.
+    let num_chunks = if kth_chunk.is_none() && settings.elide_empty_files && num_chunks > num_bytes
+    {
+        num_bytes
    } else {
-        let chunk_size = (num_bytes / (num_chunks)).max(1);
-        (num_chunks, chunk_size)
+        num_chunks
    };

    // If we would have written zero chunks of output, then terminate
    // immediately. This happens on `split -e -n 3 /dev/null`, for
    // example.
-    if num_chunks == 0 || num_bytes == 0 {
+    if num_chunks == 0 {
        return Ok(());
    }

-    let num_chunks: usize = num_chunks
-        .try_into()
-        .map_err(|_| USimpleError::new(1, "Number of chunks too big"))?;
-
-    // This object is responsible for creating the filename for each chunk.
-    let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?;
-
-    // Create one writer for each chunk. This will create each
-    // of the underlying files (if not in `--filter` mode).
+    // In Kth chunk of N mode - we will write to stdout instead of to a file.
+    let mut stdout_writer = std::io::stdout().lock();
+    // In N chunks mode - we will write to `num_chunks` files
    let mut writers = vec![];
+
+    // Calculate the chunk size base and the modulo remainder
+    // to be used in calculating chunk_size later on
+    let chunk_size_base = num_bytes / num_chunks;
+    let chunk_size_reminder = num_bytes % num_chunks;
+
+    // If in N chunks mode
+    // Create one writer for each chunk.
+    // This will create each of the underlying files
+    // or stdin pipes to child shell/command processes if in `--filter` mode
+    if kth_chunk.is_none() {
+        // This object is responsible for creating the filename for each chunk.
+        let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
+            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
        for _ in 0..num_chunks {
            let filename = filename_iterator
                .next()
@ -1089,84 +1229,11 @@ where
            let writer = settings.instantiate_current_writer(filename.as_str())?;
            writers.push(writer);
        }
-
-    // Write `chunk_size` bytes from the reader into each writer
-    // except the last.
-    //
-    // The last writer gets all remaining bytes so that if the number
-    // of bytes in the input file was not evenly divisible by
-    // `num_chunks`, we don't leave any bytes behind.
-    for writer in writers.iter_mut().take(num_chunks - 1) {
-        match io::copy(&mut reader.by_ref().take(chunk_size), writer) {
-            Ok(_) => continue,
-            Err(e) if ignorable_io_error(&e, settings) => continue,
-            Err(e) => return Err(uio_error!(e, "input/output error")),
-        };
    }

-    // Write all the remaining bytes to the last chunk.
-    let i = num_chunks - 1;
-    let last_chunk_size = num_bytes - (chunk_size * (num_chunks as u64 - 1));
-    match io::copy(&mut reader.by_ref().take(last_chunk_size), &mut writers[i]) {
-        Ok(_) => Ok(()),
-        Err(e) if ignorable_io_error(&e, settings) => Ok(()),
-        Err(e) => Err(uio_error!(e, "input/output error")),
-    }
-}
-
-/// Print the k-th chunk of a file to stdout, splitting by byte.
-///
-/// This function is like [`split_into_n_chunks_by_byte`], but instead
-/// of writing each chunk to its own file, it only writes to stdout
-/// the contents of the chunk identified by `chunk_number`
-///
-/// # Errors
-///
-/// This function returns an error if there is a problem reading from
-/// `reader` or writing to stdout.
-///
-/// Implements `--number=CHUNKS`
-/// Where CHUNKS
-/// * K/N
-fn kth_chunks_by_byte<R>(
-    settings: &Settings,
-    reader: &mut R,
-    chunk_number: u64,
-    num_chunks: u64,
-) -> UResult<()>
-where
-    R: BufRead,
-{
-    // Get the size of the input file in bytes and compute the number
-    // of bytes per chunk.
-    //
-    // If the requested number of chunks exceeds the number of bytes
-    // in the file - just write empty byte string to stdout
-    // NOTE: the `elide_empty_files` parameter is ignored here
-    // as we do not generate any files
-    // and instead writing to stdout
-    let metadata = metadata(&settings.input).map_err(|_| {
-        USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
-    })?;
-
-    let num_bytes = metadata.len();
-    // If input file is empty and we would have written zero chunks of output,
-    // then terminate immediately.
-    // This happens on `split -e -n 3 /dev/null`, for example.
-    if num_bytes == 0 {
-        return Ok(());
-    }
-
-    // Write to stdout instead of to a file.
-    let stdout = std::io::stdout();
-    let mut writer = stdout.lock();
-
-    let chunk_size = (num_bytes / (num_chunks)).max(1);
-    let mut num_bytes: usize = num_bytes.try_into().unwrap();
-
-    let mut i = 1;
-    loop {
-        let buf: &mut Vec<u8> = &mut vec![];
+    for i in 1_u64..=num_chunks {
+        let chunk_size = chunk_size_base + (chunk_size_reminder > i - 1) as u64;
+        let buf = &mut Vec::new();
        if num_bytes > 0 {
            // Read `chunk_size` bytes from the reader into `buf`
            // except the last.
@ -1176,15 +1243,17 @@ where
            // `num_chunks`, we don't leave any bytes behind.
            let limit = {
                if i == num_chunks {
-                    num_bytes.try_into().unwrap()
+                    num_bytes
                } else {
                    chunk_size
                }
            };
+
            let n_bytes_read = reader.by_ref().take(limit).read_to_end(buf);
+
            match n_bytes_read {
                Ok(n_bytes) => {
-                    num_bytes -= n_bytes;
+                    num_bytes -= n_bytes as u64;
                }
                Err(error) => {
                    return Err(USimpleError::new(
@ -1193,11 +1262,20 @@ where
                    ));
                }
            }
+
+            match kth_chunk {
+                Some(chunk_number) => {
                    if i == chunk_number {
-                writer.write_all(buf)?;
+                        stdout_writer.write_all(buf)?;
                        break;
                    }
-            i += 1;
+                }
+                None => {
+                    let idx = (i - 1) as usize;
+                    let writer = writers.get_mut(idx).unwrap();
+                    writer.write_all(buf)?;
+                }
+            }
        } else {
            break;
        }
@ -1205,12 +1283,17 @@ where
    Ok(())
 }

-/// Split a file into a specific number of chunks by line.
+/// Split a file or STDIN into a specific number of chunks by line.
+/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
 ///
-/// This function always creates one output file for each chunk, even
+/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
+///
+/// In N chunks mode - this function always creates one output file for each chunk, even
 /// if there is an error reading or writing one of the chunks or if
-/// the input file is truncated. However, if the `filter` option is
-/// being used, then no files are created.
+/// the input file is truncated. However, if the `--filter` option is
+/// being used, then files will only be created if `$FILE` variable was used
+/// in filter command,
+/// i.e. `split -n l/10 --filter='head -c1 > $FILE' in`
 ///
 /// # Errors
 ///
@ -1219,34 +1302,48 @@ where
 ///
 /// # See also
 ///
-/// * [`kth_chunk_by_line`], which splits its input in the same way,
-///   but writes only one specified chunk to stdout.
+/// * [`n_chunks_by_byte`], which splits its input into a specific number of chunks by byte.
 ///
 /// Implements `--number=CHUNKS`
 /// Where CHUNKS
 /// * l/N
-fn split_into_n_chunks_by_line<R>(
+/// * l/K/N
+fn n_chunks_by_line<R>(
    settings: &Settings,
    reader: &mut R,
    num_chunks: u64,
+    kth_chunk: Option<u64>,
 ) -> UResult<()>
 where
    R: BufRead,
 {
-    // Get the size of the input file in bytes and compute the number
+    // Get the size of the input in bytes and compute the number
    // of bytes per chunk.
-    let metadata = metadata(&settings.input).map_err(|_| {
-        USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
-    })?;
-    let num_bytes = metadata.len();
+    let initial_buf = &mut Vec::new();
+    let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
+    let reader = initial_buf.chain(reader);
    let chunk_size = (num_bytes / num_chunks) as usize;

-    // This object is responsible for creating the filename for each chunk.
-    let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?;
+    // If the input is empty, there is no Kth chunk to output
+    // in Kth chunk of N mode, so terminate immediately.
+    // This happens on `split -n l/3/10 /dev/null`, for example.
+    if kth_chunk.is_some() && num_bytes == 0 {
+        return Ok(());
+    }

-    // Create one writer for each chunk. This will create each
-    // of the underlying files (if not in `--filter` mode).
+    // In Kth chunk of N mode - we will write to stdout instead of to a file.
+    let mut stdout_writer = std::io::stdout().lock();
+    // In N chunks mode - we will write to `num_chunks` files
    let mut writers = vec![];
+
+    // If in N chunks mode
+    // Create one writer for each chunk.
+    // This will create each of the underlying files
+    // or stdin pipes to child shell/command processes if in `--filter` mode
+    if kth_chunk.is_none() {
+        // This object is responsible for creating the filename for each chunk.
+        let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
+            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
        for _ in 0..num_chunks {
            let filename = filename_iterator
                .next()
@ -1254,84 +1351,33 @@ where
            let writer = settings.instantiate_current_writer(filename.as_str())?;
            writers.push(writer);
        }
-
-    let mut num_bytes_remaining_in_current_chunk = chunk_size;
-    let mut i = 0;
-    let sep = settings.separator;
-    for line_result in reader.split(sep) {
-        let line = line_result.unwrap();
-        let maybe_writer = writers.get_mut(i);
-        let writer = maybe_writer.unwrap();
-        let bytes = line.as_slice();
-        custom_write_all(bytes, writer, settings)?;
-        custom_write_all(&[sep], writer, settings)?;
-
-        // Add one byte for the separator character.
-        let num_bytes = bytes.len() + 1;
-        if num_bytes > num_bytes_remaining_in_current_chunk {
-            num_bytes_remaining_in_current_chunk = chunk_size;
-            i += 1;
-        } else {
-            num_bytes_remaining_in_current_chunk -= num_bytes;
    }
-    }
-
-    Ok(())
-}
-
-/// Print the k-th chunk of a file, splitting by line.
-///
-/// This function is like [`split_into_n_chunks_by_line`], but instead
-/// of writing each chunk to its own file, it only writes to stdout
-/// the contents of the chunk identified by `chunk_number`.
-///
-/// # Errors
-///
-/// This function returns an error if there is a problem reading from
-/// `reader` or writing to one of the output files.
-///
-/// # See also
-///
-/// * [`split_into_n_chunks_by_line`], which splits its input in the
-///   same way, but writes each chunk to its own file.
-///
-/// Implements `--number=CHUNKS`
-/// Where CHUNKS
-/// * l/K/N
-fn kth_chunk_by_line<R>(
-    settings: &Settings,
-    reader: &mut R,
-    chunk_number: u64,
-    num_chunks: u64,
-) -> UResult<()>
-where
-    R: BufRead,
-{
-    // Get the size of the input file in bytes and compute the number
-    // of bytes per chunk.
-    let metadata = metadata(&settings.input).map_err(|_| {
-        USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
-    })?;
-    let num_bytes = metadata.len();
-    let chunk_size = (num_bytes / num_chunks) as usize;
-
-    // Write to stdout instead of to a file.
-    let stdout = std::io::stdout();
-    let mut writer = stdout.lock();

    let mut num_bytes_remaining_in_current_chunk = chunk_size;
    let mut i = 1;
    let sep = settings.separator;
+
    for line_result in reader.split(sep) {
-        let line = line_result?;
+        // add separator back in at the end of the line
+        let mut line = line_result?;
+        line.push(sep);
        let bytes = line.as_slice();
+
+        match kth_chunk {
+            Some(chunk_number) => {
                if i == chunk_number {
-            writer.write_all(bytes)?;
-            writer.write_all(&[sep])?;
+                    stdout_writer.write_all(bytes)?;
+                }
+            }
+            None => {
+                let idx = (i - 1) as usize;
+                let maybe_writer = writers.get_mut(idx);
+                let writer = maybe_writer.unwrap();
+                custom_write_all(bytes, writer, settings)?;
+            }
        }

-        // Add one byte for the separator character.
-        let num_bytes = bytes.len() + 1;
+        let num_bytes = bytes.len();
        if num_bytes >= num_bytes_remaining_in_current_chunk {
            num_bytes_remaining_in_current_chunk = chunk_size;
            i += 1;
@ -1339,21 +1385,27 @@ where
            num_bytes_remaining_in_current_chunk -= num_bytes;
        }

+        if let Some(chunk_number) = kth_chunk {
            if i > chunk_number {
                break;
            }
        }
+    }

    Ok(())
 }

-/// Split a file into a specific number of chunks by line, but
+/// Split a file or STDIN into a specific number of chunks by line, but
 /// assign lines via round-robin
 ///
-/// This function always creates one output file for each chunk, even
+/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
+///
+/// In N chunks mode - this function always creates one output file for each chunk, even
 /// if there is an error reading or writing one of the chunks or if
-/// the input file is truncated. However, if the `filter` option is
-/// being used, then no files are created.
+/// the input file is truncated. However, if the `--filter` option is
+/// being used, then files will only be created if `$FILE` variable was used
+/// in filter command,
+/// i.e. `split -n r/10 --filter='head -c1 > $FILE' in`
 ///
 /// # Errors
 ///
@ -1362,44 +1414,65 @@ where
 ///
 /// # See also
 ///
-/// * [`split_into_n_chunks_by_line`], which splits its input in the same way,
-///   but without round robin distribution.
+/// * [`n_chunks_by_line`], which splits its input into a specific number of chunks by line.
 ///
 /// Implements `--number=CHUNKS`
 /// Where CHUNKS
 /// * r/N
-fn split_into_n_chunks_by_line_round_robin<R>(
+/// * r/K/N
+fn n_chunks_by_line_round_robin<R>(
    settings: &Settings,
    reader: &mut R,
    num_chunks: u64,
+    kth_chunk: Option<u64>,
 ) -> UResult<()>
 where
    R: BufRead,
 {
+    // In Kth chunk of N mode - we will write to stdout instead of to a file.
+    let mut stdout_writer = std::io::stdout().lock();
+    // In N chunks mode - we will write to `num_chunks` files
+    let mut writers = vec![];
+
+    // If in N chunks mode
+    // Create one writer for each chunk.
+    // This will create each of the underlying files
+    // or stdin pipes to child shell/command processes if in `--filter` mode
+    if kth_chunk.is_none() {
        // This object is responsible for creating the filename for each chunk.
        let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
-
-    // Create one writer for each chunk. This will create each
-    // of the underlying files (if not in `--filter` mode).
-    let mut writers = vec![];
        for _ in 0..num_chunks {
            let filename = filename_iterator
                .next()
-            .ok_or_else(|| io::Error::new(ErrorKind::Other, "output file suffixes exhausted"))?;
+                .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
            let writer = settings.instantiate_current_writer(filename.as_str())?;
            writers.push(writer);
        }
+    }

    let num_chunks: usize = num_chunks.try_into().unwrap();
    let sep = settings.separator;
    let mut closed_writers = 0;
    for (i, line_result) in reader.split(sep).enumerate() {
-        let maybe_writer = writers.get_mut(i % num_chunks);
-        let writer = maybe_writer.unwrap();
-        let mut line = line_result.unwrap();
+        // add separator back in at the end of the line
+        let mut line = line_result?;
        line.push(sep);
        let bytes = line.as_slice();
+
+        match kth_chunk {
+            Some(chunk_number) => {
+                // The `.enumerate()` method returns index `i` starting with 0,
+                // but chunk number is given as a 1-indexed number,
+                // so compare to `chunk_number - 1`
+                if (i % num_chunks) == (chunk_number - 1) as usize {
+                    stdout_writer.write_all(bytes)?;
+                }
+            }
+            None => {
+                let maybe_writer = writers.get_mut(i % num_chunks);
+                let writer = maybe_writer.unwrap();
+
                let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
                if !writer_stdin_open {
                    closed_writers += 1;
@ -1409,66 +1482,15 @@ where
                    }
                }
            }
-
-    Ok(())
-}
-
-/// Print the k-th chunk of a file, splitting by line, but
-/// assign lines via round-robin to the specified number of output
-/// chunks, but output only the *k*th chunk.
-///
-/// This function is like [`kth_chunk_by_line`], as it only writes to stdout and
-/// prints out only *k*th chunk
-/// It is also like [`split_into_n_chunks_by_line_round_robin`], as it is assigning chunks
-/// using round robin distribution
-///
-/// # Errors
-///
-/// This function returns an error if there is a problem reading from
-/// `reader` or writing to one of the output files.
-///
-/// # See also
-///
-/// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the
-///   same way, but writes each chunk to its own file.
-///
-/// Implements `--number=CHUNKS`
-/// Where CHUNKS
-/// * r/K/N
-fn kth_chunk_by_line_round_robin<R>(
-    settings: &Settings,
-    reader: &mut R,
-    chunk_number: u64,
-    num_chunks: u64,
-) -> UResult<()>
-where
-    R: BufRead,
-{
-    // Write to stdout instead of to a file.
-    let stdout = std::io::stdout();
-    let mut writer = stdout.lock();
-
-    let num_chunks: usize = num_chunks.try_into().unwrap();
-    let chunk_number: usize = chunk_number.try_into().unwrap();
-    let sep = settings.separator;
-    // The chunk number is given as a 1-indexed number, but it
-    // is a little easier to deal with a 0-indexed number
-    // since `.enumerate()` returns index `i` starting with 0
-    let chunk_number = chunk_number - 1;
-    for (i, line_result) in reader.split(sep).enumerate() {
-        let line = line_result?;
-        let bytes = line.as_slice();
-        if (i % num_chunks) == chunk_number {
-            writer.write_all(bytes)?;
-            writer.write_all(&[sep])?;
        }
    }
+
    Ok(())
 }

 #[allow(clippy::cognitive_complexity)]
 fn split(settings: &Settings) -> UResult<()> {
-    let mut reader = BufReader::new(if settings.input == "-" {
+    let r_box = if settings.input == "-" {
        Box::new(stdin()) as Box<dyn Read>
    } else {
        let r = File::open(Path::new(&settings.input)).map_err_context(|| {
@ -1478,26 +1500,33 @@ fn split(settings: &Settings) -> UResult<()> {
            )
        })?;
        Box::new(r) as Box<dyn Read>
-    });
+    };
+    let mut reader = if let Some(c) = settings.io_blksize {
+        BufReader::with_capacity(c, r_box)
+    } else {
+        BufReader::new(r_box)
+    };

    match settings.strategy {
        Strategy::Number(NumberType::Bytes(num_chunks)) => {
-            split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
+            // Formerly split_into_n_chunks_by_byte; no chunk number was requested, hence `None`.
+            n_chunks_by_byte(settings, &mut reader, num_chunks, None)
        }
        Strategy::Number(NumberType::KthBytes(chunk_number, num_chunks)) => {
-            kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks)
+            // Formerly kth_chunks_by_byte; the requested chunk number is passed as `Some`.
+            n_chunks_by_byte(settings, &mut reader, num_chunks, Some(chunk_number))
        }
        Strategy::Number(NumberType::Lines(num_chunks)) => {
-            split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
+            n_chunks_by_line(settings, &mut reader, num_chunks, None)
        }
        Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => {
-            kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
+            n_chunks_by_line(settings, &mut reader, num_chunks, Some(chunk_number))
        }
        Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
-            split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks)
+            n_chunks_by_line_round_robin(settings, &mut reader, num_chunks, None)
        }
        Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => {
-            kth_chunk_by_line_round_robin(settings, &mut reader, chunk_number, num_chunks)
+            n_chunks_by_line_round_robin(settings, &mut reader, num_chunks, Some(chunk_number))
        }
        Strategy::Lines(chunk_size) => {
            let mut writer = LineChunkWriter::new(chunk_size, settings)?;
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@ -2,7 +2,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc
+// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc

 use crate::common::util::{AtPath, TestScenario};
 use rand::{thread_rng, Rng, SeedableRng};
@ -704,54 +704,41 @@ fn test_split_overflow_bytes_size() {
    assert_eq!(glob.collate(), at.read_bytes(name));
 }

-#[test]
-#[cfg(target_pointer_width = "32")]
-fn test_split_chunks_num_chunks_oversized_32() {
-    let scene = TestScenario::new(util_name!());
-    let at = &scene.fixtures;
-    at.touch("file");
-    scene
-        .ucmd()
-        .args(&["--number", "5000000000", "sixhundredfiftyonebytes.txt"])
-        .fails()
-        .code_is(1)
-        .stderr_only("split: Number of chunks too big\n");
-}
-
+/// With empty piped stdin and `--number=1`, split must succeed, write a single
+/// empty output file `xaa`, and create no second file `xab`.
 #[test]
 fn test_split_stdin_num_chunks() {
-    new_ucmd!()
-        .args(&["--number=1"])
-        .fails()
-        .code_is(1)
-        .stderr_only("split: -: cannot determine file size\n");
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["--number=1"]).pipe_in("").succeeds();
+    assert_eq!(file_read(&at, "xaa"), "");
+    assert!(!at.plus("xab").exists());
 }

+/// `--number=1/2` on piped stdin must print the first of two byte chunks to
+/// stdout: the 10-byte input yields its first 5 bytes, `"1\n2\n3"`.
 #[test]
 fn test_split_stdin_num_kth_chunk() {
    new_ucmd!()
        .args(&["--number=1/2"])
-        .fails()
-        .code_is(1)
-        .stderr_only("split: -: cannot determine file size\n");
+        .pipe_in("1\n2\n3\n4\n5\n")
+        .succeeds()
+        .stdout_only("1\n2\n3");
 }

+/// `--number=l/2` on piped stdin must split five lines into two files without
+/// breaking any line: `xaa` gets the first three lines, `xab` the remaining
+/// two, and no third file `xac` is created.
 #[test]
 fn test_split_stdin_num_line_chunks() {
-    new_ucmd!()
-        .args(&["--number=l/2"])
-        .fails()
-        .code_is(1)
-        .stderr_only("split: -: cannot determine file size\n");
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["--number=l/2"])
+        .pipe_in("1\n2\n3\n4\n5\n")
+        .succeeds();
+    assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n");
+    assert_eq!(file_read(&at, "xab"), "4\n5\n");
+    assert!(!at.plus("xac").exists());
 }

+/// `--number=l/2/5` on piped stdin must print only the second of five line
+/// chunks to stdout, which for this five-line input is `"2\n"`.
 #[test]
 fn test_split_stdin_num_kth_line_chunk() {
    new_ucmd!()
        .args(&["--number=l/2/5"])
-        .fails()
-        .code_is(1)
-        .stderr_only("split: -: cannot determine file size\n");
+        .pipe_in("1\n2\n3\n4\n5\n")
+        .succeeds()
+        .stdout_only("2\n");
 }

 fn file_read(at: &AtPath, filename: &str) -> String {
@ -912,6 +899,14 @@ fn test_suffixes_exhausted() {
        .stderr_only("split: output file suffixes exhausted\n");
 }

+/// `-n 100` combined with `-a 1` must fail, reporting that the suffix length
+/// needs to be at least 2.
+#[test]
+fn test_suffix_length_req() {
+    new_ucmd!()
+        .args(&["-n", "100", "-a", "1", "asciilowercase.txt"])
+        .fails()
+        .stderr_only("split: the suffix length needs to be at least 2\n");
+}
+
 #[test]
 fn test_verbose() {
    new_ucmd!()
@ -937,11 +932,11 @@ fn test_number_n() {
        s
    };
    ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();
-    assert_eq!(file_read("xaa"), "abcde");
-    assert_eq!(file_read("xab"), "fghij");
-    assert_eq!(file_read("xac"), "klmno");
-    assert_eq!(file_read("xad"), "pqrst");
-    assert_eq!(file_read("xae"), "uvwxyz\n");
+    assert_eq!(file_read("xaa"), "abcdef");
+    assert_eq!(file_read("xab"), "ghijkl");
+    assert_eq!(file_read("xac"), "mnopq");
+    assert_eq!(file_read("xad"), "rstuv");
+    assert_eq!(file_read("xae"), "wxyz\n");
    #[cfg(unix)]
    new_ucmd!()
        .args(&["--number=100", "/dev/null"])
@ -954,11 +949,11 @@ fn test_number_kth_of_n() {
    new_ucmd!()
        .args(&["--number=3/5", "asciilowercase.txt"])
        .succeeds()
-        .stdout_only("klmno");
+        .stdout_only("mnopq");
    new_ucmd!()
        .args(&["--number=5/5", "asciilowercase.txt"])
        .succeeds()
-        .stdout_only("uvwxyz\n");
+        .stdout_only("wxyz\n");
    new_ucmd!()
        .args(&["-e", "--number=99/100", "asciilowercase.txt"])
        .succeeds()
@ -1046,11 +1041,11 @@ fn test_split_number_with_io_blksize() {
    };
    ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"])
        .succeeds();
-    assert_eq!(file_read("xaa"), "abcde");
-    assert_eq!(file_read("xab"), "fghij");
-    assert_eq!(file_read("xac"), "klmno");
-    assert_eq!(file_read("xad"), "pqrst");
-    assert_eq!(file_read("xae"), "uvwxyz\n");
+    assert_eq!(file_read("xaa"), "abcdef");
+    assert_eq!(file_read("xab"), "ghijkl");
+    assert_eq!(file_read("xac"), "mnopq");
+    assert_eq!(file_read("xad"), "rstuv");
+    assert_eq!(file_read("xae"), "wxyz\n");
 }

 #[test]
@ -1065,6 +1060,32 @@ fn test_split_default_with_io_blksize() {
    assert_eq!(glob.collate(), at.read_bytes(name));
 }

+/// `---io-blksize` must reject values that are not a valid IO block size and
+/// echo the offending value in the error message.
+#[test]
+fn test_split_invalid_io_blksize() {
+    // Not a number at all.
+    new_ucmd!()
+        .args(&["---io-blksize=XYZ", "threebytes.txt"])
+        .fails()
+        .stderr_only("split: invalid IO block size: 'XYZ'\n");
+    // Numeric, but above the cap placed on the block size.
+    new_ucmd!()
+        .args(&["---io-blksize=5000000000", "threebytes.txt"])
+        .fails()
+        .stderr_only("split: invalid IO block size: '5000000000'\n");
+    // 2146435072 == `i32::MAX >> 20 << 20`; this value is only expected to be
+    // rejected on 32-bit targets, hence the `cfg` guard on the statement.
+    #[cfg(target_pointer_width = "32")]
+    new_ucmd!()
+        .args(&["---io-blksize=2146435072", "threebytes.txt"])
+        .fails()
+        .stderr_only("split: invalid IO block size: '2146435072'\n");
+}
+
+/// Piped stdin with `--number=3` and `---io-blksize=600` must fail with
+/// "cannot determine input size".
+// NOTE(review): the fixture name suggests 651 bytes of input, i.e. more than
+// the 600-byte block size; confirm that this is what triggers the failure.
+#[test]
+fn test_split_number_oversized_stdin() {
+    new_ucmd!()
+        .args(&["--number=3", "---io-blksize=600"])
+        .pipe_in_fixture("sixhundredfiftyonebytes.txt")
+        .fails()
+        .stderr_only("split: -: cannot determine input size\n");
+}
+
 #[test]
 fn test_invalid_suffix_length() {
    new_ucmd!()
@ -1157,6 +1178,18 @@ fn test_elide_dev_null() {
    assert!(!at.plus("xac").exists());
 }

+/// Splitting `/dev/zero` into 3 chunks must fail with "cannot determine file
+/// size", and none of the output files `xaa`, `xab`, `xac` may be created.
+/// Unix-only, since it relies on `/dev/zero`.
+#[test]
+#[cfg(unix)]
+fn test_dev_zero() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n", "3", "/dev/zero"])
+        .fails()
+        .stderr_only("split: /dev/zero: cannot determine file size\n");
+    // The failure must not leave any partially written chunk files behind.
+    assert!(!at.plus("xaa").exists());
+    assert!(!at.plus("xab").exists());
+    assert!(!at.plus("xac").exists());
+}
+
 #[test]
 fn test_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -1182,6 +1215,15 @@ fn test_lines_kth() {
        .stdout_only("20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n");
 }

+/// Requesting line chunk 3 of 10 (`-n l/3/10`) from `/dev/null` must succeed
+/// and print nothing. Unix-only, since it relies on `/dev/null`.
+#[test]
+#[cfg(unix)]
+fn test_lines_kth_dev_null() {
+    new_ucmd!()
+        .args(&["-n", "l/3/10", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
+}
+
 #[test]
 fn test_line_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -1321,7 +1363,7 @@ fn test_numeric_suffix() {
 }

 #[test]
-fn test_numeric_suffix_alias() {
+fn test_numeric_suffix_inferred() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-n", "4", "--numeric=9", "threebytes.txt"])
        .succeeds()