split: implementing separator option (#5331)

* split: implementing separator option * split: separator option - handle multiple update * split: style * split: separator tests * split: separator tests - stdin in ci/cd * split: tests - ci/cd stdin errors * split: refactor based on feedback * split: improve test coverage * split: fix broken pipe error in tests with stdin * split: fix for handle_multiple_separator_options * split: comments * split: refactor separator code * split: changes based on feedback * split: changes based on feedback
2025-07-27 19:17:43 +00:00 · 2023-10-02 18:42:46 -04:00 · 2023-10-02 18:42:46 -04:00 · c5a0aa92f8
commit c5a0aa92f8
parent 9f6a720582
5 changed files with 348 additions and 38 deletions
--- a/src/uu/split/split.md
+++ b/src/uu/split/split.md
@ -11,3 +11,16 @@ Create output files containing consecutive or interleaved sections of input
 ## After Help
 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is -, read standard input.
 The SIZE argument is an integer and optional unit (example: 10K is 10*1024).
 Units are K,M,G,T,P,E,Z,Y,R,Q (powers of 1024) or KB,MB,... (powers of 1000).
 Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
 CHUNKS may be:
 - N       split into N files based on size of input
 - K/N     output Kth of N to stdout
 - l/N     split into N files without splitting lines/records
 - l/K/N   output Kth of N to stdout without splitting lines/records
 - r/N     like 'l' but use round robin distribution
 - r/K/N   likewise but only output Kth of N to stdout
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@ -11,7 +11,7 @@ mod platform;
 use crate::filenames::FilenameIterator;
 use crate::filenames::SuffixType;
-use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
+use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command, ValueHint};
 use std::env;
 use std::ffi::OsString;
 use std::fmt;
@ -39,6 +39,7 @@ static OPT_HEX_SUFFIXES_SHORT: &str = "-x";
 static OPT_SUFFIX_LENGTH: &str = "suffix-length";
 static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
 static OPT_VERBOSE: &str = "verbose";
 static OPT_SEPARATOR: &str = "separator";
 //The ---io and ---io-blksize parameters are consumed and ignored.
 //The parameter is included to make GNU coreutils tests pass.
 static OPT_IO: &str = "-io";
@ -55,7 +56,6 @@ const AFTER_HELP: &str = help_section!("after help", "split.md");
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
    let (args, obs_lines) = handle_obsolete(args);
    let matches = uu_app().try_get_matches_from(args)?;
    match Settings::from(&matches, &obs_lines) {
@ -145,6 +145,7 @@ fn should_extract_obs_lines(
        && !slice.starts_with("-C")
        && !slice.starts_with("-l")
        && !slice.starts_with("-n")
        && !slice.starts_with("-t")
 }
 /// Helper function to [`filter_args`]
@ -208,13 +209,18 @@ fn handle_preceding_options(
            || &slice[2..] == OPT_ADDITIONAL_SUFFIX
            || &slice[2..] == OPT_FILTER
            || &slice[2..] == OPT_NUMBER
-            || &slice[2..] == OPT_SUFFIX_LENGTH;
+            || &slice[2..] == OPT_SUFFIX_LENGTH
            || &slice[2..] == OPT_SEPARATOR;
    }
    // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
    // following slice should be treaded as value for this option
    // even if it starts with '-' (which would be treated as hyphen prefixed value)
-    *preceding_short_opt_req_value =
+    *preceding_short_opt_req_value = slice == "-b"
-        slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a";
+        || slice == "-C"
        || slice == "-l"
        || slice == "-n"
        || slice == "-a"
        || slice == "-t";
    // slice is a value
    // reset preceding option flags
    if !slice.starts_with('-') {
@ -278,7 +284,7 @@ pub fn uu_app() -> Command {
                .long(OPT_FILTER)
                .allow_hyphen_values(true)
                .value_name("COMMAND")
-                .value_hint(clap::ValueHint::CommandName)
+                .value_hint(ValueHint::CommandName)
                .help(
                    "write to shell COMMAND; file name is $FILE (Currently not implemented for Windows)",
                ),
@ -293,7 +299,7 @@ pub fn uu_app() -> Command {
        .arg(
            Arg::new(OPT_NUMERIC_SUFFIXES_SHORT)
                .short('d')
-                .action(clap::ArgAction::SetTrue)
+                .action(ArgAction::SetTrue)
                .overrides_with_all([
                    OPT_NUMERIC_SUFFIXES,
                    OPT_NUMERIC_SUFFIXES_SHORT,
@ -314,12 +320,13 @@ pub fn uu_app() -> Command {
                    OPT_HEX_SUFFIXES,
                    OPT_HEX_SUFFIXES_SHORT
                ])
                .value_name("FROM")
                .help("same as -d, but allow setting the start value"),
        )
        .arg(
            Arg::new(OPT_HEX_SUFFIXES_SHORT)
                .short('x')
-                .action(clap::ArgAction::SetTrue)
+                .action(ArgAction::SetTrue)
                .overrides_with_all([
                    OPT_NUMERIC_SUFFIXES,
                    OPT_NUMERIC_SUFFIXES_SHORT,
@ -340,6 +347,7 @@ pub fn uu_app() -> Command {
                    OPT_HEX_SUFFIXES,
                    OPT_HEX_SUFFIXES_SHORT
                ])
                .value_name("FROM")
                .help("same as -x, but allow setting the start value"),
        )
        .arg(
@ -357,6 +365,15 @@ pub fn uu_app() -> Command {
                .help("print a diagnostic just before each output file is opened")
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new(OPT_SEPARATOR)
                .short('t')
                .long(OPT_SEPARATOR)
                .allow_hyphen_values(true)
                .value_name("SEP")
                .action(ArgAction::Append)
                .help("use SEP instead of newline as the record separator; '\0' (zero) specifies the NUL character"),
        )
        .arg(
            Arg::new(OPT_IO)
                .long("io")
@ -372,7 +389,7 @@ pub fn uu_app() -> Command {
        .arg(
            Arg::new(ARG_INPUT)
                .default_value("-")
-                .value_hint(clap::ValueHint::FilePath),
+                .value_hint(ValueHint::FilePath),
        )
        .arg(
            Arg::new(ARG_PREFIX)
@ -696,6 +713,7 @@ struct Settings {
    filter: Option<String>,
    strategy: Strategy,
    verbose: bool,
    separator: u8,
    /// Whether to *not* produce empty files when using `-n`.
    ///
@ -722,6 +740,12 @@ enum SettingsError {
    /// Suffix is not large enough to split into specified chunks
    SuffixTooSmall(usize),
    /// Multi-character (Invalid) separator
    MultiCharacterSeparator(String),
    /// Multiple different separator characters
    MultipleSeparatorCharacters,
    /// The `--filter` option is not supported on Windows.
    #[cfg(windows)]
    NotSupported,
@ -743,6 +767,12 @@ impl fmt::Display for SettingsError {
            Self::Strategy(e) => e.fmt(f),
            Self::SuffixNotParsable(s) => write!(f, "invalid suffix length: {}", s.quote()),
            Self::SuffixTooSmall(i) => write!(f, "the suffix length needs to be at least {i}"),
            Self::MultiCharacterSeparator(s) => {
                write!(f, "multi-character separator {}", s.quote())
            }
            Self::MultipleSeparatorCharacters => {
                write!(f, "multiple separator characters specified")
            }
            Self::SuffixContainsSeparator(s) => write!(
                f,
                "invalid suffix {}, contains directory separator",
@ -783,6 +813,26 @@ impl Settings {
                }
            }
        }
        // Make sure that separator is only one UTF8 character (if specified)
        // defaults to '\n' - newline character
        // If the same separator (the same value) was used multiple times - `split` should NOT fail
        // If the separator was used multiple times but with different values (not all values are the same) - `split` should fail
        let separator = match matches.get_many::<String>(OPT_SEPARATOR) {
            Some(mut sep_values) => {
                let first = sep_values.next().unwrap(); // it is safe to just unwrap here since Clap should not return empty ValuesRef<'_,String> in the option from get_many() call
                if !sep_values.all(|s| s == first) {
                    return Err(SettingsError::MultipleSeparatorCharacters);
                }
                match first.as_str() {
                    "\\0" => b'\0',
                    s if s.as_bytes().len() == 1 => s.as_bytes()[0],
                    s => return Err(SettingsError::MultiCharacterSeparator(s.to_owned())),
                }
            }
            None => b'\n',
        };
        let result = Self {
            suffix_length: suffix_length_str
                .parse()
@ -791,6 +841,7 @@ impl Settings {
            suffix_start,
            additional_suffix,
            verbose: matches.value_source("verbose") == Some(ValueSource::CommandLine),
            separator,
            strategy,
            input: matches.get_one::<String>(ARG_INPUT).unwrap().to_owned(),
            prefix: matches.get_one::<String>(ARG_PREFIX).unwrap().to_owned(),
@ -1019,7 +1070,8 @@ impl<'a> Write for LineChunkWriter<'a> {
        // corresponds to the current chunk number.
        let mut prev = 0;
        let mut total_bytes_written = 0;
-        for i in memchr::memchr_iter(b'\n', buf) {
+        let sep = self.settings.separator;
        for i in memchr::memchr_iter(sep, buf) {
            // If we have exceeded the number of lines to write in the
            // current chunk, then start a new chunk and its
            // corresponding writer.
@ -1036,8 +1088,8 @@ impl<'a> Write for LineChunkWriter<'a> {
            }
            // Write the line, starting from *after* the previous
-            // newline character and ending *after* the current
+            // separator character and ending *after* the current
-            // newline character.
+            // separator character.
            let n = self.inner.write(&buf[prev..i + 1])?;
            total_bytes_written += n;
            prev = i + 1;
@ -1175,21 +1227,22 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
                self.num_bytes_remaining_in_current_chunk = self.chunk_size.try_into().unwrap();
            }
-            // Find the first newline character in the buffer.
+            // Find the first separator (default - newline character) in the buffer.
-            match memchr::memchr(b'\n', buf) {
+            let sep = self.settings.separator;
-                // If there is no newline character and the buffer is
+            match memchr::memchr(sep, buf) {
                // If there is no separator character and the buffer is
                // not empty, then write as many bytes as we can and
                // then move on to the next chunk if necessary.
                None => {
                    let end = self.num_bytes_remaining_in_current_chunk;
                    // This is ugly but here to match GNU behavior. If the input
-                    // doesn't end with a \n, pretend that it does for handling
+                    // doesn't end with a separator, pretend that it does for handling
                    // the second to last segment chunk. See `line-bytes.sh`.
                    if end == buf.len()
                        && self.num_bytes_remaining_in_current_chunk
                            < self.chunk_size.try_into().unwrap()
-                        && buf[buf.len() - 1] != b'\n'
+                        && buf[buf.len() - 1] != sep
                    {
                        self.num_bytes_remaining_in_current_chunk = 0;
                    } else {
@ -1200,8 +1253,8 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
                    }
                }
-                // If there is a newline character and the line
+                // If there is a separator character and the line
-                // (including the newline character) will fit in the
+                // (including the separator character) will fit in the
                // current chunk, then write the entire line and
                // continue to the next iteration. (See chunk 1 in the
                // example comment above.)
@ -1212,8 +1265,8 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
                    buf = &buf[num_bytes_written..];
                }
-                // If there is a newline character, the line
+                // If there is a separator character, the line
-                // (including the newline character) will not fit in
+                // (including the separator character) will not fit in
                // the current chunk, *and* no other lines have been
                // written to the current chunk, then write as many
                // bytes as we can and continue to the next
@ -1230,8 +1283,8 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
                    buf = &buf[num_bytes_written..];
                }
-                // If there is a newline character, the line
+                // If there is a separator character, the line
-                // (including the newline character) will not fit in
+                // (including the separator character) will not fit in
                // the current chunk, and at least one other line has
                // been written to the current chunk, then signal to
                // the next iteration that a new chunk needs to be
@ -1489,15 +1542,16 @@ where
    let mut num_bytes_remaining_in_current_chunk = chunk_size;
    let mut i = 0;
-    for line_result in reader.lines() {
+    let sep = settings.separator;
    for line_result in reader.split(sep) {
        let line = line_result.unwrap();
        let maybe_writer = writers.get_mut(i);
        let writer = maybe_writer.unwrap();
-        let bytes = line.as_bytes();
+        let bytes = line.as_slice();
        writer.write_all(bytes)?;
-        writer.write_all(b"\n")?;
+        writer.write_all(&[sep])?;
-        // Add one byte for the newline character.
+        // Add one byte for the separator character.
        let num_bytes = bytes.len() + 1;
        if num_bytes > num_bytes_remaining_in_current_chunk {
            num_bytes_remaining_in_current_chunk = chunk_size;
@ -1546,15 +1600,16 @@ where
    let mut num_bytes_remaining_in_current_chunk = chunk_size;
    let mut i = 0;
-    for line_result in reader.lines() {
+    let sep = settings.separator;
    for line_result in reader.split(sep) {
        let line = line_result?;
-        let bytes = line.as_bytes();
+        let bytes = line.as_slice();
        if i == chunk_number {
            writer.write_all(bytes)?;
-            writer.write_all(b"\n")?;
+            writer.write_all(&[sep])?;
        }
-        // Add one byte for the newline character.
+        // Add one byte for the separator character.
        let num_bytes = bytes.len() + 1;
        if num_bytes >= num_bytes_remaining_in_current_chunk {
            num_bytes_remaining_in_current_chunk = chunk_size;
@ -1601,13 +1656,14 @@ where
    }
    let num_chunks: usize = num_chunks.try_into().unwrap();
-    for (i, line_result) in reader.lines().enumerate() {
+    let sep = settings.separator;
    for (i, line_result) in reader.split(sep).enumerate() {
        let line = line_result.unwrap();
        let maybe_writer = writers.get_mut(i % num_chunks);
        let writer = maybe_writer.unwrap();
-        let bytes = line.as_bytes();
+        let bytes = line.as_slice();
        writer.write_all(bytes)?;
-        writer.write_all(b"\n")?;
+        writer.write_all(&[sep])?;
    }
    Ok(())
@ -1632,7 +1688,7 @@ where
 /// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the
 ///   same way, but writes each chunk to its own file.
 fn kth_chunk_by_line_round_robin<R>(
-    _settings: &Settings,
+    settings: &Settings,
    reader: &mut R,
    chunk_number: u64,
    num_chunks: u64,
@ -1646,12 +1702,13 @@ where
    let num_chunks: usize = num_chunks.try_into().unwrap();
    let chunk_number: usize = chunk_number.try_into().unwrap();
-    for (i, line_result) in reader.lines().enumerate() {
+    let sep = settings.separator;
    for (i, line_result) in reader.split(sep).enumerate() {
        let line = line_result?;
-        let bytes = line.as_bytes();
+        let bytes = line.as_slice();
        if (i % num_chunks) == chunk_number {
            writer.write_all(bytes)?;
-            writer.write_all(b"\n")?;
+            writer.write_all(&[sep])?;
        }
    }
    Ok(())
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@ -1483,3 +1483,242 @@ fn test_split_non_utf8_argument_windows() {
        .fails()
        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
 }
 // Test '--separator' / '-t' option following GNU tests example
 // test separators: '\n' , '\0' , ';'
 // test with '--lines=2' , '--line-bytes=4' , '--number=l/3' , '--number=r/3' , '--number=l/1/3' , '--number=r/1/3'
 #[test]
 fn test_split_separator_nl_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--lines=2", "-t", "\n"])
        .pipe_in("1\n2\n3\n4\n5\n")
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\n2\n");
    assert_eq!(file_read(&at, "xab"), "3\n4\n");
    assert_eq!(file_read(&at, "xac"), "5\n");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nl_line_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--line-bytes=4", "-t", "\n"])
        .pipe_in("1\n2\n3\n4\n5\n")
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\n2\n");
    assert_eq!(file_read(&at, "xab"), "3\n4\n");
    assert_eq!(file_read(&at, "xac"), "5\n");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nl_number_l() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=l/3", "--separator=\n", "fivelines.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\n2\n");
    assert_eq!(file_read(&at, "xab"), "3\n4\n");
    assert_eq!(file_read(&at, "xac"), "5\n");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nl_number_r() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=r/3", "--separator", "\n", "fivelines.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\n4\n");
    assert_eq!(file_read(&at, "xab"), "2\n5\n");
    assert_eq!(file_read(&at, "xac"), "3\n");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nul_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--lines=2", "-t", "\\0", "separator_nul.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\02\0");
    assert_eq!(file_read(&at, "xab"), "3\04\0");
    assert_eq!(file_read(&at, "xac"), "5\0");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nul_line_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--line-bytes=4", "-t", "\\0", "separator_nul.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\02\0");
    assert_eq!(file_read(&at, "xab"), "3\04\0");
    assert_eq!(file_read(&at, "xac"), "5\0");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nul_number_l() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=l/3", "--separator=\\0", "separator_nul.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\02\0");
    assert_eq!(file_read(&at, "xab"), "3\04\0");
    assert_eq!(file_read(&at, "xac"), "5\0");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_nul_number_r() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=r/3", "--separator=\\0", "separator_nul.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1\04\0");
    assert_eq!(file_read(&at, "xab"), "2\05\0");
    assert_eq!(file_read(&at, "xac"), "3\0");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_semicolon_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--lines=2", "-t", ";", "separator_semicolon.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1;2;");
    assert_eq!(file_read(&at, "xab"), "3;4;");
    assert_eq!(file_read(&at, "xac"), "5;");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_semicolon_line_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--line-bytes=4", "-t", ";", "separator_semicolon.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1;2;");
    assert_eq!(file_read(&at, "xab"), "3;4;");
    assert_eq!(file_read(&at, "xac"), "5;");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_semicolon_number_l() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=l/3", "--separator=;", "separator_semicolon.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1;2;");
    assert_eq!(file_read(&at, "xab"), "3;4;");
    assert_eq!(file_read(&at, "xac"), "5;");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_semicolon_number_r() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["--number=r/3", "--separator=;", "separator_semicolon.txt"])
        .succeeds();
    assert_eq!(file_read(&at, "xaa"), "1;4;");
    assert_eq!(file_read(&at, "xab"), "2;5;");
    assert_eq!(file_read(&at, "xac"), "3;");
    assert!(!at.plus("xad").exists());
 }
 #[test]
 fn test_split_separator_semicolon_number_kth_l() {
    new_ucmd!()
        .args(&[
            "--number=l/1/3",
            "--separator",
            ";",
            "separator_semicolon.txt",
        ])
        .succeeds()
        .stdout_only("1;2;");
 }
 #[test]
 fn test_split_separator_semicolon_number_kth_r() {
    new_ucmd!()
        .args(&[
            "--number=r/1/3",
            "--separator",
            ";",
            "separator_semicolon.txt",
        ])
        .succeeds()
        .stdout_only("1;4;");
 }
 // Test error edge cases for separator option
 #[test]
 fn test_split_separator_no_value() {
    new_ucmd!()
        .args(&["-t"])
        .ignore_stdin_write_error()
        .pipe_in("a\n")
        .fails()
        .stderr_contains(
            "error: a value is required for '--separator <SEP>' but none was supplied",
        );
 }
 #[test]
 fn test_split_separator_invalid_usage() {
    let scene = TestScenario::new(util_name!());
    scene
        .ucmd()
        .args(&["--separator=xx"])
        .ignore_stdin_write_error()
        .pipe_in("a\n")
        .fails()
        .no_stdout()
        .stderr_contains("split: multi-character separator 'xx'");
    scene
        .ucmd()
        .args(&["-ta", "-tb"])
        .ignore_stdin_write_error()
        .pipe_in("a\n")
        .fails()
        .no_stdout()
        .stderr_contains("split: multiple separator characters specified");
    scene
        .ucmd()
        .args(&["-t'\n'", "-tb"])
        .ignore_stdin_write_error()
        .pipe_in("a\n")
        .fails()
        .no_stdout()
        .stderr_contains("split: multiple separator characters specified");
 }
 // Test using same separator multiple times
 #[test]
 fn test_split_separator_same_multiple() {
    let scene = TestScenario::new(util_name!());
    scene
        .ucmd()
        .args(&["--separator=:", "--separator=:", "fivelines.txt"])
        .succeeds();
    scene
        .ucmd()
        .args(&["-t:", "--separator=:", "fivelines.txt"])
        .succeeds();
    scene
        .ucmd()
        .args(&["-t", ":", "-t", ":", "fivelines.txt"])
        .succeeds();
    scene
        .ucmd()
        .args(&["-t:", "-t:", "-t,", "fivelines.txt"])
        .fails();
 }
--- a/tests/fixtures/split/separator_nul.txt
+++ b/tests/fixtures/split/separator_nul.txt
--- a/tests/fixtures/split/separator_semicolon.txt
+++ b/tests/fixtures/split/separator_semicolon.txt
@ -0,0 +1 @@
 1;2;3;4;5;