Merge pull request #5252 from zhitkoff/split-gnu-test-fail.sh

split: implement remaining -n variants and pass GNU tests/split/fail.sh
2025-09-15 11:36:16 +00:00 · 2023-09-07 17:15:09 +02:00 · 2023-09-07 17:15:09 +02:00 · 80f8eb6724
commit 80f8eb6724
parent 9d8656c0fa 3f065eed8a
3 changed files with 947 additions and 59 deletions
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@ -2,7 +2,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen
+// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc

 use crate::common::util::{AtPath, TestScenario};
 use rand::{thread_rng, Rng, SeedableRng};
@ -170,6 +170,22 @@ fn test_split_str_prefixed_chunks_by_bytes() {
    assert_eq!(glob.collate(), at.read_bytes(name));
 }

+/// Test short bytes option concatenated with value
+#[test]
+fn test_split_by_bytes_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_by_bytes_short_concatenated_with_value";
+    RandomFile::new(&at, name).add_bytes(10000);
+    ucmd.args(&["-b1000", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    for filename in glob.collect() {
+        assert_eq!(glob.directory.metadata(&filename).len(), 1000);
+    }
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 // This is designed to test what happens when the desired part size is not a
 // multiple of the buffer size and we hopefully don't overshoot the desired part
 // size.
@ -238,6 +254,18 @@ fn test_additional_suffix_no_slash() {
        .usage_error("invalid suffix 'a/b', contains directory separator");
 }

+#[test]
+fn test_split_additional_suffix_hyphen_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_additional_suffix";
+    RandomFile::new(&at, name).add_lines(2000);
+    ucmd.args(&["--additional-suffix", "-300", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]-300$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 // note: the test_filter* tests below are unix-only
 // windows support has been waived for now because of the difficulty of getting
 // the `cmd` call right
@ -312,12 +340,292 @@ fn test_split_lines_number() {
        .succeeds()
        .no_stderr()
        .no_stdout();
+    scene
+        .ucmd()
+        .args(&["--lines", "0", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 0\n");
+    scene
+        .ucmd()
+        .args(&["-0", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 0\n");
    scene
        .ucmd()
        .args(&["--lines", "2fb", "file"])
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of lines: '2fb'\n");
+    scene
+        .ucmd()
+        .args(&["--lines", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 'file'\n");
+}
+
+/// Test short lines option with value concatenated
+#[test]
+fn test_split_lines_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_num_prefixed_chunks_by_lines";
+    RandomFile::new(&at, name).add_lines(10000);
+    ucmd.args(&["-l1000", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option standalone
+#[test]
+fn test_split_obs_lines_standalone() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-standalone";
+    RandomFile::new(&at, name).add_lines(4);
+    ucmd.args(&["-2", name]).succeeds().no_stderr().no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option standalone overflow
+#[test]
+fn test_split_obs_lines_standalone_overflow() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-standalone";
+    RandomFile::new(&at, name).add_lines(4);
+    ucmd.args(&["-99999999999999999991", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 1);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option as part of invalid combined short options
+#[test]
+fn test_split_obs_lines_within_invalid_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-2fb", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("error: unexpected argument '-f' found\n");
+}
+
+/// Test for obsolete lines option as part of combined short options
+#[test]
+fn test_split_obs_lines_within_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "obs-lines-within-shorts";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-x200de", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x\d\d$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for obsolete lines option as part of combined short options with tailing suffix length with value
+#[test]
+fn test_split_obs_lines_within_combined_shorts_tailing_suffix_length() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-combined-shorts-tailing-suffix-length";
+    RandomFile::new(&at, name).add_lines(1000);
+    ucmd.args(&["-d200a4", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x\d\d\d\d$");
+    assert_eq!(glob.count(), 5);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option starts as part of combined short options
+#[test]
+fn test_split_obs_lines_starts_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "obs-lines-starts-shorts";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-200xd", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x\d\d$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using both obsolete lines (standalone) option and short/long lines option simultaneously
+#[test]
+fn test_split_both_lines_and_obs_lines_standalone() {
+    // This test will ensure that:
+    // if both lines option '-l' or '--lines' (with value) and obsolete lines option '-100' are used - it fails
+    // if standalone lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-l", "2", "-2", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+    scene
+        .ucmd()
+        .args(&["--lines", "2", "-2", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+}
+
+/// Test for using obsolete lines option incorrectly, so it is treated as a hyphen prefixed value of other option
+#[test]
+fn test_split_obs_lines_as_other_option_value() {
+    // This test will ensure that:
+    // if obsolete lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["--lines", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of lines: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["-l", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of lines: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["-a", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid suffix length: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--suffix-length", "-d200e", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid suffix length: '-d200e'\n");
+    scene
+        .ucmd()
+        .args(&["-C", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--line-bytes", "-x200a4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-x200a4'\n");
+    scene
+        .ucmd()
+        .args(&["-b", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--bytes", "-200xd", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200xd'\n");
+    scene
+        .ucmd()
+        .args(&["-n", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of chunks: -200\n");
+    scene
+        .ucmd()
+        .args(&["--number", "-e200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of chunks: -e200\n");
+}
+
+/// Test for using more than one obsolete lines option (standalone)
+/// last one wins
+#[test]
+fn test_split_multiple_obs_lines_standalone() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "multiple-obs-lines";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-3000", "-200", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using more than one obsolete lines option within combined shorts
+/// last one wins
+#[test]
+fn test_split_multiple_obs_lines_within_combined() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "multiple-obs-lines";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-d5000x", "-e200d", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x\d\d$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using both obsolete lines option within combined shorts with conflicting -n option simultaneously
+#[test]
+fn test_split_obs_lines_within_combined_with_number() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-3dxen", "4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+    scene
+        .ucmd()
+        .args(&["-dxe30n", "4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
 }

 #[test]
@ -327,14 +635,6 @@ fn test_split_invalid_bytes_size() {
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of bytes: '1024R'\n");
-    #[cfg(not(target_pointer_width = "128"))]
-    new_ucmd!()
-        .args(&["-b", "1Y"])
-        .fails()
-        .code_is(1)
-        .stderr_only(
-            "split: invalid number of bytes: '1Y': Value too large for defined data type\n",
-        );
    #[cfg(target_pointer_width = "32")]
    {
        let sizes = ["1000G", "10T"];
@ -345,19 +645,29 @@ fn test_split_invalid_bytes_size() {
 }

 #[test]
+fn test_split_overflow_bytes_size() {
+    #[cfg(not(target_pointer_width = "128"))]
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_overflow_bytes_size";
+    RandomFile::new(&at, name).add_bytes(1000);
+    ucmd.args(&["-b", "1Y", name]).succeeds();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 1);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+#[test]
+#[cfg(target_pointer_width = "32")]
 fn test_split_chunks_num_chunks_oversized_32() {
-    #[cfg(target_pointer_width = "32")]
-    {
-        let scene = TestScenario::new(util_name!());
-        let at = &scene.fixtures;
-        at.touch("file");
-        scene
-            .ucmd()
-            .args(&["--number", "5000000000", "file"])
-            .fails()
-            .code_is(1)
-            .stderr_only("split: Number of chunks too big\n");
-    }
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+    scene
+        .ucmd()
+        .args(&["--number", "5000000000", "sixhundredfiftyonebytes.txt"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: Number of chunks too big\n");
 }

 #[test]
@ -369,6 +679,15 @@ fn test_split_stdin_num_chunks() {
        .stderr_only("split: -: cannot determine file size\n");
 }

+#[test]
+fn test_split_stdin_num_kth_chunk() {
+    new_ucmd!()
+        .args(&["--number=1/2"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: -: cannot determine file size\n");
+}
+
 fn file_read(at: &AtPath, filename: &str) -> String {
    let mut s = String::new();
    at.open(filename).read_to_string(&mut s).unwrap();
@ -471,7 +790,7 @@ creating file 'xaf'
 }

 #[test]
-fn test_number() {
+fn test_number_n() {
    let (at, mut ucmd) = at_and_ucmd!();
    let file_read = |f| {
        let mut s = String::new();
@ -484,6 +803,98 @@ fn test_number() {
    assert_eq!(file_read("xac"), "klmno");
    assert_eq!(file_read("xad"), "pqrst");
    assert_eq!(file_read("xae"), "uvwxyz\n");
+    #[cfg(unix)]
+    new_ucmd!()
+        .args(&["--number=100", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
+}
+
+#[test]
+fn test_number_kth_of_n() {
+    new_ucmd!()
+        .args(&["--number=3/5", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("klmno");
+    new_ucmd!()
+        .args(&["--number=5/5", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("uvwxyz\n");
+    new_ucmd!()
+        .args(&["-e", "--number=99/100", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(unix)]
+    new_ucmd!()
+        .args(&["--number=3/10", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number=r/9223372036854775807/18446744073709551615",
+            "asciilowercase.txt",
+        ])
+        .succeeds()
+        .stdout_only("");
+    new_ucmd!()
+        .args(&["--number=0/5", "asciilowercase.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 0");
+    new_ucmd!()
+        .args(&["--number=10/5", "asciilowercase.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 10");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number=9223372036854775807/18446744073709551616",
+            "asciilowercase.txt",
+        ])
+        .fails()
+        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+}
+
+#[test]
+fn test_number_kth_of_n_round_robin() {
+    new_ucmd!()
+        .args(&["--number", "r/2/3", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("2\n5\n");
+    new_ucmd!()
+        .args(&["--number", "r/1/4", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("1\n5\n");
+    new_ucmd!()
+        .args(&["-e", "--number", "r/7/7", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number",
+            "r/9223372036854775807/18446744073709551615",
+            "fivelines.txt",
+        ])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number",
+            "r/9223372036854775807/18446744073709551616",
+            "fivelines.txt",
+        ])
+        .fails()
+        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+    new_ucmd!()
+        .args(&["--number", "r/0/3", "fivelines.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 0");
+    new_ucmd!()
+        .args(&["--number", "r/10/3", "fivelines.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 10");
 }

 #[test]
@ -524,6 +935,19 @@ fn test_invalid_suffix_length() {
        .stderr_contains("invalid suffix length: 'xyz'");
 }

+/// Test short suffix length option with value concatenated
+#[test]
+fn test_split_suffix_length_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_num_prefixed_chunks_by_lines";
+    RandomFile::new(&at, name).add_lines(10000);
+    ucmd.args(&["-a4", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]][[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 #[test]
 fn test_include_newlines() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -542,6 +966,19 @@ fn test_include_newlines() {
    assert_eq!(s, "5\n");
 }

+/// Test short number of chunks option concatenated with value
+#[test]
+fn test_split_number_chunks_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n3", "threebytes.txt"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    assert_eq!(at.read("xaa"), "a");
+    assert_eq!(at.read("xab"), "b");
+    assert_eq!(at.read("xac"), "c");
+}
+
 #[test]
 fn test_allow_empty_files() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -616,6 +1053,25 @@ fn test_line_bytes() {
    assert_eq!(at.read("xad"), "ee\n");
 }

+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_line_bytes_overflow() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-C", "18446744073709551616", "letters.txt"])
+        .succeeds();
+    assert_eq!(at.read("xaa"), "aaaaaaaaa\nbbbb\ncccc\ndd\nee\n");
+}
+
+#[test]
+fn test_line_bytes_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-C8", "letters.txt"]).succeeds();
+    assert_eq!(at.read("xaa"), "aaaaaaaa");
+    assert_eq!(at.read("xab"), "a\nbbbb\n");
+    assert_eq!(at.read("xac"), "cccc\ndd\n");
+    assert_eq!(at.read("xad"), "ee\n");
+}
+
 #[test]
 fn test_line_bytes_no_final_newline() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -970,3 +1426,49 @@ fn test_split_invalid_input() {
        .no_stdout()
        .stderr_contains("split: invalid number of chunks: 0");
 }
+
+/// Test if there are invalid (non UTF-8) in the arguments - unix
+/// clap is expected to fail/panic
+#[test]
+#[cfg(unix)]
+fn test_split_non_utf8_argument_unix() {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+    // respectively. The value 0x80 is a lone continuation byte, invalid
+    // in a UTF-8 sequence.
+    let opt_value = [0x66, 0x6f, 0x80, 0x6f];
+    let opt_value = OsStr::from_bytes(&opt_value[..]);
+    let name = OsStr::from_bytes(name.as_bytes());
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}
+
+/// Test if there are invalid (non UTF-8) in the arguments - windows
+/// clap is expected to fail/panic
+#[test]
+#[cfg(windows)]
+fn test_split_non_utf8_argument_windows() {
+    use std::ffi::OsString;
+    use std::os::windows::ffi::OsStringExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsString::from("--additional-suffix");
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+    // respectively. The value 0xD800 is a lone surrogate half, invalid
+    // in a UTF-16 sequence.
+    let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
+    let opt_value = OsString::from_wide(&opt_value[..]);
+    let name = OsString::from(name);
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}