shuf: Use OS strings, don't split individual arguments, cleanup

- shuf now uses OS strings, so it can read from files whose names are invalid Unicode and it can shuffle arguments that are invalid Unicode. `uucore` now has an `OsWrite` trait to support this without platform-specific boilerplate.
- shuf no longer tries to split individual command line arguments, only bulk input from a file/stdin. (This matches GNU and busybox.)
- More values are parsed inside clap instead of manually, leading to better error messages and less code.
- Some code has been simplified or made more idiomatic.
2025-07-28 19:47:45 +00:00 · 2024-03-02 12:12:35 +01:00 · 2024-03-02 12:12:35 +01:00 · f562543b6c
commit f562543b6c
parent 87ec8285c3
6 changed files with 250 additions and 155 deletions
--- a/tests/by-util/test_shuf.rs
+++ b/tests/by-util/test_shuf.rs
@@ -362,6 +362,51 @@ fn test_echo_short_collapsed_zero() {
    assert_eq!(result_seq, ["a", "b", "c"], "Output is not a permutation");
 }

+// Regression test: a newline inside an `-e` argument must not split that
+// argument into several input items.
+#[test]
+fn test_echo_separators_in_arguments() {
+    // Arguments used to be split on newlines, which was wrong: shuf has to
+    // treat "a\nb" and "c\nd" as exactly two items, so `-n2` prints both of
+    // them in full.
+    // (No need to cover null bytes: an argument can never contain one.)
+    let output = new_ucmd!().args(&["-e", "-n2", "a\nb", "c\nd"]).succeeds();
+    output.no_stderr();
+
+    // Two three-byte items, each followed by one terminator byte.
+    let printed_len = output.stdout_str().len();
+    assert_eq!(printed_len, 8, "Incorrect output length");
+}
+
+// Checks that `-e` passes through an argument that is not valid Unicode.
+#[cfg(unix)]
+#[test]
+fn test_echo_invalid_unicode_in_arguments() {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+
+    // 0xFF never occurs in well-formed UTF-8, so this argument is not valid Unicode.
+    let invalid_arg = OsStr::from_bytes(b"a\xFFb");
+
+    let output = new_ucmd!().arg("-e").arg(invalid_arg).arg("ok").succeeds();
+    output.no_stderr();
+
+    // The raw byte has to appear somewhere in the captured output.
+    let has_raw_byte = output.stdout().iter().any(|&byte| byte == 0xFF);
+    assert!(has_raw_byte);
+}
+
+// Checks that shuf can read a file whose name is not valid Unicode.
+// NOTE(review): macOS is excluded, presumably because its filesystems refuse
+// to create such filenames — confirm.
+#[cfg(any(unix, target_os = "wasi"))]
+#[cfg(not(target_os = "macos"))]
+#[test]
+fn test_invalid_unicode_in_filename() {
+    use std::ffi::OsStr;
+    // `std::os::unix` does not exist on WASI, so import the extension trait
+    // that matches the target; otherwise the `wasi` half of the cfg above
+    // cannot compile.
+    #[cfg(unix)]
+    use std::os::unix::ffi::OsStrExt;
+    #[cfg(target_os = "wasi")]
+    use std::os::wasi::ffi::OsStrExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let filename = OsStr::from_bytes(b"a\xFFb");
+    at.append(filename, "foo\n");
+
+    // The file is passed by its raw (non-Unicode) name and must be read back intact.
+    let result = ucmd.arg(filename).succeeds();
+    result.no_stderr();
+    assert_eq!(result.stdout(), b"foo\n");
+}
+
 #[test]
 fn test_head_count() {
    let repeat_limit = 5;
@@ -647,23 +692,21 @@ fn test_shuf_invalid_input_range_one() {
    new_ucmd!()
        .args(&["-i", "0"])
        .fails()
-        .stderr_contains("invalid input range");
+        .stderr_contains("invalid value '0' for '--input-range <LO-HI>': missing '-'");
 }

 #[test]
 fn test_shuf_invalid_input_range_two() {
-    new_ucmd!()
-        .args(&["-i", "a-9"])
-        .fails()
-        .stderr_contains("invalid input range: 'a'");
+    new_ucmd!().args(&["-i", "a-9"]).fails().stderr_contains(
+        "invalid value 'a-9' for '--input-range <LO-HI>': invalid digit found in string",
+    );
 }

 #[test]
 fn test_shuf_invalid_input_range_three() {
-    new_ucmd!()
-        .args(&["-i", "0-b"])
-        .fails()
-        .stderr_contains("invalid input range: 'b'");
+    new_ucmd!().args(&["-i", "0-b"]).fails().stderr_contains(
+        "invalid value '0-b' for '--input-range <LO-HI>': invalid digit found in string",
+    );
 }

 #[test]
@@ -702,10 +745,9 @@ fn test_shuf_three_input_files() {

 #[test]
 fn test_shuf_invalid_input_line_count() {
-    new_ucmd!()
-        .args(&["-n", "a"])
-        .fails()
-        .stderr_contains("invalid line count: 'a'");
+    new_ucmd!().args(&["-n", "a"]).fails().stderr_contains(
+        "invalid value 'a' for '--head-count <COUNT>': invalid digit found in string",
+    );
 }

 #[test]
@@ -772,7 +814,7 @@ fn test_range_empty_minus_one() {
        .arg("-i5-3")
        .fails()
        .no_stdout()
-        .stderr_only("shuf: invalid input range: '5-3'\n");
+        .stderr_contains("invalid value '5-3' for '--input-range <LO-HI>': start exceeds end\n");
 }

 #[test]
@@ -802,5 +844,5 @@ fn test_range_repeat_empty_minus_one() {
        .arg("-ri5-3")
        .fails()
        .no_stdout()
-        .stderr_only("shuf: invalid input range: '5-3'\n");
+        .stderr_contains("invalid value '5-3' for '--input-range <LO-HI>': start exceeds end\n");
 }