split: use "parse_size" from uucore

* make stderr of parsing SIZE/NUMBER argument consistent with GNU's behavior * add error handling * add tests
2025-09-15 19:36:16 +00:00 · 2021-06-02 18:37:21 +02:00 · 2021-06-02 18:37:21 +02:00 · 2f5f7c6fa1
commit 2f5f7c6fa1
parent 6b8de1dd8b
3 changed files with 69 additions and 37 deletions
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@ -13,11 +13,13 @@ extern crate uucore;
 mod platform;
 use clap::{App, Arg};
 use std::convert::TryFrom;
 use std::env;
 use std::fs::File;
 use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
 use std::path::Path;
 use std::{char, fs::remove_file};
 use uucore::parse_size::{parse_size, ParseSizeError};
 static NAME: &str = "split";
 static VERSION: &str = env!("CARGO_PKG_VERSION");
@ -232,10 +234,9 @@ struct LineSplitter {
 impl LineSplitter {
    fn new(settings: &Settings) -> LineSplitter {
        LineSplitter {
-            lines_per_split: settings
+            lines_per_split: settings.strategy_param.parse().unwrap_or_else(|_| {
-                .strategy_param
+                crash!(1, "invalid number of lines: ‘{}’", settings.strategy_param)
-                .parse()
+            }),
                .unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)),
        }
    }
 }
@ -277,40 +278,23 @@ struct ByteSplitter {
 impl ByteSplitter {
    fn new(settings: &Settings) -> ByteSplitter {
-        // These multipliers are the same as supported by GNU coreutils.
+        let size_string = &settings.strategy_param;
-        let modifiers: Vec<(&str, u128)> = vec![
+        let size_num = match parse_size(&size_string) {
-            ("K", 1024u128),
+            Ok(n) => n,
-            ("M", 1024 * 1024),
+            Err(e) => match e {
-            ("G", 1024 * 1024 * 1024),
+                ParseSizeError::ParseFailure(_) => {
-            ("T", 1024 * 1024 * 1024 * 1024),
+                    crash!(1, "invalid number of bytes: {}", e.to_string())
-            ("P", 1024 * 1024 * 1024 * 1024 * 1024),
+                }
-            ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
+                ParseSizeError::SizeTooBig(_) => crash!(
-            ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
+                    1,
-            ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
+                    "invalid number of bytes: ‘{}’: Value too large for defined data type",
-            ("KB", 1000),
+                    size_string
-            ("MB", 1000 * 1000),
+                ),
-            ("GB", 1000 * 1000 * 1000),
+            },
-            ("TB", 1000 * 1000 * 1000 * 1000),
+        };
            ("PB", 1000 * 1000 * 1000 * 1000 * 1000),
            ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
            ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
            ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
        ];
        // This sequential find is acceptable since none of the modifiers are
        // suffixes of any other modifiers, a la Huffman codes.
        let (suffix, multiplier) = modifiers
            .iter()
            .find(|(suffix, _)| settings.strategy_param.ends_with(suffix))
            .unwrap_or(&("", 1));
        // Try to parse the actual numeral.
        let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
            .parse::<u128>()
            .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
        ByteSplitter {
-            bytes_per_split: n * multiplier,
+            bytes_per_split: u128::try_from(size_num).unwrap(),
        }
    }
 }
--- a/src/uucore/src/lib/parser/parse_size.rs
+++ b/src/uucore/src/lib/parser/parse_size.rs
@ -117,7 +117,9 @@ impl ParseSizeError {
    fn size_too_big(s: &str) -> ParseSizeError {
        // has to be handled in the respective uutils because strings differ, e.g.
        // truncate: Invalid number: ‘1Y’: Value too large to be stored in data type
-        // tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type
+        // tail:     invalid number of bytes: ‘1Y’: Value too large to be stored in data type
        // split:    invalid number of bytes: ‘1Y’: Value too large for defined data type
        // etc.
        ParseSizeError::SizeTooBig(format!(
            "‘{}’: Value too large to be stored in data type",
            s
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@ -285,3 +285,49 @@ fn test_filter_command_fails() {
    ucmd.args(&["--filter=/a/path/that/totally/does/not/exist", name])
        .fails();
 }
 #[test]
 fn test_split_lines_number() {
    // Test if stdout/stderr for '--lines' option is correct
    new_ucmd!()
        .args(&["--lines", "2"])
        .pipe_in("abcde")
        .succeeds()
        .no_stderr()
        .no_stdout();
    new_ucmd!()
        .args(&["--lines", "2fb"])
        .pipe_in("abcde")
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of lines: ‘2fb’");
 }
 #[test]
 fn test_split_invalid_bytes_size() {
    new_ucmd!()
        .args(&["-b", "1024R"])
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of bytes: ‘1024R’");
    #[cfg(not(target_pointer_width = "128"))]
    new_ucmd!()
        .args(&["-b", "1Y"])
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of bytes: ‘1Y’: Value too large for defined data type");
    #[cfg(target_pointer_width = "32")]
    {
        let sizes = ["1000G", "10T"];
        for size in &sizes {
            new_ucmd!()
                .args(&["-b", size])
                .fails()
                .code_is(1)
                .stderr_only(format!(
                    "split: invalid number of bytes: ‘{}’: Value too large for defined data type",
                    size
                ));
        }
    }
 }