From 1eae064e5c68bb2de2738f295f443e91c6630e97 Mon Sep 17 00:00:00 2001 From: Yury Zhytkou <54360928+zhitkoff@users.noreply.github.com> Date: Mon, 28 Aug 2023 04:09:52 -0400 Subject: [PATCH] split: better handle numeric and hex suffixes, short and long, with and without values (#5198) * split: better handle numeric and hex suffixes, short and long, with and without values Fixes #5171 * refactoring with overrides_with_all() in args definitions * fixed comments * updated help on suffixes to match GNU * comments * refactor to remove value_parser() * split: refactor suffix processing + updated tests * split: minor formatting --- src/uu/split/src/split.rs | 105 ++++++++++++++------ tests/by-util/test_split.rs | 190 +++++++++++++++++++++++++++++++++++- 2 files changed, 263 insertions(+), 32 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 71aacfed0..517031791 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore nbbbb ncccc +// spell-checker:ignore nbbbb ncccc hexdigit mod filenames; mod number; @@ -11,8 +11,7 @@ mod platform; use crate::filenames::FilenameIterator; use crate::filenames::SuffixType; -use clap::ArgAction; -use clap::{crate_version, parser::ValueSource, Arg, ArgMatches, Command}; +use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command}; use std::env; use std::fmt; use std::fs::{metadata, File}; @@ -32,7 +31,9 @@ static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix"; static OPT_FILTER: &str = "filter"; static OPT_NUMBER: &str = "number"; static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes"; +static OPT_NUMERIC_SUFFIXES_SHORT: &str = "-d"; static OPT_HEX_SUFFIXES: &str = "hex-suffixes"; +static OPT_HEX_SUFFIXES_SHORT: &str = "-x"; static OPT_SUFFIX_LENGTH: &str = "suffix-length"; static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0"; static OPT_VERBOSE: &str = "verbose"; @@ -122,12 +123,56 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_NUMERIC_SUFFIXES) + Arg::new(OPT_NUMERIC_SUFFIXES_SHORT) .short('d') + .action(clap::ArgAction::SetTrue) + .overrides_with_all([ + OPT_NUMERIC_SUFFIXES, + OPT_NUMERIC_SUFFIXES_SHORT, + OPT_HEX_SUFFIXES, + OPT_HEX_SUFFIXES_SHORT + ]) + .help("use numeric suffixes starting at 0, not alphabetic"), + ) + .arg( + Arg::new(OPT_NUMERIC_SUFFIXES) .long(OPT_NUMERIC_SUFFIXES) + .require_equals(true) .default_missing_value("0") .num_args(0..=1) - .help("use numeric suffixes instead of alphabetic"), + .overrides_with_all([ + OPT_NUMERIC_SUFFIXES, + OPT_NUMERIC_SUFFIXES_SHORT, + OPT_HEX_SUFFIXES, + OPT_HEX_SUFFIXES_SHORT + ]) + .help("same as -d, but allow setting the start value"), + ) + .arg( + Arg::new(OPT_HEX_SUFFIXES_SHORT) + .short('x') + .action(clap::ArgAction::SetTrue) + .overrides_with_all([ + OPT_NUMERIC_SUFFIXES, + OPT_NUMERIC_SUFFIXES_SHORT, + OPT_HEX_SUFFIXES, + OPT_HEX_SUFFIXES_SHORT + ]) + .help("use hex suffixes starting at 0, not alphabetic"), + ) + .arg( + Arg::new(OPT_HEX_SUFFIXES) + .long(OPT_HEX_SUFFIXES) + .default_missing_value("0") + .require_equals(true) + .num_args(0..=1) + .overrides_with_all([ + OPT_NUMERIC_SUFFIXES, + OPT_NUMERIC_SUFFIXES_SHORT, + OPT_HEX_SUFFIXES, + OPT_HEX_SUFFIXES_SHORT + ]) + .help("same as -x, but allow setting the start value"), ) .arg( Arg::new(OPT_SUFFIX_LENGTH) @@ -137,14 +182,6 @@ pub fn uu_app() -> Command { .default_value(OPT_DEFAULT_SUFFIX_LENGTH) .help("use suffixes of fixed length N. 0 implies dynamic length."), ) - .arg( - Arg::new(OPT_HEX_SUFFIXES) - .short('x') - .long(OPT_HEX_SUFFIXES) - .default_missing_value("0") - .num_args(0..=1) - .help("use hex suffixes instead of alphabetic"), - ) .arg( Arg::new(OPT_VERBOSE) .long(OPT_VERBOSE) @@ -409,22 +446,32 @@ impl Strategy { /// Parse the suffix type from the command-line arguments. fn suffix_type_from(matches: &ArgMatches) -> Result<(SuffixType, usize), SettingsError> { - if matches.value_source(OPT_NUMERIC_SUFFIXES) == Some(ValueSource::CommandLine) { - let suffix_start = matches.get_one::(OPT_NUMERIC_SUFFIXES); - let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?; - let suffix_start = suffix_start - .parse() - .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?; - Ok((SuffixType::Decimal, suffix_start)) - } else if matches.value_source(OPT_HEX_SUFFIXES) == Some(ValueSource::CommandLine) { - let suffix_start = matches.get_one::(OPT_HEX_SUFFIXES); - let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?; - let suffix_start = usize::from_str_radix(suffix_start, 16) - .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?; - Ok((SuffixType::Hexadecimal, suffix_start)) - } else { - // no numeric/hex suffix - Ok((SuffixType::Alphabetic, 0)) + // Check if the user is specifying one or more than one suffix + // Any combination of suffixes is allowed + // Since all suffixes are setup with 'overrides_with_all()' against themselves and each other, + // last one wins, all others are ignored + match ( + matches.get_one::(OPT_NUMERIC_SUFFIXES), + matches.get_one::(OPT_HEX_SUFFIXES), + matches.get_flag(OPT_NUMERIC_SUFFIXES_SHORT), + matches.get_flag(OPT_HEX_SUFFIXES_SHORT), + ) { + (Some(v), _, _, _) => { + let suffix_start = v; + let suffix_start = suffix_start + .parse::() + .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?; + Ok((SuffixType::Decimal, suffix_start)) + } + (_, Some(v), _, _) => { + let suffix_start = v; + let suffix_start = usize::from_str_radix(suffix_start, 16) + .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?; + Ok((SuffixType::Hexadecimal, suffix_start)) + } + (_, _, true, _) => Ok((SuffixType::Decimal, 0)), // short numeric suffix '-d', default start 0 + (_, _, _, true) => Ok((SuffixType::Hexadecimal, 0)), // short hex suffix '-x', default start 0 + _ => Ok((SuffixType::Alphabetic, 0)), // no numeric/hex suffix, using default alphabetic } } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index c5f32482e..f3317d4c7 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb +// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen use crate::common::util::{AtPath, TestScenario}; use rand::{thread_rng, Rng, SeedableRng}; @@ -715,7 +715,7 @@ fn test_multiple_of_input_chunk() { #[test] fn test_numeric_suffix() { let (at, mut ucmd) = at_and_ucmd!(); - ucmd.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"]) + ucmd.args(&["-n", "4", "--numeric-suffixes=9", "threebytes.txt"]) .succeeds() .no_stdout() .no_stderr(); @@ -728,7 +728,7 @@ fn test_numeric_suffix() { #[test] fn test_hex_suffix() { let (at, mut ucmd) = at_and_ucmd!(); - ucmd.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"]) + ucmd.args(&["-n", "4", "--hex-suffixes=9", "threebytes.txt"]) .succeeds() .no_stdout() .no_stderr(); @@ -738,6 +738,190 @@ fn test_hex_suffix() { assert_eq!(at.read("x0c"), ""); } +#[test] +fn test_numeric_suffix_no_equal() { + new_ucmd!() + .args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"]) + .fails() + .stderr_contains("split: cannot open '9' for reading: No such file or directory"); +} + +#[test] +fn test_hex_suffix_no_equal() { + new_ucmd!() + .args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"]) + .fails() + .stderr_contains("split: cannot open '9' for reading: No such file or directory"); +} + +/// Test for short numeric suffix not having any value +#[test] +fn test_short_numeric_suffix_no_value() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-l", "9", "-d", "onehundredlines.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n"); + assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n"); + assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n"); + assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n"); + assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n"); + assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n"); + assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n"); + assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n"); + assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n"); + assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n"); + assert_eq!(at.read("x10"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n"); + assert_eq!(at.read("x11"), "99\n"); +} + +/// Test for long numeric suffix not having any value +#[test] +fn test_numeric_suffix_no_value() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-l", "9", "--numeric-suffixes", "onehundredlines.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n"); + assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n"); + assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n"); + assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n"); + assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n"); + assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n"); + assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n"); + assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n"); + assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n"); + assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n"); + assert_eq!(at.read("x10"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n"); + assert_eq!(at.read("x11"), "99\n"); +} + +/// Test for short hex suffix not having any value +#[test] +fn test_short_hex_suffix_no_value() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-l", "9", "-x", "onehundredlines.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n"); + assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n"); + assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n"); + assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n"); + assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n"); + assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n"); + assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n"); + assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n"); + assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n"); + assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n"); + assert_eq!(at.read("x0a"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n"); + assert_eq!(at.read("x0b"), "99\n"); +} + +/// Test for long hex suffix not having any value +#[test] +fn test_hex_suffix_no_value() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-l", "9", "--hex-suffixes", "onehundredlines.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n"); + assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n"); + assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n"); + assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n"); + assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n"); + assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n"); + assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n"); + assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n"); + assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n"); + assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n"); + assert_eq!(at.read("x0a"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n"); + assert_eq!(at.read("x0b"), "99\n"); +} + +/// Test for short numeric suffix having value provided after space - should fail +#[test] +fn test_short_numeric_suffix_with_value_spaced() { + new_ucmd!() + .args(&["-n", "4", "-d", "9", "threebytes.txt"]) + .fails() + .stderr_contains("split: cannot open '9' for reading: No such file or directory"); +} + +/// Test for short numeric suffix having value provided after space - should fail +#[test] +fn test_short_hex_suffix_with_value_spaced() { + new_ucmd!() + .args(&["-n", "4", "-x", "9", "threebytes.txt"]) + .fails() + .stderr_contains("split: cannot open '9' for reading: No such file or directory"); +} + +/// Test for some combined short options +#[test] +fn test_short_combination() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-dxen", "4", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x00"), "a"); + assert_eq!(at.read("x01"), "b"); + assert_eq!(at.read("x02"), "c"); + assert_eq!(at.file_exists("x03"), false); +} + +/// Test for the last effective suffix, ignoring all others - numeric long last +/// Any combination of short and long (as well as duplicates) should be allowed +#[test] +fn test_effective_suffix_numeric_last() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&[ + "-n", + "4", + "--numeric-suffixes=7", + "--hex-suffixes=4", + "-d", + "-x", + "--numeric-suffixes=9", + "threebytes.txt", + ]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x09"), "a"); + assert_eq!(at.read("x10"), "b"); + assert_eq!(at.read("x11"), "c"); + assert_eq!(at.read("x12"), ""); +} + +/// Test for the last effective suffix, ignoring all others - hex long last +/// Any combination of short and long (as well as duplicates) should be allowed +#[test] +fn test_effective_suffix_hex_last() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&[ + "-n", + "4", + "--hex-suffixes=7", + "--numeric-suffixes=4", + "-x", + "-d", + "--hex-suffixes=9", + "threebytes.txt", + ]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x09"), "a"); + assert_eq!(at.read("x0a"), "b"); + assert_eq!(at.read("x0b"), "c"); + assert_eq!(at.read("x0c"), ""); +} + #[test] fn test_round_robin() { let (at, mut ucmd) = at_and_ucmd!();