From 84d96f9d028a4dd8c0f98ba6db2ba6dc90c79dcb Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Sat, 26 Aug 2023 11:11:46 -0400 Subject: [PATCH 01/10] split: refactor for more common use case --- src/uu/split/src/split.rs | 81 ++++++++++++++++++- tests/by-util/test_split.rs | 152 +++++++++++++++++++++++++++++++++++- 2 files changed, 229 insertions(+), 4 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 71aacfed0..c4b7e6d66 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -51,14 +51,66 @@ const AFTER_HELP: &str = help_section!("after help", "split.md"); #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let args = args.collect_lossy(); + + let (args, obs_lines) = handle_obsolete(&args[..]); + let matches = uu_app().try_get_matches_from(args)?; - match Settings::from(&matches) { + + match Settings::from(&matches, &obs_lines) { Ok(settings) => split(&settings), Err(e) if e.requires_usage() => Err(UUsageError::new(1, format!("{e}"))), Err(e) => Err(USimpleError::new(1, format!("{e}"))), } } +/// Extract obsolete shorthand (if any) for specifying lines in following scenarios (and similar) +/// `split -22 file` would mean `split -l 22 file` +/// `split -2de file` would mean `split -l 2 -d -e file` +/// `split -x300e file` would mean `split -x -l 300 -e file` +/// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins) +/// following GNU `split` behavior +fn handle_obsolete(args: &[String]) -> (Vec, Option) { + let mut v: Vec = vec![]; + let mut obs_lines = None; + for arg in args.iter() { + let slice = &arg; + if slice.starts_with('-') && !slice.starts_with("--") { + // start of the short option string + // extract numeric part and filter it out + let mut obs_lines_extracted: Vec = vec![]; + let filtered_slice: Vec = slice + .chars() + .filter(|c| { + if c.is_ascii_digit() { + obs_lines_extracted.push(*c); + false + } else { + true + } + }) + .collect(); + + if filtered_slice.get(1).is_some() { + // there were some short options in front of obsolete lines number + // i.e. '-xd100' or similar + // preserve it + v.push(filtered_slice.iter().collect()) + } + if !obs_lines_extracted.is_empty() { + // obsolete lines value was extracted + obs_lines = Some(obs_lines_extracted.iter().collect()); + } + } else { + // not a short option + // preserve it + v.push(arg.to_owned()); + } + } + println!("{:#?}",v); + (v, obs_lines) +} + pub fn uu_app() -> Command { Command::new(uucore::util_name()) .version(crate_version!()) @@ -357,6 +409,17 @@ impl fmt::Display for StrategyError { } impl Strategy { + /// Parse a strategy from obsolete line option value + fn from_obs(obs_value: &str) -> Result { + let n = parse_size(obs_value).map_err(StrategyError::Lines)?; + if n > 0 { + Ok(Self::Lines(n)) + } else { + Err(StrategyError::Lines(ParseSizeError::ParseFailure( + obs_value.to_string(), + ))) + } + } /// Parse a strategy from the command-line arguments. fn from(matches: &ArgMatches) -> Result { fn get_and_parse( @@ -506,7 +569,7 @@ impl fmt::Display for SettingsError { impl Settings { /// Parse a strategy from the command-line arguments. - fn from(matches: &ArgMatches) -> Result { + fn from(matches: &ArgMatches, obs_lines: &Option) -> Result { let additional_suffix = matches .get_one::(OPT_ADDITIONAL_SUFFIX) .unwrap() @@ -514,7 +577,19 @@ impl Settings { if additional_suffix.contains('/') { return Err(SettingsError::SuffixContainsSeparator(additional_suffix)); } - let strategy = Strategy::from(matches).map_err(SettingsError::Strategy)?; + + // obsolete lines option cannot be used simultaneously with OPT_LINES + let strategy = match obs_lines { + Some(obs_value) => { + if matches.value_source(OPT_LINES) == Some(ValueSource::CommandLine) { + return Err(SettingsError::Strategy(StrategyError::MultipleWays)); + } else { + Strategy::from_obs(obs_value).map_err(SettingsError::Strategy)? + } + } + None => Strategy::from(matches).map_err(SettingsError::Strategy)?, + }; + let (suffix_type, suffix_start) = suffix_type_from(matches)?; let suffix_length_str = matches.get_one::(OPT_SUFFIX_LENGTH).unwrap(); let suffix_length: usize = suffix_length_str diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index c5f32482e..fe753f084 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb +// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen use crate::common::util::{AtPath, TestScenario}; use rand::{thread_rng, Rng, SeedableRng}; @@ -320,6 +320,156 @@ fn test_split_lines_number() { .stderr_only("split: invalid number of lines: '2fb'\n"); } +/// Test for obsolete lines option standalone +#[test] +fn test_split_obs_lines_standalone() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "obs-lines-standalone"; + RandomFile::new(&at, name).add_lines(4); + ucmd.args(&["-2", name]).succeeds().no_stderr().no_stdout(); + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 2); + assert_eq!(glob.collate(), at.read_bytes(name)); +} + +/// Test for invalid obsolete lines option +#[test] +fn test_split_invalid_obs_lines_standalone() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + + scene + .ucmd() + .args(&["-2fb", "file"]) + .fails() + .code_is(1) + .stderr_only("error: unexpected argument '-f' found\n"); +} + +/// Test for obsolete lines option as part of combined short options +#[test] +fn test_split_obs_lines_within_combined_shorts() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let name = "obs-lines-within-shorts"; + RandomFile::new(&at, name).add_lines(400); + + scene + .ucmd() + .args(&["-d200xe", name]) + .succeeds() + .no_stderr() + .no_stdout(); + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 2); + assert_eq!(glob.collate(), at.read_bytes(name)) +} + +/// Test for obsolete lines option starts as part of combined short options +#[test] +fn test_split_obs_lines_starts_combined_shorts() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let name = "obs-lines-starts-shorts"; + RandomFile::new(&at, name).add_lines(400); + + scene + .ucmd() + .args(&["-x200d", name]) + .succeeds() + .no_stderr() + .no_stdout(); + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 2); + assert_eq!(glob.collate(), at.read_bytes(name)) +} + +/// Test for using both obsolete lines (standalone) option and short/long lines option simultaneously +#[test] +fn test_split_both_lines_and_obs_lines_standalone() { + // This test will ensure that if both lines option '-l' or '--lines' + // and obsolete lines option '-100' are used + // it fails + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + + scene + .ucmd() + .args(&["-l", "2", "-2", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: cannot split in more than one way\n"); + scene + .ucmd() + .args(&["--lines", "2", "-2", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: cannot split in more than one way\n"); +} + +/// Test for using more than one obsolete lines option (standalone) +/// last one wins +#[test] +fn test_split_multiple_obs_lines_standalone() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let name = "multiple-obs-lines"; + RandomFile::new(&at, name).add_lines(400); + + scene + .ucmd() + .args(&["-3000", "-200", name]) + .succeeds() + .no_stderr() + .no_stdout(); + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 2); + assert_eq!(glob.collate(), at.read_bytes(name)) +} + +/// Test for using more than one obsolete lines option within combined shorts +/// last one wins +#[test] +fn test_split_multiple_obs_lines_within_combined() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let name = "multiple-obs-lines"; + RandomFile::new(&at, name).add_lines(400); + + scene + .ucmd() + .args(&["-x5000d -e200x", name]) + .succeeds() + .no_stderr() + .no_stdout(); + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 2); + assert_eq!(glob.collate(), at.read_bytes(name)) +} + +/// Test for using both obsolete lines option within combined shorts with conflicting -n option simultaneously +#[test] +fn test_split_obs_lines_within_combined_with_number() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + + scene + .ucmd() + .args(&["-3dxen", "4", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: cannot split in more than one way\n"); + scene + .ucmd() + .args(&["-dxe30n", "4", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: cannot split in more than one way\n"); +} + #[test] fn test_split_invalid_bytes_size() { new_ucmd!() From 2f35989ac34cda92b65328d325bdc615057e2dbe Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Mon, 28 Aug 2023 18:26:48 -0400 Subject: [PATCH 02/10] split: comments --- src/uu/split/src/split.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 77c87e7f5..698c7818c 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -108,7 +108,6 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { v.push(arg.to_owned()); } } - // println!("{:#?} , {:#?}", v, obs_lines); (v, obs_lines) } @@ -465,8 +464,7 @@ impl Strategy { // Check that the user is not specifying more than one strategy. // // Note: right now, this exact behavior cannot be handled by - // `ArgGroup` since `ArgGroup` considers a default value `Arg` - // as "defined". + // overrides_with_all() due to obsolete lines value option match ( obs_lines, matches.value_source(OPT_LINES) == Some(ValueSource::CommandLine), From e79753c1cf25db25104e853e52e7f687dd776cfb Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 29 Aug 2023 13:58:26 -0400 Subject: [PATCH 03/10] split: refactor handle_obsolete() function --- src/uu/split/src/split.rs | 73 +++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 698c7818c..876d04606 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -72,43 +72,50 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { /// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins) /// following GNU `split` behavior fn handle_obsolete(args: &[String]) -> (Vec, Option) { - let mut v: Vec = vec![]; let mut obs_lines = None; - for arg in args.iter() { - let slice = &arg; - if slice.starts_with('-') && !slice.starts_with("--") { - // start of the short option string - // extract numeric part and filter it out - let mut obs_lines_extracted: Vec = vec![]; - let filtered_slice: Vec = slice - .chars() - .filter(|c| { - if c.is_ascii_digit() { - obs_lines_extracted.push(*c); - false - } else { - true - } - }) - .collect(); + let filtered_args = args + .iter() + .filter_map(|slice| { + if slice.starts_with('-') && !slice.starts_with("--") { + // start of the short option string + // extract numeric part and filter it out + let mut obs_lines_extracted: Vec = vec![]; + let filtered_slice: Vec = slice + .chars() + .filter(|c| { + if c.is_ascii_digit() { + obs_lines_extracted.push(*c); + false + } else { + true + } + }) + .collect(); - if filtered_slice.get(1).is_some() { - // there were some short options in front of obsolete lines number - // i.e. '-xd100' or similar + if obs_lines_extracted.is_empty() { + // no obsolete lines value found/extracted + Some(slice.to_owned()) + } else { + // obsolete lines value was extracted + obs_lines = Some(obs_lines_extracted.iter().collect()); + if filtered_slice.get(1).is_some() { + // there were some short options in front of or after obsolete lines value + // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value + // would look like '-xd' or '-de' or similar + // preserve it + Some(filtered_slice.iter().collect()) + } else { + None + } + } + } else { + // not a short option // preserve it - v.push(filtered_slice.iter().collect()); + Some(slice.to_owned()) } - if !obs_lines_extracted.is_empty() { - // obsolete lines value was extracted - obs_lines = Some(obs_lines_extracted.iter().collect()); - } - } else { - // not a short option - // preserve it - v.push(arg.to_owned()); - } - } - (v, obs_lines) + }) + .collect(); + (filtered_args, obs_lines) } pub fn uu_app() -> Command { From 15c7170d20c532889ac8f3470c5d59bc12d10f2a Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 29 Aug 2023 15:49:47 -0400 Subject: [PATCH 04/10] split: fix for GNU Tests regression + tests --- src/uu/split/src/split.rs | 15 ++++++--- tests/by-util/test_split.rs | 65 +++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 876d04606..dae24d36b 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -76,8 +76,16 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { let filtered_args = args .iter() .filter_map(|slice| { - if slice.starts_with('-') && !slice.starts_with("--") { + if slice.starts_with('-') + && !slice.starts_with("--") + && !slice.starts_with("-a") + && !slice.starts_with("-b") + && !slice.starts_with("-C") + && !slice.starts_with("-l") + && !slice.starts_with("-n") + { // start of the short option string + // that can have obsolete lines option value in it // extract numeric part and filter it out let mut obs_lines_extracted: Vec = vec![]; let filtered_slice: Vec = slice @@ -102,15 +110,14 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { // there were some short options in front of or after obsolete lines value // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value // would look like '-xd' or '-de' or similar - // preserve it Some(filtered_slice.iter().collect()) } else { None } } } else { - // not a short option - // preserve it + // either not a short option + // or a short option that cannot have obsolete lines value in it Some(slice.to_owned()) } }) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index aabfcbe90..9fba2177e 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -170,6 +170,22 @@ fn test_split_str_prefixed_chunks_by_bytes() { assert_eq!(glob.collate(), at.read_bytes(name)); } +// Test short bytes option concatenated with value +#[test] +fn test_split_by_bytes_short_concatenated_with_value() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "split_by_bytes_short_concatenated_with_value"; + RandomFile::new(&at, name).add_bytes(10000); + ucmd.args(&["-b1000", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 10); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); +} + // This is designed to test what happens when the desired part size is not a // multiple of the buffer size and we hopefully don't overshoot the desired part // size. @@ -326,6 +342,19 @@ fn test_split_lines_number() { .stderr_only("split: invalid number of lines: 'file'\n"); } +// Test short lines option with value concatenated +#[test] +fn test_split_lines_short_concatenated_with_value() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "split_num_prefixed_chunks_by_lines"; + RandomFile::new(&at, name).add_lines(10000); + ucmd.args(&["-l1000", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 10); + assert_eq!(glob.collate(), at.read_bytes(name)); +} + /// Test for obsolete lines option standalone #[test] fn test_split_obs_lines_standalone() { @@ -692,6 +721,19 @@ fn test_invalid_suffix_length() { .stderr_contains("invalid suffix length: 'xyz'"); } +// Test short suffix length option with value concatenated +#[test] +fn test_split_suffix_length_short_concatenated_with_value() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "split_num_prefixed_chunks_by_lines"; + RandomFile::new(&at, name).add_lines(10000); + ucmd.args(&["-a4", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]][[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 10); + assert_eq!(glob.collate(), at.read_bytes(name)); +} + #[test] fn test_include_newlines() { let (at, mut ucmd) = at_and_ucmd!(); @@ -710,6 +752,19 @@ fn test_include_newlines() { assert_eq!(s, "5\n"); } +// Test short number of chunks option concatenated with value +#[test] +fn test_split_number_chunks_short_concatenated_with_value() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n3", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "a"); + assert_eq!(at.read("xab"), "b"); + assert_eq!(at.read("xac"), "c"); +} + #[test] fn test_allow_empty_files() { let (at, mut ucmd) = at_and_ucmd!(); @@ -784,6 +839,16 @@ fn test_line_bytes() { assert_eq!(at.read("xad"), "ee\n"); } +#[test] +fn test_line_bytes_concatenated_with_value() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-C8", "letters.txt"]).succeeds(); + assert_eq!(at.read("xaa"), "aaaaaaaa"); + assert_eq!(at.read("xab"), "a\nbbbb\n"); + assert_eq!(at.read("xac"), "cccc\ndd\n"); + assert_eq!(at.read("xad"), "ee\n"); +} + #[test] fn test_line_bytes_no_final_newline() { let (at, mut ucmd) = at_and_ucmd!(); From 7f905a3b8d6d08c271d71b25dcba3d9559f83959 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 29 Aug 2023 16:35:00 -0400 Subject: [PATCH 05/10] split: edge case for obs lines within combined shorts + test --- src/uu/split/src/split.rs | 9 ++++++++- tests/by-util/test_split.rs | 21 +++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index dae24d36b..afe635456 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -88,13 +88,20 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { // that can have obsolete lines option value in it // extract numeric part and filter it out let mut obs_lines_extracted: Vec = vec![]; + let mut obs_lines_end_reached = false; let filtered_slice: Vec = slice .chars() .filter(|c| { - if c.is_ascii_digit() { + // To correctly process scenario like '-x200a4' + // we need to stop extracting digits once alphabetic character is encountered + // after we already have something in obs_lines_extracted + if c.is_ascii_digit() && !obs_lines_end_reached { obs_lines_extracted.push(*c); false } else { + if !obs_lines_extracted.is_empty() { + obs_lines_end_reached = true; + } true } }) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 9fba2177e..3280fff67 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -170,7 +170,7 @@ fn test_split_str_prefixed_chunks_by_bytes() { assert_eq!(glob.collate(), at.read_bytes(name)); } -// Test short bytes option concatenated with value +/// Test short bytes option concatenated with value #[test] fn test_split_by_bytes_short_concatenated_with_value() { let (at, mut ucmd) = at_and_ucmd!(); @@ -342,7 +342,7 @@ fn test_split_lines_number() { .stderr_only("split: invalid number of lines: 'file'\n"); } -// Test short lines option with value concatenated +/// Test short lines option with value concatenated #[test] fn test_split_lines_short_concatenated_with_value() { let (at, mut ucmd) = at_and_ucmd!(); @@ -401,6 +401,19 @@ fn test_split_obs_lines_within_combined_shorts() { assert_eq!(glob.collate(), at.read_bytes(name)) } +/// Test for obsolete lines option as part of combined short options with tailing suffix length with value +#[test] +fn test_split_obs_lines_within_combined_shorts_tailing_suffix_length() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "obs-lines-combined-shorts-tailing-suffix-length"; + RandomFile::new(&at, name).add_lines(1000); + ucmd.args(&["-d200a4", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x\d\d\d\d$"); + assert_eq!(glob.count(), 5); + assert_eq!(glob.collate(), at.read_bytes(name)); +} + /// Test for obsolete lines option starts as part of combined short options #[test] fn test_split_obs_lines_starts_combined_shorts() { @@ -721,7 +734,7 @@ fn test_invalid_suffix_length() { .stderr_contains("invalid suffix length: 'xyz'"); } -// Test short suffix length option with value concatenated +/// Test short suffix length option with value concatenated #[test] fn test_split_suffix_length_short_concatenated_with_value() { let (at, mut ucmd) = at_and_ucmd!(); @@ -752,7 +765,7 @@ fn test_include_newlines() { assert_eq!(s, "5\n"); } -// Test short number of chunks option concatenated with value +/// Test short number of chunks option concatenated with value #[test] fn test_split_number_chunks_short_concatenated_with_value() { let (at, mut ucmd) = at_and_ucmd!(); From 6f37b4b4cfcbe45725d91d63dadabd1aa9873a2b Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Wed, 30 Aug 2023 19:29:57 -0400 Subject: [PATCH 06/10] split: hyphenated values + tests --- src/uu/split/src/split.rs | 50 ++++++++++++++++++++--- tests/by-util/test_split.rs | 81 ++++++++++++++++++++++++++++++++++--- 2 files changed, 120 insertions(+), 11 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index afe635456..7834dfe68 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -73,11 +73,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { /// following GNU `split` behavior fn handle_obsolete(args: &[String]) -> (Vec, Option) { let mut obs_lines = None; + let mut preceding_long_opt_req_value = false; + let mut preceding_short_opt_req_value = false; let filtered_args = args .iter() .filter_map(|slice| { + let filter: Option; + // check if the slice is a true short option (and not hyphen prefixed value of an option) + // and if so, a short option that can contain obsolete lines value if slice.starts_with('-') && !slice.starts_with("--") + && !preceding_long_opt_req_value + && !preceding_short_opt_req_value && !slice.starts_with("-a") && !slice.starts_with("-b") && !slice.starts_with("-C") @@ -109,7 +116,7 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { if obs_lines_extracted.is_empty() { // no obsolete lines value found/extracted - Some(slice.to_owned()) + filter = Some(slice.to_owned()); } else { // obsolete lines value was extracted obs_lines = Some(obs_lines_extracted.iter().collect()); @@ -117,16 +124,41 @@ fn handle_obsolete(args: &[String]) -> (Vec, Option) { // there were some short options in front of or after obsolete lines value // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value // would look like '-xd' or '-de' or similar - Some(filtered_slice.iter().collect()) + filter = Some(filtered_slice.iter().collect()); } else { - None + filter = None; } } } else { // either not a short option // or a short option that cannot have obsolete lines value in it - Some(slice.to_owned()) + filter = Some(slice.to_owned()); } + // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value + // following slice should be treaded as value for this option + // even if it starts with '-' (which would be treated as hyphen prefixed value) + if slice.starts_with("--") { + preceding_long_opt_req_value = &slice[2..] == OPT_BYTES + || &slice[2..] == OPT_LINE_BYTES + || &slice[2..] == OPT_LINES + || &slice[2..] == OPT_ADDITIONAL_SUFFIX + || &slice[2..] == OPT_FILTER + || &slice[2..] == OPT_NUMBER + || &slice[2..] == OPT_SUFFIX_LENGTH; + } + // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace) + // following slice should be treaded as value for this option + // even if it starts with '-' (which would be treated as hyphen prefixed value) + preceding_short_opt_req_value = + slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a"; + // slice is a value + // reset preceding option flags + if !slice.starts_with('-') { + preceding_short_opt_req_value = false; + preceding_long_opt_req_value = false; + } + // return filter + filter }) .collect(); (filtered_args, obs_lines) @@ -144,6 +176,7 @@ pub fn uu_app() -> Command { Arg::new(OPT_BYTES) .short('b') .long(OPT_BYTES) + .allow_hyphen_values(true) .value_name("SIZE") .help("put SIZE bytes per output file"), ) @@ -151,14 +184,15 @@ pub fn uu_app() -> Command { Arg::new(OPT_LINE_BYTES) .short('C') .long(OPT_LINE_BYTES) + .allow_hyphen_values(true) .value_name("SIZE") - .default_value("2") .help("put at most SIZE bytes of lines per output file"), ) .arg( Arg::new(OPT_LINES) .short('l') .long(OPT_LINES) + .allow_hyphen_values(true) .value_name("NUMBER") .default_value("1000") .help("put NUMBER lines/records per output file"), @@ -167,6 +201,7 @@ pub fn uu_app() -> Command { Arg::new(OPT_NUMBER) .short('n') .long(OPT_NUMBER) + .allow_hyphen_values(true) .value_name("CHUNKS") .help("generate CHUNKS output files; see explanation below"), ) @@ -174,6 +209,7 @@ pub fn uu_app() -> Command { .arg( Arg::new(OPT_ADDITIONAL_SUFFIX) .long(OPT_ADDITIONAL_SUFFIX) + .allow_hyphen_values(true) .value_name("SUFFIX") .default_value("") .help("additional SUFFIX to append to output file names"), @@ -181,6 +217,7 @@ pub fn uu_app() -> Command { .arg( Arg::new(OPT_FILTER) .long(OPT_FILTER) + .allow_hyphen_values(true) .value_name("COMMAND") .value_hint(clap::ValueHint::CommandName) .help( @@ -250,9 +287,10 @@ pub fn uu_app() -> Command { Arg::new(OPT_SUFFIX_LENGTH) .short('a') .long(OPT_SUFFIX_LENGTH) + .allow_hyphen_values(true) .value_name("N") .default_value(OPT_DEFAULT_SUFFIX_LENGTH) - .help("use suffixes of fixed length N. 0 implies dynamic length."), + .help("use suffixes of fixed length N. 0 implies dynamic length, starting with 2"), ) .arg( Arg::new(OPT_VERBOSE) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 3280fff67..466dabda9 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -254,6 +254,18 @@ fn test_additional_suffix_no_slash() { .usage_error("invalid suffix 'a/b', contains directory separator"); } +#[test] +fn test_split_additional_suffix_hyphen_value() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "split_additional_suffix"; + RandomFile::new(&at, name).add_lines(2000); + ucmd.args(&["--additional-suffix", "-300", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]-300$"); + assert_eq!(glob.count(), 2); + assert_eq!(glob.collate(), at.read_bytes(name)); +} + // note: the test_filter* tests below are unix-only // windows support has been waived for now because of the difficulty of getting // the `cmd` call right @@ -436,9 +448,9 @@ fn test_split_obs_lines_starts_combined_shorts() { /// Test for using both obsolete lines (standalone) option and short/long lines option simultaneously #[test] fn test_split_both_lines_and_obs_lines_standalone() { - // This test will ensure that if both lines option '-l' or '--lines' - // and obsolete lines option '-100' are used - // it fails + // This test will ensure that: + // if both lines option '-l' or '--lines' (with value) and obsolete lines option '-100' are used - it fails + // if standalone lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; at.touch("file"); @@ -455,18 +467,77 @@ fn test_split_both_lines_and_obs_lines_standalone() { .fails() .code_is(1) .stderr_contains("split: cannot split in more than one way\n"); +} + +/// Test for using obsolete lines option incorrectly, so it is treated as a hyphen prefixed value of other option +#[test] +fn test_split_obs_lines_as_other_option_value() { + // This test will ensure that: + // if obsolete lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("file"); + scene .ucmd() .args(&["--lines", "-200", "file"]) .fails() .code_is(1) - .stderr_contains("split: cannot split in more than one way\n"); + .stderr_contains("split: invalid number of lines: '-200'\n"); scene .ucmd() .args(&["-l", "-200", "file"]) .fails() .code_is(1) - .stderr_contains("split: cannot split in more than one way\n"); + .stderr_contains("split: invalid number of lines: '-200'\n"); + scene + .ucmd() + .args(&["-a", "-200", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid suffix length: '-200'\n"); + scene + .ucmd() + .args(&["--suffix-length", "-d200e", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid suffix length: '-d200e'\n"); + scene + .ucmd() + .args(&["-C", "-200", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid number of bytes: '-200'\n"); + scene + .ucmd() + .args(&["--line-bytes", "-x200a4", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid number of bytes: '-x200a4'\n"); + scene + .ucmd() + .args(&["-b", "-200", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid number of bytes: '-200'\n"); + scene + .ucmd() + .args(&["--bytes", "-200xd", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid number of bytes: '-200xd'\n"); + scene + .ucmd() + .args(&["-n", "-200", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid number of chunks: -200\n"); + scene + .ucmd() + .args(&["--number", "-e200", "file"]) + .fails() + .code_is(1) + .stderr_contains("split: invalid number of chunks: -e200\n"); } /// Test for using more than one obsolete lines option (standalone) From 5bfe9b19ef775adf40c3e915a731611dbc4a6256 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Thu, 31 Aug 2023 14:46:56 -0400 Subject: [PATCH 07/10] split: avoid using `collect_lossy` + test for invalid UTF8 arguments --- src/uu/split/src/split.rs | 167 +++++++++++++++++++----------------- tests/by-util/test_split.rs | 45 ++++++++++ 2 files changed, 134 insertions(+), 78 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 7834dfe68..68692d03c 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -13,6 +13,7 @@ use crate::filenames::FilenameIterator; use crate::filenames::SuffixType; use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command}; use std::env; +use std::ffi::OsString; use std::fmt; use std::fs::{metadata, File}; use std::io; @@ -52,9 +53,8 @@ const AFTER_HELP: &str = help_section!("after help", "split.md"); #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let args = args.collect_lossy(); - - let (args, obs_lines) = handle_obsolete(&args[..]); + + let (args, obs_lines) = handle_obsolete(args); let matches = uu_app().try_get_matches_from(args)?; @@ -71,91 +71,102 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { /// `split -x300e file` would mean `split -x -l 300 -e file` /// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins) /// following GNU `split` behavior -fn handle_obsolete(args: &[String]) -> (Vec, Option) { +fn handle_obsolete(args: impl uucore::Args) -> (Vec, Option) { let mut obs_lines = None; let mut preceding_long_opt_req_value = false; let mut preceding_short_opt_req_value = false; let filtered_args = args - .iter() - .filter_map(|slice| { - let filter: Option; - // check if the slice is a true short option (and not hyphen prefixed value of an option) - // and if so, a short option that can contain obsolete lines value - if slice.starts_with('-') - && !slice.starts_with("--") - && !preceding_long_opt_req_value - && !preceding_short_opt_req_value - && !slice.starts_with("-a") - && !slice.starts_with("-b") - && !slice.starts_with("-C") - && !slice.starts_with("-l") - && !slice.starts_with("-n") - { - // start of the short option string - // that can have obsolete lines option value in it - // extract numeric part and filter it out - let mut obs_lines_extracted: Vec = vec![]; - let mut obs_lines_end_reached = false; - let filtered_slice: Vec = slice - .chars() - .filter(|c| { - // To correctly process scenario like '-x200a4' - // we need to stop extracting digits once alphabetic character is encountered - // after we already have something in obs_lines_extracted - if c.is_ascii_digit() && !obs_lines_end_reached { - obs_lines_extracted.push(*c); - false - } else { - if !obs_lines_extracted.is_empty() { - obs_lines_end_reached = true; + .filter_map(|os_slice| { + let filter: Option; + if let Some(slice) = os_slice.to_str() { + // check if the slice is a true short option (and not hyphen prefixed value of an option) + // and if so, a short option that can contain obsolete lines value + if slice.starts_with('-') + && !slice.starts_with("--") + && !preceding_long_opt_req_value + && !preceding_short_opt_req_value + && !slice.starts_with("-a") + && !slice.starts_with("-b") + && !slice.starts_with("-C") + && !slice.starts_with("-l") + && !slice.starts_with("-n") + { + // start of the short option string + // that can have obsolete lines option value in it + // extract numeric part and filter it out + let mut obs_lines_extracted: Vec = vec![]; + let mut obs_lines_end_reached = false; + let filtered_slice: Vec = slice + .chars() + .filter(|c| { + // To correctly process scenario like '-x200a4' + // we need to stop extracting digits once alphabetic character is encountered + // after we already have something in obs_lines_extracted + if c.is_ascii_digit() && !obs_lines_end_reached { + obs_lines_extracted.push(*c); + false + } else { + if !obs_lines_extracted.is_empty() { + obs_lines_end_reached = true; + } + true } - true - } - }) - .collect(); + }) + .collect(); - if obs_lines_extracted.is_empty() { - // no obsolete lines value found/extracted - filter = Some(slice.to_owned()); - } else { - // obsolete lines value was extracted - obs_lines = Some(obs_lines_extracted.iter().collect()); - if filtered_slice.get(1).is_some() { - // there were some short options in front of or after obsolete lines value - // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value - // would look like '-xd' or '-de' or similar - filter = Some(filtered_slice.iter().collect()); + if obs_lines_extracted.is_empty() { + // no obsolete lines value found/extracted + filter = Some(OsString::from(slice)); } else { - filter = None; + // obsolete lines value was extracted + obs_lines = Some(obs_lines_extracted.iter().collect()); + if filtered_slice.get(1).is_some() { + // there were some short options in front of or after obsolete lines value + // i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value + // would look like '-xd' or '-de' or similar + let filtered_slice: String = filtered_slice.iter().collect(); + filter = Some(OsString::from(filtered_slice)); + } else { + filter = None; + } } + } else { + // either not a short option + // or a short option that cannot have obsolete lines value in it + filter = Some(OsString::from(slice)); + } + // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value + // following slice should be treaded as value for this option + // even if it starts with '-' (which would be treated as hyphen prefixed value) + if slice.starts_with("--") { + preceding_long_opt_req_value = &slice[2..] == OPT_BYTES + || &slice[2..] == OPT_LINE_BYTES + || &slice[2..] == OPT_LINES + || &slice[2..] == OPT_ADDITIONAL_SUFFIX + || &slice[2..] == OPT_FILTER + || &slice[2..] == OPT_NUMBER + || &slice[2..] == OPT_SUFFIX_LENGTH; + } + // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace) + // following slice should be treaded as value for this option + // even if it starts with '-' (which would be treated as hyphen prefixed value) + preceding_short_opt_req_value = slice == "-b" + || slice == "-C" + || slice == "-l" + || slice == "-n" + || slice == "-a"; + // slice is a value + // reset preceding option flags + if !slice.starts_with('-') { + preceding_short_opt_req_value = false; + preceding_long_opt_req_value = false; } } else { - // either not a short option - // or a short option that cannot have obsolete lines value in it - filter = Some(slice.to_owned()); - } - // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value - // following slice should be treaded as value for this option - // even if it starts with '-' (which would be treated as hyphen prefixed value) - if slice.starts_with("--") { - preceding_long_opt_req_value = &slice[2..] == OPT_BYTES - || &slice[2..] == OPT_LINE_BYTES - || &slice[2..] == OPT_LINES - || &slice[2..] == OPT_ADDITIONAL_SUFFIX - || &slice[2..] == OPT_FILTER - || &slice[2..] == OPT_NUMBER - || &slice[2..] == OPT_SUFFIX_LENGTH; - } - // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace) - // following slice should be treaded as value for this option - // even if it starts with '-' (which would be treated as hyphen prefixed value) - preceding_short_opt_req_value = - slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a"; - // slice is a value - // reset preceding option flags - if !slice.starts_with('-') { - preceding_short_opt_req_value = false; - preceding_long_opt_req_value = false; + // Cannot cleanly convert os_slice to UTF-8 + // Do not process and return as-is + // This will cause failure later on, but we should not handle it here + // and let clap panic on invalid UTF-8 argument + filter = Some(os_slice); } // return filter filter diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 466dabda9..d24d2cf54 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -9,6 +9,7 @@ use rand::{thread_rng, Rng, SeedableRng}; use regex::Regex; #[cfg(not(windows))] use std::env; +use std::ffi::OsStr; use std::path::Path; use std::{ fs::{read_dir, File}, @@ -1287,3 +1288,47 @@ fn test_split_invalid_input() { .no_stdout() .stderr_contains("split: invalid number of chunks: 0"); } + +/// Test if there are invalid (non UTF-8) in the arguments - unix +/// clap is expected to fail/panic +#[cfg(unix)] +#[test] +fn test_split_non_utf8_argument_unix() { + use std::os::unix::ffi::OsStrExt; + + let (at, mut ucmd) = at_and_ucmd!(); + let name = "test_split_non_utf8_argument"; + let opt = OsStr::from_bytes("--additional-suffix".as_bytes()); + RandomFile::new(&at, name).add_lines(2000); + // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' + // respectively. The value 0x80 is a lone continuation byte, invalid + // in a UTF-8 sequence. + let opt_value = [0x66, 0x6f, 0x80, 0x6f]; + let opt_value = OsStr::from_bytes(&opt_value[..]); + let name = OsStr::from_bytes(name.as_bytes()); + ucmd.args(&[opt, opt_value, name]) + .fails() + .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); +} + +/// Test if there are invalid (non UTF-8) in the arguments - windows +/// clap is expected to fail/panic +#[cfg(windows)] +#[test] +fn test_split_non_utf8_argument_windows() { + use std::os::windows::prelude::*; + + let (at, mut ucmd) = at_and_ucmd!(); + let name = "test_split_non_utf8_argument"; + let opt = OsStr::from_bytes("--additional-suffix".as_bytes()); + RandomFile::new(&at, name).add_lines(2000); + // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' + // respectively. The value 0xD800 is a lone surrogate half, invalid + // in a UTF-16 sequence. + let opt_value = [0x0066, 0x006f, 0xD800, 0x006f]; + let opt_value = OsString::from_wide(&opt_value[..]); + let name = OsStr::from_bytes(name.as_bytes()); + ucmd.args(&[opt, opt_value, name]) + .fails() + .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); +} From 271a108fa9e1ea75ed8c6e42c2a207a6da935c81 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Thu, 31 Aug 2023 15:37:42 -0400 Subject: [PATCH 08/10] split: formatting --- src/uu/split/src/split.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 68692d03c..d76bfb2de 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -53,7 +53,6 @@ const AFTER_HELP: &str = help_section!("after help", "split.md"); #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let (args, obs_lines) = handle_obsolete(args); let matches = uu_app().try_get_matches_from(args)?; From d2812cbbc387b7dc3ad8f5f31863aad09ce1e6ad Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Thu, 31 Aug 2023 16:04:44 -0400 Subject: [PATCH 09/10] split: disable windows test for invalid UTF8 --- tests/by-util/test_split.rs | 44 +++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index d24d2cf54..aef9ea040 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -9,7 +9,6 @@ use rand::{thread_rng, Rng, SeedableRng}; use regex::Regex; #[cfg(not(windows))] use std::env; -use std::ffi::OsStr; use std::path::Path; use std::{ fs::{read_dir, File}, @@ -1294,6 +1293,7 @@ fn test_split_invalid_input() { #[cfg(unix)] #[test] fn test_split_non_utf8_argument_unix() { + use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; let (at, mut ucmd) = at_and_ucmd!(); @@ -1311,24 +1311,26 @@ fn test_split_non_utf8_argument_unix() { .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); } -/// Test if there are invalid (non UTF-8) in the arguments - windows -/// clap is expected to fail/panic -#[cfg(windows)] -#[test] -fn test_split_non_utf8_argument_windows() { - use std::os::windows::prelude::*; +// Test if there are invalid (non UTF-8) in the arguments - windows +// clap is expected to fail/panic +// comment it out for now +// #[cfg(windows)] +// #[test] +// fn test_split_non_utf8_argument_windows() { +// use std::ffi::OsString; +// use std::os::windows::prelude::*; - let (at, mut ucmd) = at_and_ucmd!(); - let name = "test_split_non_utf8_argument"; - let opt = OsStr::from_bytes("--additional-suffix".as_bytes()); - RandomFile::new(&at, name).add_lines(2000); - // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' - // respectively. The value 0xD800 is a lone surrogate half, invalid - // in a UTF-16 sequence. - let opt_value = [0x0066, 0x006f, 0xD800, 0x006f]; - let opt_value = OsString::from_wide(&opt_value[..]); - let name = OsStr::from_bytes(name.as_bytes()); - ucmd.args(&[opt, opt_value, name]) - .fails() - .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); -} +// let (at, mut ucmd) = at_and_ucmd!(); +// let name = "test_split_non_utf8_argument"; +// let opt = OsStr::from_bytes("--additional-suffix".as_bytes()); +// RandomFile::new(&at, name).add_lines(2000); +// // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' +// // respectively. The value 0xD800 is a lone surrogate half, invalid +// // in a UTF-16 sequence. +// let opt_value = [0x0066, 0x006f, 0xD800, 0x006f]; +// let opt_value = OsString::from_wide(&opt_value[..]); +// let name = OsStr::from_bytes(name.as_bytes()); +// ucmd.args(&[opt, opt_value, name]) +// .fails() +// .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); +// } From e597189be7555164fc7f57df0b03f1ae788901ea Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Thu, 31 Aug 2023 20:48:44 -0400 Subject: [PATCH 10/10] split: fixed windows test for invalid unicode args --- tests/by-util/test_split.rs | 45 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index aef9ea040..965f2733e 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -1290,8 +1290,8 @@ fn test_split_invalid_input() { /// Test if there are invalid (non UTF-8) in the arguments - unix /// clap is expected to fail/panic -#[cfg(unix)] #[test] +#[cfg(unix)] fn test_split_non_utf8_argument_unix() { use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; @@ -1311,26 +1311,25 @@ fn test_split_non_utf8_argument_unix() { .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); } -// Test if there are invalid (non UTF-8) in the arguments - windows -// clap is expected to fail/panic -// comment it out for now -// #[cfg(windows)] -// #[test] -// fn test_split_non_utf8_argument_windows() { -// use std::ffi::OsString; -// use std::os::windows::prelude::*; +/// Test if there are invalid (non UTF-8) in the arguments - windows +/// clap is expected to fail/panic +#[test] +#[cfg(windows)] +fn test_split_non_utf8_argument_windows() { + use std::ffi::OsString; + use std::os::windows::ffi::OsStringExt; -// let (at, mut ucmd) = at_and_ucmd!(); -// let name = "test_split_non_utf8_argument"; -// let opt = OsStr::from_bytes("--additional-suffix".as_bytes()); -// RandomFile::new(&at, name).add_lines(2000); -// // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' -// // respectively. The value 0xD800 is a lone surrogate half, invalid -// // in a UTF-16 sequence. -// let opt_value = [0x0066, 0x006f, 0xD800, 0x006f]; -// let opt_value = OsString::from_wide(&opt_value[..]); -// let name = OsStr::from_bytes(name.as_bytes()); -// ucmd.args(&[opt, opt_value, name]) -// .fails() -// .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); -// } + let (at, mut ucmd) = at_and_ucmd!(); + let name = "test_split_non_utf8_argument"; + let opt = OsString::from("--additional-suffix"); + RandomFile::new(&at, name).add_lines(2000); + // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' + // respectively. The value 0xD800 is a lone surrogate half, invalid + // in a UTF-16 sequence. + let opt_value = [0x0066, 0x006f, 0xD800, 0x006f]; + let opt_value = OsString::from_wide(&opt_value[..]); + let name = OsString::from(name); + ucmd.args(&[opt, opt_value, name]) + .fails() + .stderr_contains("error: invalid UTF-8 was detected in one or more arguments"); +}