mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
split: avoid using collect_lossy
+ test for invalid UTF8 arguments
This commit is contained in:
parent
6f37b4b4cf
commit
5bfe9b19ef
2 changed files with 134 additions and 78 deletions
|
@ -13,6 +13,7 @@ use crate::filenames::FilenameIterator;
|
||||||
use crate::filenames::SuffixType;
|
use crate::filenames::SuffixType;
|
||||||
use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
|
use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
|
||||||
use std::env;
|
use std::env;
|
||||||
|
use std::ffi::OsString;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::{metadata, File};
|
use std::fs::{metadata, File};
|
||||||
use std::io;
|
use std::io;
|
||||||
|
@ -52,9 +53,8 @@ const AFTER_HELP: &str = help_section!("after help", "split.md");
|
||||||
|
|
||||||
#[uucore::main]
|
#[uucore::main]
|
||||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
let args = args.collect_lossy();
|
|
||||||
|
|
||||||
let (args, obs_lines) = handle_obsolete(&args[..]);
|
let (args, obs_lines) = handle_obsolete(args);
|
||||||
|
|
||||||
let matches = uu_app().try_get_matches_from(args)?;
|
let matches = uu_app().try_get_matches_from(args)?;
|
||||||
|
|
||||||
|
@ -71,14 +71,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
/// `split -x300e file` would mean `split -x -l 300 -e file`
|
/// `split -x300e file` would mean `split -x -l 300 -e file`
|
||||||
/// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins)
|
/// `split -x300e -22 file` would mean `split -x -e -l 22 file` (last obsolete lines option wins)
|
||||||
/// following GNU `split` behavior
|
/// following GNU `split` behavior
|
||||||
fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
|
fn handle_obsolete(args: impl uucore::Args) -> (Vec<OsString>, Option<String>) {
|
||||||
let mut obs_lines = None;
|
let mut obs_lines = None;
|
||||||
let mut preceding_long_opt_req_value = false;
|
let mut preceding_long_opt_req_value = false;
|
||||||
let mut preceding_short_opt_req_value = false;
|
let mut preceding_short_opt_req_value = false;
|
||||||
let filtered_args = args
|
let filtered_args = args
|
||||||
.iter()
|
.filter_map(|os_slice| {
|
||||||
.filter_map(|slice| {
|
let filter: Option<OsString>;
|
||||||
let filter: Option<String>;
|
if let Some(slice) = os_slice.to_str() {
|
||||||
// check if the slice is a true short option (and not hyphen prefixed value of an option)
|
// check if the slice is a true short option (and not hyphen prefixed value of an option)
|
||||||
// and if so, a short option that can contain obsolete lines value
|
// and if so, a short option that can contain obsolete lines value
|
||||||
if slice.starts_with('-')
|
if slice.starts_with('-')
|
||||||
|
@ -116,7 +116,7 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
|
||||||
|
|
||||||
if obs_lines_extracted.is_empty() {
|
if obs_lines_extracted.is_empty() {
|
||||||
// no obsolete lines value found/extracted
|
// no obsolete lines value found/extracted
|
||||||
filter = Some(slice.to_owned());
|
filter = Some(OsString::from(slice));
|
||||||
} else {
|
} else {
|
||||||
// obsolete lines value was extracted
|
// obsolete lines value was extracted
|
||||||
obs_lines = Some(obs_lines_extracted.iter().collect());
|
obs_lines = Some(obs_lines_extracted.iter().collect());
|
||||||
|
@ -124,7 +124,8 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
|
||||||
// there were some short options in front of or after obsolete lines value
|
// there were some short options in front of or after obsolete lines value
|
||||||
// i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
|
// i.e. '-xd100' or '-100de' or similar, which after extraction of obsolete lines value
|
||||||
// would look like '-xd' or '-de' or similar
|
// would look like '-xd' or '-de' or similar
|
||||||
filter = Some(filtered_slice.iter().collect());
|
let filtered_slice: String = filtered_slice.iter().collect();
|
||||||
|
filter = Some(OsString::from(filtered_slice));
|
||||||
} else {
|
} else {
|
||||||
filter = None;
|
filter = None;
|
||||||
}
|
}
|
||||||
|
@ -132,7 +133,7 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
|
||||||
} else {
|
} else {
|
||||||
// either not a short option
|
// either not a short option
|
||||||
// or a short option that cannot have obsolete lines value in it
|
// or a short option that cannot have obsolete lines value in it
|
||||||
filter = Some(slice.to_owned());
|
filter = Some(OsString::from(slice));
|
||||||
}
|
}
|
||||||
// capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
|
// capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
|
||||||
// following slice should be treated as value for this option
|
// following slice should be treated as value for this option
|
||||||
|
@ -149,14 +150,24 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
|
||||||
// capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
|
// capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
|
||||||
// following slice should be treated as value for this option
|
// following slice should be treated as value for this option
|
||||||
// even if it starts with '-' (which would be treated as hyphen prefixed value)
|
// even if it starts with '-' (which would be treated as hyphen prefixed value)
|
||||||
preceding_short_opt_req_value =
|
preceding_short_opt_req_value = slice == "-b"
|
||||||
slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a";
|
|| slice == "-C"
|
||||||
|
|| slice == "-l"
|
||||||
|
|| slice == "-n"
|
||||||
|
|| slice == "-a";
|
||||||
// slice is a value
|
// slice is a value
|
||||||
// reset preceding option flags
|
// reset preceding option flags
|
||||||
if !slice.starts_with('-') {
|
if !slice.starts_with('-') {
|
||||||
preceding_short_opt_req_value = false;
|
preceding_short_opt_req_value = false;
|
||||||
preceding_long_opt_req_value = false;
|
preceding_long_opt_req_value = false;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Cannot cleanly convert os_slice to UTF-8
|
||||||
|
// Do not process and return as-is
|
||||||
|
// This will cause failure later on, but we should not handle it here
|
||||||
|
// and let clap reject the invalid UTF-8 argument with its own error
|
||||||
|
filter = Some(os_slice);
|
||||||
|
}
|
||||||
// return filter
|
// return filter
|
||||||
filter
|
filter
|
||||||
})
|
})
|
||||||
|
|
|
@ -9,6 +9,7 @@ use rand::{thread_rng, Rng, SeedableRng};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
use std::env;
|
use std::env;
|
||||||
|
use std::ffi::OsStr;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::{
|
use std::{
|
||||||
fs::{read_dir, File},
|
fs::{read_dir, File},
|
||||||
|
@ -1287,3 +1288,47 @@ fn test_split_invalid_input() {
|
||||||
.no_stdout()
|
.no_stdout()
|
||||||
.stderr_contains("split: invalid number of chunks: 0");
|
.stderr_contains("split: invalid number of chunks: 0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test that arguments containing invalid (non UTF-8) data are rejected - unix
|
||||||
|
/// clap is expected to fail/panic
|
||||||
|
#[cfg(unix)]
|
||||||
|
#[test]
|
||||||
|
fn test_split_non_utf8_argument_unix() {
|
||||||
|
use std::os::unix::ffi::OsStrExt;
|
||||||
|
|
||||||
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
|
let name = "test_split_non_utf8_argument";
|
||||||
|
let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
|
||||||
|
RandomFile::new(&at, name).add_lines(2000);
|
||||||
|
// Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
|
||||||
|
// respectively. The value 0x80 is a lone continuation byte, invalid
|
||||||
|
// in a UTF-8 sequence.
|
||||||
|
let opt_value = [0x66, 0x6f, 0x80, 0x6f];
|
||||||
|
let opt_value = OsStr::from_bytes(&opt_value[..]);
|
||||||
|
let name = OsStr::from_bytes(name.as_bytes());
|
||||||
|
ucmd.args(&[opt, opt_value, name])
|
||||||
|
.fails()
|
||||||
|
.stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that arguments containing invalid (non UTF-8) data are rejected - windows
|
||||||
|
/// clap is expected to fail/panic
|
||||||
|
#[cfg(windows)]
|
||||||
|
#[test]
|
||||||
|
fn test_split_non_utf8_argument_windows() {
|
||||||
|
use std::os::windows::prelude::*;
|
||||||
|
|
||||||
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
|
let name = "test_split_non_utf8_argument";
|
||||||
|
let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
|
||||||
|
RandomFile::new(&at, name).add_lines(2000);
|
||||||
|
// Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
|
||||||
|
// respectively. The value 0xD800 is a lone surrogate half, invalid
|
||||||
|
// in a UTF-16 sequence.
|
||||||
|
let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
|
||||||
|
let opt_value = OsString::from_wide(&opt_value[..]);
|
||||||
|
let name = OsStr::from_bytes(name.as_bytes());
|
||||||
|
ucmd.args(&[opt, opt_value, name])
|
||||||
|
.fails()
|
||||||
|
.stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue