1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

cut: refactor delimiters osstring

This commit is contained in:
zhitkoff 2024-03-12 18:02:43 -04:00
parent 1725479c06
commit 4f9497f266

View file

@ -347,11 +347,9 @@ fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
} }
} }
// This is a temporary helper function to convert OsString to &[u8] for unix targets only // Helper function for processing delimiter values (which could be non-UTF-8)
// TODO Remove this function and re-implement the functionality in each place that calls it // It converts OsString to &[u8] for unix targets only
// for all targets using https://doc.rust-lang.org/nightly/std/ffi/struct.OsStr.html#method.as_encoded_bytes // On non-unix (i.e. Windows) it will just return an error if the delimiter value is not UTF-8
// once project's MSRV is bumped up to 1.74.0+ so that function becomes available
// For now, support unix targets only; on non-unix (i.e. Windows) it will just return an error if the delimiter value is not UTF-8
fn os_string_as_bytes(os_string: &OsString) -> UResult<&[u8]> { fn os_string_as_bytes(os_string: &OsString) -> UResult<&[u8]> {
#[cfg(unix)] #[cfg(unix)]
let bytes = os_string.as_bytes(); let bytes = os_string.as_bytes();
@ -372,12 +370,10 @@ fn os_string_as_bytes(os_string: &OsString) -> UResult<&[u8]> {
// Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively // Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively
// Allow either delimiter to have a value that is neither UTF-8 nor ASCII to align with GNU behavior // Allow either delimiter to have a value that is neither UTF-8 nor ASCII to align with GNU behavior
fn get_delimiters<'a>( fn get_delimiters(
matches: &'a ArgMatches, matches: &ArgMatches,
delimiter_is_equal: bool, delimiter_is_equal: bool,
os_string_equals: &'a OsString, ) -> UResult<(Delimiter, Option<&[u8]>)> {
os_string_nul: &'a OsString,
) -> UResult<(Delimiter<'a>, Option<&'a [u8]>)> {
let whitespace_delimited = matches.get_flag(options::WHITESPACE_DELIMITED); let whitespace_delimited = matches.get_flag(options::WHITESPACE_DELIMITED);
let delim_opt = matches.get_one::<OsString>(options::DELIMITER); let delim_opt = matches.get_one::<OsString>(options::DELIMITER);
let delim = match delim_opt { let delim = match delim_opt {
@ -387,17 +383,16 @@ fn get_delimiters<'a>(
"invalid input: Only one of --delimiter (-d) or -w option can be specified", "invalid input: Only one of --delimiter (-d) or -w option can be specified",
)); ));
} }
Some(mut os_string) => { Some(os_string) => {
// GNU's `cut` supports `-d=` to set the delimiter to `=`. // GNU's `cut` supports `-d=` to set the delimiter to `=`.
// Clap parsing is limited in this situation, see: // Clap parsing is limited in this situation, see:
// https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242 // https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242
// rewrite the delimiter value os_string before further processing
if delimiter_is_equal { if delimiter_is_equal {
os_string = os_string_equals; Delimiter::Slice(b"=")
} else if os_string == "''" || os_string.is_empty() { } else if os_string == "''" || os_string.is_empty() {
// treat `''` as empty delimiter // treat `''` as empty delimiter
os_string = os_string_nul; Delimiter::Slice(b"\0")
} } else {
// For delimiter `-d` option value - allow both UTF-8 (possibly multi-byte) characters // For delimiter `-d` option value - allow both UTF-8 (possibly multi-byte) characters
// and non-UTF-8 (and non-ASCII) single-byte "characters", like `b"\xAD"` to align with GNU behavior // and non-UTF-8 (and non-ASCII) single-byte "characters", like `b"\xAD"` to align with GNU behavior
let bytes = os_string_as_bytes(os_string)?; let bytes = os_string_as_bytes(os_string)?;
@ -412,6 +407,7 @@ fn get_delimiters<'a>(
Delimiter::from(os_string) Delimiter::from(os_string)
} }
} }
}
None => match whitespace_delimited { None => match whitespace_delimited {
true => Delimiter::Whitespace, true => Delimiter::Whitespace,
false => Delimiter::default(), false => Delimiter::default(),
@ -421,7 +417,7 @@ fn get_delimiters<'a>(
.get_one::<OsString>(options::OUTPUT_DELIMITER) .get_one::<OsString>(options::OUTPUT_DELIMITER)
.map(|os_string| { .map(|os_string| {
if os_string.is_empty() || os_string == "''" { if os_string.is_empty() || os_string == "''" {
"\0".as_bytes() b"\0"
} else { } else {
os_string_as_bytes(os_string).unwrap() os_string_as_bytes(os_string).unwrap()
} }
@ -452,17 +448,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let complement = matches.get_flag(options::COMPLEMENT); let complement = matches.get_flag(options::COMPLEMENT);
let only_delimited = matches.get_flag(options::ONLY_DELIMITED); let only_delimited = matches.get_flag(options::ONLY_DELIMITED);
// since OsString::from creates a new value and it does not by default have 'static lifetime like &str let (delimiter, out_delimiter) = get_delimiters(&matches, delimiter_is_equal)?;
// we need to create these values here and pass them down to avoid issues with borrow checker and temporary values
let os_string_equals = OsString::from("=");
let os_string_nul = OsString::from("\0");
let (delimiter, out_delimiter) = get_delimiters(
&matches,
delimiter_is_equal,
&os_string_equals,
&os_string_nul,
)?;
let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED)); let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
// Only one, and only one of cutting mode arguments, i.e. `-b`, `-c`, `-f`, // Only one, and only one of cutting mode arguments, i.e. `-b`, `-c`, `-f`,