From ab77eaea454eb247b3fdee20aecc385a270db488 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 14 Nov 2024 22:04:17 +0100 Subject: [PATCH 001/179] fmt: generate an error if the input is a directory tested by tests/misc/read-errors --- src/uu/fmt/src/fmt.rs | 7 +++++++ tests/by-util/test_fmt.rs | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 007e75dd6..bb2e1a978 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -189,6 +189,13 @@ fn process_file( _ => { let f = File::open(file_name) .map_err_context(|| format!("cannot open {} for reading", file_name.quote()))?; + if f.metadata() + .map_err_context(|| format!("cannot get metadata for {}", file_name.quote()))? + .is_dir() + { + return Err(USimpleError::new(1, "read error".to_string())); + } + Box::new(f) as Box } }); diff --git a/tests/by-util/test_fmt.rs b/tests/by-util/test_fmt.rs index 9bb82ede5..fb6416430 100644 --- a/tests/by-util/test_fmt.rs +++ b/tests/by-util/test_fmt.rs @@ -9,6 +9,11 @@ fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1); } +#[test] +fn test_invalid_input() { + new_ucmd!().arg(".").fails().code_is(1); +} + #[test] fn test_fmt() { new_ucmd!() From 07b9fb8a2072a59cb2f3d0bce26520c3bc991629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Fri, 25 Oct 2024 02:43:39 +0200 Subject: [PATCH 002/179] feat(checksum): group flags under a ChecksumOptions struct --- src/uu/cksum/src/cksum.rs | 21 +++++++------ src/uu/hashsum/src/hashsum.rs | 16 ++++++---- src/uucore/src/lib/features/checksum.rs | 40 ++++++++++++++----------- 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index e7d73a3bb..2392660ee 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -13,8 +13,8 @@ use std::iter; use std::path::Path; use uucore::checksum::{ calculate_blake2b_length, detect_algo, digest_reader, 
perform_checksum_validation, - ChecksumError, ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, - ALGORITHM_OPTIONS_SYSV, SUPPORTED_ALGORITHMS, + ChecksumError, ChecksumOptions, ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, + ALGORITHM_OPTIONS_CRC, ALGORITHM_OPTIONS_SYSV, SUPPORTED_ALGORITHMS, }; use uucore::{ encoding, @@ -318,17 +318,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { || iter::once(OsStr::new("-")).collect::>(), |files| files.map(OsStr::new).collect::>(), ); - return perform_checksum_validation( - files.iter().copied(), - strict, - status, - warn, - binary_flag, + let opts = ChecksumOptions { + binary: binary_flag, ignore_missing, quiet, - algo_option, - length, - ); + status, + strict, + warn, + }; + + return perform_checksum_validation(files.iter().copied(), algo_option, length, opts); } let (tag, asterisk) = handle_tag_text_binary_flags(&matches)?; diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 90c8c8adf..1d3a758f5 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -23,6 +23,7 @@ use uucore::checksum::digest_reader; use uucore::checksum::escape_filename; use uucore::checksum::perform_checksum_validation; use uucore::checksum::ChecksumError; +use uucore::checksum::ChecksumOptions; use uucore::checksum::HashAlgorithm; use uucore::error::{FromIo, UResult}; use uucore::sum::{Digest, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256}; @@ -239,18 +240,21 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { || iter::once(OsStr::new("-")).collect::>(), |files| files.map(OsStr::new).collect::>(), ); + let opts = ChecksumOptions { + binary, + ignore_missing, + quiet, + status, + strict, + warn, + }; // Execute the checksum validation return perform_checksum_validation( input.iter().copied(), - strict, - status, - warn, - binary, - ignore_missing, - quiet, Some(algo.name), Some(algo.bits), + opts, ); } else if quiet { return 
Err(ChecksumError::QuietNotCheck.into()); diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index a2de28bc5..2450bf804 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -75,6 +75,17 @@ struct ChecksumResult { pub failed_open_file: i32, } +/// This struct regroups CLI flags. +#[derive(Debug, Default, Clone, Copy)] +pub struct ChecksumOptions { + pub binary: bool, + pub ignore_missing: bool, + pub quiet: bool, + pub status: bool, + pub strict: bool, + pub warn: bool, +} + #[derive(Debug, Error)] pub enum ChecksumError { #[error("the --raw option is not supported with multiple files")] @@ -505,17 +516,11 @@ fn identify_algo_name_and_length( /*** * Do the checksum validation (can be strict or not) */ -#[allow(clippy::too_many_arguments)] pub fn perform_checksum_validation<'a, I>( files: I, - strict: bool, - status: bool, - warn: bool, - binary: bool, - ignore_missing: bool, - quiet: bool, algo_name_input: Option<&str>, length_input: Option, + opts: ChecksumOptions, ) -> UResult<()> where I: Iterator, @@ -610,7 +615,8 @@ where // manage the input file let file_to_check = - match get_file_to_check(&real_filename_to_check, ignore_missing, &mut res) { + match get_file_to_check(&real_filename_to_check, opts.ignore_missing, &mut res) + { Some(file) => file, None => continue, }; @@ -619,11 +625,11 @@ where let create_fn = &mut algo.create_fn; let mut digest = create_fn(); let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, binary, algo.bits).unwrap(); + digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); // Do the checksum validation if expected_checksum == calculated_checksum { - if !quiet && !status { + if !opts.quiet && !opts.status { print_file_report( std::io::stdout(), filename_to_check, @@ -633,7 +639,7 @@ where } correct_format += 1; } else { - if !status { + if !opts.status { print_file_report( std::io::stdout(), 
filename_to_check, @@ -648,7 +654,7 @@ where // Don't show any warning for empty or commented lines. continue; } - if warn { + if opts.warn { let algo = if let Some(algo_name_input) = algo_name_input { algo_name_input.to_uppercase() } else { @@ -670,7 +676,7 @@ where // not a single line correctly formatted found // return an error if !properly_formatted { - if !status { + if !opts.status { return Err(ChecksumError::NoProperlyFormattedChecksumLinesFound { filename: get_filename_for_output(filename_input, input_is_stdin), } @@ -682,9 +688,9 @@ where } // if any incorrectly formatted line, show it - cksum_output(&res, status); + cksum_output(&res, opts.status); - if ignore_missing && correct_format == 0 { + if opts.ignore_missing && correct_format == 0 { // we have only bad format // and we had ignore-missing eprintln!( @@ -696,13 +702,13 @@ where } // strict means that we should have an exit code. - if strict && res.bad_format > 0 { + if opts.strict && res.bad_format > 0 { set_exit_code(1); } // if we have any failed checksum verification, we set an exit code // except if we have ignore_missing - if (res.failed_cksum > 0 || res.failed_open_file > 0) && !ignore_missing { + if (res.failed_cksum > 0 || res.failed_open_file > 0) && !opts.ignore_missing { set_exit_code(1); } } From df0da55645fe1062791b53d0fbb1d310e2920ed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Fri, 25 Oct 2024 02:52:39 +0200 Subject: [PATCH 003/179] feat(checksum): extract line processing into a separate function --- src/uucore/src/lib/features/checksum.rs | 270 ++++++++++++++---------- 1 file changed, 159 insertions(+), 111 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 2450bf804..a29fe0767 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -75,6 +75,23 @@ struct ChecksumResult { pub failed_open_file: i32, } +enum LineCheckError { + UError(Box), + // 
ImproperlyFormatted, +} + +impl From> for LineCheckError { + fn from(value: Box) -> Self { + Self::UError(value) + } +} + +impl From for LineCheckError { + fn from(value: ChecksumError) -> Self { + Self::UError(Box::new(value)) + } +} + /// This struct regroups CLI flags. #[derive(Debug, Default, Clone, Copy)] pub struct ChecksumOptions { @@ -513,6 +530,132 @@ fn identify_algo_name_and_length( Some((algorithm, bits)) } +#[allow(clippy::too_many_arguments)] +fn process_checksum_line( + filename_input: &OsStr, + line: &OsStr, + i: usize, + chosen_regex: &Regex, + is_algo_based_format: bool, + res: &mut ChecksumResult, + cli_algo_name: Option<&str>, + cli_algo_length: Option, + properly_formatted: &mut bool, + correct_format: &mut usize, + opts: ChecksumOptions, +) -> Result<(), LineCheckError> { + let line_bytes = os_str_as_bytes(line)?; + if let Some(caps) = chosen_regex.captures(line_bytes) { + *properly_formatted = true; + + let mut filename_to_check = caps.name("filename").unwrap().as_bytes(); + + if filename_to_check.starts_with(b"*") + && i == 0 + && chosen_regex.as_str() == SINGLE_SPACE_REGEX + { + // Remove the leading asterisk if present - only for the first line + filename_to_check = &filename_to_check[1..]; + } + + let expected_checksum = get_expected_checksum(filename_to_check, &caps, chosen_regex)?; + + // If the algo_name is provided, we use it, otherwise we try to detect it + let (algo_name, length) = if is_algo_based_format { + identify_algo_name_and_length(&caps, cli_algo_name, res, properly_formatted) + .unwrap_or((String::new(), None)) + } else if let Some(a) = cli_algo_name { + // When a specific algorithm name is input, use it and use the provided bits + // except when dealing with blake2b, where we will detect the length + if cli_algo_name == Some(ALGORITHM_OPTIONS_BLAKE2B) { + // division by 2 converts the length of the Blake2b checksum from hexadecimal + // characters to bytes, as each byte is represented by two hexadecimal characters. 
+ let length = Some(expected_checksum.len() / 2); + (ALGORITHM_OPTIONS_BLAKE2B.to_string(), length) + } else { + (a.to_lowercase(), cli_algo_length) + } + } else { + // Default case if no algorithm is specified and non-algo based format is matched + (String::new(), None) + }; + + if algo_name.is_empty() { + // we haven't been able to detect the algo name. No point to continue + *properly_formatted = false; + + // TODO: return error? + return Ok(()); + } + let mut algo = detect_algo(&algo_name, length)?; + + let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check); + + let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; + + // manage the input file + let file_to_check = + match get_file_to_check(&real_filename_to_check, opts.ignore_missing, res) { + Some(file) => file, + // TODO: return error? + None => return Ok(()), + }; + let mut file_reader = BufReader::new(file_to_check); + // Read the file and calculate the checksum + let create_fn = &mut algo.create_fn; + let mut digest = create_fn(); + let (calculated_checksum, _) = + digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); + + // Do the checksum validation + if expected_checksum == calculated_checksum { + if !opts.quiet && !opts.status { + print_file_report( + std::io::stdout(), + filename_to_check, + FileChecksumResult::Ok, + prefix, + ); + } + *correct_format += 1; + } else { + if !opts.status { + print_file_report( + std::io::stdout(), + filename_to_check, + FileChecksumResult::Failed, + prefix, + ); + } + res.failed_cksum += 1; + } + } else { + if line.is_empty() || line_bytes.starts_with(b"#") { + // Don't show any warning for empty or commented lines. + + // TODO: return error? 
+ return Ok(()); + } + if opts.warn { + let algo = if let Some(algo_name_input) = cli_algo_name { + algo_name_input.to_uppercase() + } else { + "Unknown algorithm".to_string() + }; + eprintln!( + "{}: {}: {}: improperly formatted {} checksum line", + util_name(), + &filename_input.maybe_quote(), + i + 1, + algo + ); + } + + res.bad_format += 1; + } + Ok(()) +} + /*** * Do the checksum validation (can be strict or not) */ @@ -560,117 +703,22 @@ where }; for (i, line) in lines.iter().enumerate() { - let line_bytes = os_str_as_bytes(line)?; - if let Some(caps) = chosen_regex.captures(line_bytes) { - properly_formatted = true; - - let mut filename_to_check = caps.name("filename").unwrap().as_bytes(); - - if filename_to_check.starts_with(b"*") - && i == 0 - && chosen_regex.as_str() == SINGLE_SPACE_REGEX - { - // Remove the leading asterisk if present - only for the first line - filename_to_check = &filename_to_check[1..]; - } - - let expected_checksum = - get_expected_checksum(filename_to_check, &caps, &chosen_regex)?; - - // If the algo_name is provided, we use it, otherwise we try to detect it - let (algo_name, length) = if is_algo_based_format { - identify_algo_name_and_length( - &caps, - algo_name_input, - &mut res, - &mut properly_formatted, - ) - .unwrap_or((String::new(), None)) - } else if let Some(a) = algo_name_input { - // When a specific algorithm name is input, use it and use the provided bits - // except when dealing with blake2b, where we will detect the length - if algo_name_input == Some(ALGORITHM_OPTIONS_BLAKE2B) { - // division by 2 converts the length of the Blake2b checksum from hexadecimal - // characters to bytes, as each byte is represented by two hexadecimal characters. 
- let length = Some(expected_checksum.len() / 2); - (ALGORITHM_OPTIONS_BLAKE2B.to_string(), length) - } else { - (a.to_lowercase(), length_input) - } - } else { - // Default case if no algorithm is specified and non-algo based format is matched - (String::new(), None) - }; - - if algo_name.is_empty() { - // we haven't been able to detect the algo name. No point to continue - properly_formatted = false; - continue; - } - let mut algo = detect_algo(&algo_name, length)?; - - let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check); - - let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; - - // manage the input file - let file_to_check = - match get_file_to_check(&real_filename_to_check, opts.ignore_missing, &mut res) - { - Some(file) => file, - None => continue, - }; - let mut file_reader = BufReader::new(file_to_check); - // Read the file and calculate the checksum - let create_fn = &mut algo.create_fn; - let mut digest = create_fn(); - let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); - - // Do the checksum validation - if expected_checksum == calculated_checksum { - if !opts.quiet && !opts.status { - print_file_report( - std::io::stdout(), - filename_to_check, - FileChecksumResult::Ok, - prefix, - ); - } - correct_format += 1; - } else { - if !opts.status { - print_file_report( - std::io::stdout(), - filename_to_check, - FileChecksumResult::Failed, - prefix, - ); - } - res.failed_cksum += 1; - } - } else { - if line.is_empty() || line_bytes.starts_with(b"#") { - // Don't show any warning for empty or commented lines. 
- continue; - } - if opts.warn { - let algo = if let Some(algo_name_input) = algo_name_input { - algo_name_input.to_uppercase() - } else { - "Unknown algorithm".to_string() - }; - eprintln!( - "{}: {}: {}: improperly formatted {} checksum line", - util_name(), - &filename_input.maybe_quote(), - i + 1, - algo - ); - } - - res.bad_format += 1; - } + match process_checksum_line( + filename_input, + line, + i, + &chosen_regex, + is_algo_based_format, + &mut res, + algo_name_input, + length_input, + &mut properly_formatted, + &mut correct_format, + opts, + ) { + Ok(_) => (), + Err(LineCheckError::UError(e)) => return Err(e), + }; } // not a single line correctly formatted found From afcf93b3e39ee6fb13b67d1a63d83ea4a9fa6226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Fri, 25 Oct 2024 03:17:17 +0200 Subject: [PATCH 004/179] feat(checksum): extract file processing into a separate function --- src/uucore/src/lib/features/checksum.rs | 210 ++++++++++++++---------- 1 file changed, 122 insertions(+), 88 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index a29fe0767..818190533 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -92,6 +92,25 @@ impl From for LineCheckError { } } +#[allow(clippy::enum_variant_names)] +enum FileCheckError { + UError(Box), + NonCriticalError, + CriticalError, +} + +impl From> for FileCheckError { + fn from(value: Box) -> Self { + Self::UError(value) + } +} + +impl From for FileCheckError { + fn from(value: ChecksumError) -> Self { + Self::UError(Box::new(value)) + } +} + /// This struct regroups CLI flags. 
#[derive(Debug, Default, Clone, Copy)] pub struct ChecksumOptions { @@ -656,6 +675,104 @@ fn process_checksum_line( Ok(()) } +fn process_checksum_file( + filename_input: &OsStr, + cli_algo_name: Option<&str>, + cli_algo_length: Option, + opts: ChecksumOptions, +) -> Result<(), FileCheckError> { + let mut correct_format = 0; + let mut properly_formatted = false; + let mut res = ChecksumResult::default(); + let input_is_stdin = filename_input == OsStr::new("-"); + + let file: Box = if input_is_stdin { + // Use stdin if "-" is specified + Box::new(stdin()) + } else { + match get_input_file(filename_input) { + Ok(f) => f, + Err(e) => { + // Could not read the file, show the error and continue to the next file + show_error!("{e}"); + set_exit_code(1); + return Err(FileCheckError::NonCriticalError); + } + } + }; + + let reader = BufReader::new(file); + let lines = read_os_string_lines(reader).collect::>(); + + let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else { + let e = ChecksumError::NoProperlyFormattedChecksumLinesFound { + filename: get_filename_for_output(filename_input, input_is_stdin), + }; + show_error!("{e}"); + set_exit_code(1); + return Err(FileCheckError::NonCriticalError); + }; + + for (i, line) in lines.iter().enumerate() { + match process_checksum_line( + filename_input, + line, + i, + &chosen_regex, + is_algo_based_format, + &mut res, + cli_algo_name, + cli_algo_length, + &mut properly_formatted, + &mut correct_format, + opts, + ) { + Ok(_) => (), + Err(LineCheckError::UError(e)) => return Err(e.into()), + }; + } + + // not a single line correctly formatted found + // return an error + if !properly_formatted { + if !opts.status { + return Err(ChecksumError::NoProperlyFormattedChecksumLinesFound { + filename: get_filename_for_output(filename_input, input_is_stdin), + } + .into()); + } + set_exit_code(1); + return Err(FileCheckError::CriticalError); + } + + // if any incorrectly formatted line, show it + cksum_output(&res, 
opts.status); + + if opts.ignore_missing && correct_format == 0 { + // we have only bad format + // and we had ignore-missing + eprintln!( + "{}: {}: no file was verified", + util_name(), + filename_input.maybe_quote(), + ); + set_exit_code(1); + } + + // strict means that we should have an exit code. + if opts.strict && res.bad_format > 0 { + set_exit_code(1); + } + + // if we have any failed checksum verification, we set an exit code + // except if we have ignore_missing + if (res.failed_cksum > 0 || res.failed_open_file > 0) && !opts.ignore_missing { + set_exit_code(1); + } + + Ok(()) +} + /*** * Do the checksum validation (can be strict or not) */ @@ -670,94 +787,11 @@ where { // if cksum has several input files, it will print the result for each file for filename_input in files { - let mut correct_format = 0; - let mut properly_formatted = false; - let mut res = ChecksumResult::default(); - let input_is_stdin = filename_input == OsStr::new("-"); - - let file: Box = if input_is_stdin { - // Use stdin if "-" is specified - Box::new(stdin()) - } else { - match get_input_file(filename_input) { - Ok(f) => f, - Err(e) => { - // Could not read the file, show the error and continue to the next file - show_error!("{e}"); - set_exit_code(1); - continue; - } - } - }; - - let reader = BufReader::new(file); - let lines = read_os_string_lines(reader).collect::>(); - - let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else { - let e = ChecksumError::NoProperlyFormattedChecksumLinesFound { - filename: get_filename_for_output(filename_input, input_is_stdin), - }; - show_error!("{e}"); - set_exit_code(1); - continue; - }; - - for (i, line) in lines.iter().enumerate() { - match process_checksum_line( - filename_input, - line, - i, - &chosen_regex, - is_algo_based_format, - &mut res, - algo_name_input, - length_input, - &mut properly_formatted, - &mut correct_format, - opts, - ) { - Ok(_) => (), - Err(LineCheckError::UError(e)) => return Err(e), - }; - } - 
- // not a single line correctly formatted found - // return an error - if !properly_formatted { - if !opts.status { - return Err(ChecksumError::NoProperlyFormattedChecksumLinesFound { - filename: get_filename_for_output(filename_input, input_is_stdin), - } - .into()); - } - set_exit_code(1); - - return Ok(()); - } - - // if any incorrectly formatted line, show it - cksum_output(&res, opts.status); - - if opts.ignore_missing && correct_format == 0 { - // we have only bad format - // and we had ignore-missing - eprintln!( - "{}: {}: no file was verified", - util_name(), - filename_input.maybe_quote(), - ); - set_exit_code(1); - } - - // strict means that we should have an exit code. - if opts.strict && res.bad_format > 0 { - set_exit_code(1); - } - - // if we have any failed checksum verification, we set an exit code - // except if we have ignore_missing - if (res.failed_cksum > 0 || res.failed_open_file > 0) && !opts.ignore_missing { - set_exit_code(1); + use FileCheckError::*; + match process_checksum_file(filename_input, algo_name_input, length_input, opts) { + Err(UError(e)) => return Err(e), + Err(CriticalError) => break, + Err(NonCriticalError) | Ok(_) => continue, } } From b6c726602f3f165910aa6e6f61cf3d9ee8a5d083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Fri, 25 Oct 2024 04:00:51 +0200 Subject: [PATCH 005/179] feat(checksum): get rid of `correct_format` at the line level --- src/uucore/src/lib/features/checksum.rs | 93 +++++++++++++++---------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 818190533..c0811df31 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -77,7 +77,8 @@ struct ChecksumResult { enum LineCheckError { UError(Box), - // ImproperlyFormatted, + Skipped, + ImproperlyFormatted, } impl From> for LineCheckError { @@ -228,6 +229,24 @@ enum FileChecksumResult { 
CantOpen, } +impl FileChecksumResult { + fn from_bool(checksum_correct: bool) -> Self { + if checksum_correct { + FileChecksumResult::Ok + } else { + FileChecksumResult::Failed + } + } + + fn can_display(&self, opts: ChecksumOptions) -> bool { + match self { + FileChecksumResult::Ok => !opts.status && !opts.quiet, + FileChecksumResult::Failed => !opts.status, + FileChecksumResult::CantOpen => true, + } + } +} + impl Display for FileChecksumResult { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -245,10 +264,13 @@ fn print_file_report( filename: &[u8], result: FileChecksumResult, prefix: &str, + opts: ChecksumOptions, ) { - let _ = write!(w, "{prefix}"); - let _ = w.write_all(filename); - let _ = writeln!(w, ": {result}"); + if result.can_display(opts) { + let _ = write!(w, "{prefix}"); + let _ = w.write_all(filename); + let _ = writeln!(w, ": {result}"); + } } pub fn detect_algo(algo: &str, length: Option) -> UResult { @@ -439,6 +461,7 @@ fn get_file_to_check( filename: &OsStr, ignore_missing: bool, res: &mut ChecksumResult, + opts: ChecksumOptions, ) -> Option> { let filename_bytes = os_str_as_bytes(filename).expect("UTF-8 error"); let filename_lossy = String::from_utf8_lossy(filename_bytes); @@ -451,6 +474,7 @@ fn get_file_to_check( filename_bytes, FileChecksumResult::CantOpen, "", + opts, ); res.failed_open_file += 1; }; @@ -549,6 +573,12 @@ fn identify_algo_name_and_length( Some((algorithm, bits)) } +/// Parses a checksum line, detect the algorithm to use, read the file and produce +/// its digest, and compare it to the expected value. +/// +/// Returns `Ok(bool)` if the comparison happened, bool indicates if the digest +/// matched the expected. +/// If the comparison didn't happen, return a `LineChecksumError`. 
#[allow(clippy::too_many_arguments)] fn process_checksum_line( filename_input: &OsStr, @@ -560,9 +590,8 @@ fn process_checksum_line( cli_algo_name: Option<&str>, cli_algo_length: Option, properly_formatted: &mut bool, - correct_format: &mut usize, opts: ChecksumOptions, -) -> Result<(), LineCheckError> { +) -> Result { let line_bytes = os_str_as_bytes(line)?; if let Some(caps) = chosen_regex.captures(line_bytes) { *properly_formatted = true; @@ -604,7 +633,7 @@ fn process_checksum_line( *properly_formatted = false; // TODO: return error? - return Ok(()); + return Err(LineCheckError::ImproperlyFormatted); } let mut algo = detect_algo(&algo_name, length)?; @@ -614,10 +643,10 @@ fn process_checksum_line( // manage the input file let file_to_check = - match get_file_to_check(&real_filename_to_check, opts.ignore_missing, res) { + match get_file_to_check(&real_filename_to_check, opts.ignore_missing, res, opts) { Some(file) => file, // TODO: return error? - None => return Ok(()), + None => return Err(LineCheckError::ImproperlyFormatted), }; let mut file_reader = BufReader::new(file_to_check); // Read the file and calculate the checksum @@ -627,33 +656,19 @@ fn process_checksum_line( digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); // Do the checksum validation - if expected_checksum == calculated_checksum { - if !opts.quiet && !opts.status { - print_file_report( - std::io::stdout(), - filename_to_check, - FileChecksumResult::Ok, - prefix, - ); - } - *correct_format += 1; - } else { - if !opts.status { - print_file_report( - std::io::stdout(), - filename_to_check, - FileChecksumResult::Failed, - prefix, - ); - } - res.failed_cksum += 1; - } + let checksum_correct = expected_checksum == calculated_checksum; + print_file_report( + std::io::stdout(), + filename_to_check, + FileChecksumResult::from_bool(checksum_correct), + prefix, + opts, + ); + Ok(checksum_correct) } else { if line.is_empty() || line_bytes.starts_with(b"#") { // Don't show any 
warning for empty or commented lines. - - // TODO: return error? - return Ok(()); + return Err(LineCheckError::Skipped); } if opts.warn { let algo = if let Some(algo_name_input) = cli_algo_name { @@ -671,8 +686,8 @@ fn process_checksum_line( } res.bad_format += 1; + Err(LineCheckError::ImproperlyFormatted) } - Ok(()) } fn process_checksum_file( @@ -724,10 +739,12 @@ fn process_checksum_file( cli_algo_name, cli_algo_length, &mut properly_formatted, - &mut correct_format, opts, ) { - Ok(_) => (), + Ok(true) => correct_format += 1, + Ok(false) => res.failed_cksum += 1, + Err(LineCheckError::ImproperlyFormatted) => (), + Err(LineCheckError::Skipped) => continue, Err(LineCheckError::UError(e)) => return Err(e.into()), }; } @@ -1227,6 +1244,8 @@ mod tests { #[test] fn test_print_file_report() { + let opts = ChecksumOptions::default(); + let cases: &[(&[u8], FileChecksumResult, &str, &[u8])] = &[ (b"filename", FileChecksumResult::Ok, "", b"filename: OK\n"), ( @@ -1257,7 +1276,7 @@ mod tests { for (filename, result, prefix, expected) in cases { let mut buffer: Vec = vec![]; - print_file_report(&mut buffer, filename, *result, prefix); + print_file_report(&mut buffer, filename, *result, prefix, opts); assert_eq!(&buffer, expected) } } From 4ffedcdac6b9ae22cfe268d531ee67108fea6d48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Fri, 25 Oct 2024 04:38:52 +0200 Subject: [PATCH 006/179] feat(checksum): get rid of ChecksumResult in get_file_to_check --- src/uucore/src/lib/features/checksum.rs | 45 ++++++++++++++----------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index c0811df31..1de41232e 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -79,6 +79,9 @@ enum LineCheckError { UError(Box), Skipped, ImproperlyFormatted, + CantOpenFile, + FileNotFound, + FileIsDirectory, } impl From> for LineCheckError { @@ 
-459,16 +462,14 @@ fn get_expected_checksum( /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-". fn get_file_to_check( filename: &OsStr, - ignore_missing: bool, - res: &mut ChecksumResult, opts: ChecksumOptions, -) -> Option> { +) -> Result, LineCheckError> { let filename_bytes = os_str_as_bytes(filename).expect("UTF-8 error"); let filename_lossy = String::from_utf8_lossy(filename_bytes); if filename == "-" { - Some(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file + Ok(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file } else { - let mut failed_open = || { + let failed_open = || { print_file_report( std::io::stdout(), filename_bytes, @@ -476,30 +477,32 @@ fn get_file_to_check( "", opts, ); - res.failed_open_file += 1; }; match File::open(filename) { Ok(f) => { - if f.metadata().ok()?.is_dir() { + if f.metadata() + .map_err(|_| LineCheckError::CantOpenFile)? + .is_dir() + { show!(USimpleError::new( 1, format!("{filename_lossy}: Is a directory") )); // also regarded as a failed open failed_open(); - None + Err(LineCheckError::FileIsDirectory) } else { - Some(Box::new(f)) + Ok(Box::new(f)) } } Err(err) => { - if !ignore_missing { + if !opts.ignore_missing { // yes, we have both stderr and stdout here show!(err.map_err_context(|| filename_lossy.to_string())); failed_open(); } // we could not open the file but we want to continue - None + Err(LineCheckError::FileNotFound) } } } @@ -642,13 +645,9 @@ fn process_checksum_line( let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; // manage the input file - let file_to_check = - match get_file_to_check(&real_filename_to_check, opts.ignore_missing, res, opts) { - Some(file) => file, - // TODO: return error? 
- None => return Err(LineCheckError::ImproperlyFormatted), - }; + let file_to_check = get_file_to_check(&real_filename_to_check, opts)?; let mut file_reader = BufReader::new(file_to_check); + // Read the file and calculate the checksum let create_fn = &mut algo.create_fn; let mut digest = create_fn(); @@ -743,9 +742,17 @@ fn process_checksum_file( ) { Ok(true) => correct_format += 1, Ok(false) => res.failed_cksum += 1, - Err(LineCheckError::ImproperlyFormatted) => (), - Err(LineCheckError::Skipped) => continue, Err(LineCheckError::UError(e)) => return Err(e.into()), + Err(LineCheckError::Skipped) => continue, + Err(LineCheckError::ImproperlyFormatted) => (), + Err(LineCheckError::CantOpenFile | LineCheckError::FileIsDirectory) => { + res.failed_open_file += 1 + } + Err(LineCheckError::FileNotFound) => { + if !opts.ignore_missing { + res.failed_open_file += 1 + } + } }; } From 5309b65867d4899cce7ef27ca4af348c24eb8da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 3 Nov 2024 11:57:32 +0100 Subject: [PATCH 007/179] feat(checksum): change process_checksum_line return type to Result<(), LineCheckError> - Treat digest mismatch as an error --- src/uucore/src/lib/features/checksum.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 1de41232e..333c76d2a 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -77,6 +77,7 @@ struct ChecksumResult { enum LineCheckError { UError(Box), + DigestMismatch, Skipped, ImproperlyFormatted, CantOpenFile, @@ -594,7 +595,7 @@ fn process_checksum_line( cli_algo_length: Option, properly_formatted: &mut bool, opts: ChecksumOptions, -) -> Result { +) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; if let Some(caps) = chosen_regex.captures(line_bytes) { *properly_formatted = true; @@ -663,7 +664,12 @@ fn process_checksum_line( 
prefix, opts, ); - Ok(checksum_correct) + + if checksum_correct { + Ok(()) + } else { + Err(LineCheckError::DigestMismatch) + } } else { if line.is_empty() || line_bytes.starts_with(b"#") { // Don't show any warning for empty or commented lines. @@ -740,8 +746,8 @@ fn process_checksum_file( &mut properly_formatted, opts, ) { - Ok(true) => correct_format += 1, - Ok(false) => res.failed_cksum += 1, + Ok(()) => correct_format += 1, + Err(LineCheckError::DigestMismatch) => res.failed_cksum += 1, Err(LineCheckError::UError(e)) => return Err(e.into()), Err(LineCheckError::Skipped) => continue, Err(LineCheckError::ImproperlyFormatted) => (), From a09c7cc0d21cb7a44e26914cbc607215f0fd337a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Fri, 25 Oct 2024 13:24:43 +0200 Subject: [PATCH 008/179] feat(checksum): add doc --- src/uucore/src/lib/features/checksum.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 333c76d2a..e7a0a2653 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -75,13 +75,22 @@ struct ChecksumResult { pub failed_open_file: i32, } +/// Represents a reason for which the processing of a checksum line +/// could not proceed to digest comparison. enum LineCheckError { + /// a generic UError was encountered in sub-functions UError(Box), + /// the computed checksum digest differs from the expected one DigestMismatch, + /// the line is empty or is a comment Skipped, + /// the line has a formatting error ImproperlyFormatted, + /// file exists but is impossible to read CantOpenFile, + /// there is nothing at the given path FileNotFound, + /// the given path leads to a directory FileIsDirectory, } @@ -97,10 +106,14 @@ impl From for LineCheckError { } } +/// Represents an error that was encountered when processing a checksum file. 
#[allow(clippy::enum_variant_names)] enum FileCheckError { + /// a generic UError was encountered in sub-functions UError(Box), + /// the error does not stop the processing of next files NonCriticalError, + /// the error must stop the run of the program CriticalError, } @@ -226,6 +239,8 @@ fn cksum_output(res: &ChecksumResult, status: bool) { } } +/// Represents the different outcomes that can happen to a file +/// that is being checked. #[derive(Debug, Clone, Copy)] enum FileChecksumResult { Ok, @@ -234,6 +249,8 @@ enum FileChecksumResult { } impl FileChecksumResult { + /// Creates a `FileChecksumResult` from a digest comparison that + /// either succeeded or failed. fn from_bool(checksum_correct: bool) -> Self { if checksum_correct { FileChecksumResult::Ok @@ -242,6 +259,8 @@ impl FileChecksumResult { } } + /// The cli options might prevent to display on the outcome of the + /// comparison on STDOUT. fn can_display(&self, opts: ChecksumOptions) -> bool { match self { FileChecksumResult::Ok => !opts.status && !opts.quiet, From 0bc22e8d18b20a3c673d8a36d367fcad883b7b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sat, 17 Aug 2024 01:21:59 +0200 Subject: [PATCH 009/179] test(cksum): add multiple tests test(cksum): add test for blake length guessing test(cksum): add test for hexa/base64 confusion test(cksum): add test for error handling on incorrectly formatted checksum test(cksum): add test for trailing spaces making a line improperly formatted test(cksum): Re-implement GNU test 'cksum-base64' in the testsuite --- tests/by-util/test_cksum.rs | 259 +++++++++++++++++++++++++++++++++++- 1 file changed, 258 insertions(+), 1 deletion(-) diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index 98366cbec..ee1e05292 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code.
-// spell-checker:ignore (words) asdf algo algos asha mgmt xffname +// spell-checker:ignore (words) asdf algo algos asha mgmt xffname hexa GFYEQ HYQK Yqxb use crate::common::util::TestScenario; @@ -1502,3 +1502,260 @@ mod check_utf8 { .stderr_contains("1 listed file could not be read"); } } + +#[ignore = "not yet implemented"] +#[test] +fn test_check_blake_length_guess() { + let correct_lines = [ + // Correct: The length is not explicit, but the checksum's size + // matches the default parameter. + "BLAKE2b (foo.dat) = ca002330e69d3e6b84a46a56a6533fd79d51d97a3bb7cad6c2ff43b354185d6dc1e723fb3db4ae0737e120378424c714bb982d9dc5bbd7a0ab318240ddd18f8d", + // Correct: The length is explicitly given, and the checksum's size + // matches the length. + "BLAKE2b-512 (foo.dat) = ca002330e69d3e6b84a46a56a6533fd79d51d97a3bb7cad6c2ff43b354185d6dc1e723fb3db4ae0737e120378424c714bb982d9dc5bbd7a0ab318240ddd18f8d", + // Correct: the checksum size is not default but + // the length is explicitly given. + "BLAKE2b-48 (foo.dat) = 171cdfdf84ed", + ]; + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo.dat", "foo"); + + for line in correct_lines { + at.write("foo.sums", line); + scene + .ucmd() + .arg("--check") + .arg(at.subdir.join("foo.sums")) + .succeeds() + .stdout_is("foo.dat: OK\n"); + } + + // Incorrect lines + + // This is incorrect because the algorithm provides no length, + // and the checksum length is not default. 
+ let incorrect = "BLAKE2b (foo.dat) = 171cdfdf84ed"; + at.write("foo.sums", incorrect); + scene + .ucmd() + .arg("--check") + .arg(at.subdir.join("foo.sums")) + .fails() + .stderr_contains("foo.sums: no properly formatted checksum lines found"); +} + +#[ignore = "not yet implemented"] +#[test] +fn test_check_confusing_base64() { + let cksum = "BLAKE2b-48 (foo.dat) = fc1f97C4"; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo.dat", "esq"); + at.write("foo.sums", cksum); + + scene + .ucmd() + .arg("--check") + .arg(at.subdir.join("foo.sums")) + .succeeds() + .stdout_is("foo.dat: OK\n"); +} + +/// This test checks that when a file contains several checksum lines +/// with different encoding, the decoding still works. +#[ignore = "not yet implemented"] +#[test] +fn test_check_mix_hex_base64() { + let b64 = "BLAKE2b-128 (foo1.dat) = BBNuJPhdRwRlw9tm5Y7VbA=="; + let hex = "BLAKE2b-128 (foo2.dat) = 04136e24f85d470465c3db66e58ed56c"; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo1.dat", "foo"); + at.write("foo2.dat", "foo"); + + at.write("hex_b64", &format!("{hex}\n{b64}")); + at.write("b64_hex", &format!("{b64}\n{hex}")); + + scene + .ucmd() + .arg("--check") + .arg(at.subdir.join("hex_b64")) + .succeeds() + .stdout_only("foo2.dat: OK\nfoo1.dat: OK\n"); + + scene + .ucmd() + .arg("--check") + .arg(at.subdir.join("b64_hex")) + .succeeds() + .stdout_only("foo1.dat: OK\nfoo2.dat: OK\n"); +} + +#[ignore = "not yet implemented"] +#[test] +fn test_check_incorrectly_formatted_checksum_does_not_stop_processing() { + // The first line contains an incorrectly formatted checksum that can't be + // correctly decoded. This must not prevent the program from looking at the + // rest of the file. 
+ let lines = [ + "BLAKE2b-56 (foo1) = GFYEQ7HhAw=", // Should be 2 '=' at the end + "BLAKE2b-56 (foo2) = 18560443b1e103", // OK + ]; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo1", "foo"); + at.write("foo2", "foo"); + at.write("sum", &lines.join("\n")); + + scene + .ucmd() + .arg("--check") + .arg(at.subdir.join("sum")) + .succeeds() + .stderr_contains("1 line is improperly formatted") + .stdout_contains("foo2: OK"); +} + +/// This module reimplements the cksum-base64.pl GNU test. +mod cksum_base64 { + use super::*; + use crate::common::util::log_info; + + const PAIRS: [(&str, &str); 11] = [ + ("sysv", "0 0 f"), + ("bsd", "00000 0 f"), + ("crc", "4294967295 0 f"), + ("md5", "1B2M2Y8AsgTpgAmY7PhCfg=="), + ("sha1", "2jmj7l5rSw0yVb/vlWAYkK/YBwk="), + ("sha224", "0UoCjCo6K8lHYQK7KII0xBWisB+CjqYqxbPkLw=="), + ("sha256", "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="), + ( + "sha384", + "OLBgp1GsljhM2TJ+sbHjaiH9txEUvgdDTAzHv2P24donTt6/529l+9Ua0vFImLlb", + ), + ( + "sha512", + "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg==" + ), + ( + "blake2b", + "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg==" + ), + ("sm3", "GrIdg1XPoX+OYRlIMegajyK+yMco/vt0ftA161CCqis="), + ]; + + fn make_scene() -> TestScenario { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("f"); + + scene + } + + fn output_format(algo: &str, digest: &str) -> String { + if ["sysv", "bsd", "crc"].contains(&algo) { + digest.to_string() + } else { + format!("{} (f) = {}", algo.to_uppercase(), digest).replace("BLAKE2B", "BLAKE2b") + } + } + + #[test] + fn test_generating() { + // Ensure that each algorithm works with `--base64`. 
+ let scene = make_scene(); + + for (algo, digest) in PAIRS { + scene + .ucmd() + .arg("--base64") + .arg("-a") + .arg(algo) + .arg("f") + .succeeds() + .stdout_only(format!("{}\n", output_format(algo, digest))); + } + } + + #[test] + fn test_chk() { + // For each algorithm that accepts `--check`, + // ensure that it works with base64 digests. + let scene = make_scene(); + + for (algo, digest) in PAIRS { + if ["sysv", "bsd", "crc"].contains(&algo) { + // These algorithms do not accept `--check` + continue; + } + + let line = output_format(algo, digest); + scene + .ucmd() + .arg("--check") + .arg("--strict") + .pipe_in(line) + .succeeds() + .stdout_only("f: OK\n"); + } + } + + #[test] + fn test_chk_eq1() { + // For digests ending with '=', ensure `--check` fails if '=' is removed. + let scene = make_scene(); + + for (algo, digest) in PAIRS { + if !digest.ends_with('=') { + continue; + } + + let mut line = output_format(algo, digest); + if line.ends_with('=') { + line.pop(); + } + + log_info(format!("ALGORITHM: {algo}, STDIN: '{line}'"), ""); + scene + .ucmd() + .arg("--check") + .pipe_in(line) + .fails() + .no_stdout() + .stderr_contains("no properly formatted checksum lines found"); + } + } + + #[test] + fn test_chk_eq2() { + // For digests ending with '==', + // ensure `--check` fails if '==' is removed. 
+ let scene = make_scene(); + + for (algo, digest) in PAIRS { + if !digest.ends_with("==") { + continue; + } + + let line = output_format(algo, digest); + let line = line.trim_end_matches("=="); + + log_info(format!("ALGORITHM: {algo}, STDIN: '{line}'"), ""); + scene + .ucmd() + .arg("--check") + .pipe_in(line) + .fails() + .no_stdout() + .stderr_contains("no properly formatted checksum lines found"); + } + } +} From 7746c5c6ed7d27f7cfc2f270ffe49657c41ebf25 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 16 Nov 2024 22:52:19 +0100 Subject: [PATCH 010/179] publish: by default, put it as draft to send email only when ready closes: #6859 --- .github/workflows/CICD.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index c8993b121..7fab8f8ea 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -753,6 +753,7 @@ jobs: uses: softprops/action-gh-release@v2 if: steps.vars.outputs.DEPLOY with: + draft: true files: | ${{ steps.vars.outputs.STAGING }}/${{ steps.vars.outputs.PKG_NAME }} ${{ steps.vars.outputs.STAGING }}/${{ steps.vars.outputs.DPKG_NAME }} From 96e42d6dd9cea2a8ab6b2432c2c8c14a3d6ac029 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 09:55:29 +0000 Subject: [PATCH 011/179] fix(deps): update rust crate libc to v0.2.164 --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b59405071..b4eab1299 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.161" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = 
"433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libloading" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 4f7c56089..5db73ba31 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.161" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libfuzzer-sys" @@ -847,7 +847,7 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uu_cut" -version = "0.0.27" +version = "0.0.28" dependencies = [ "bstr", "clap", @@ -857,7 +857,7 @@ dependencies = [ [[package]] name = "uu_date" -version = "0.0.27" +version = "0.0.28" dependencies = [ "chrono", "clap", @@ -869,7 +869,7 @@ dependencies = [ [[package]] name = "uu_echo" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "uucore", @@ -877,7 +877,7 @@ dependencies = [ [[package]] name = "uu_env" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "nix 0.29.0", @@ -887,7 +887,7 @@ dependencies = [ [[package]] name = "uu_expr" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "num-bigint", @@ -898,7 +898,7 @@ dependencies = [ [[package]] name = "uu_printf" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "uucore", @@ -906,7 +906,7 @@ dependencies = [ [[package]] name = "uu_seq" -version = "0.0.27" +version = "0.0.28" dependencies = [ "bigdecimal", "clap", @@ -917,7 +917,7 @@ dependencies = [ [[package]] name = "uu_sort" -version = "0.0.27" +version = "0.0.28" dependencies = [ "binary-heap-plus", "clap", @@ -937,7 +937,7 @@ dependencies = [ [[package]] name = "uu_split" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "memchr", @@ -946,7 +946,7 @@ dependencies = [ [[package]] name = 
"uu_test" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "libc", @@ -955,7 +955,7 @@ dependencies = [ [[package]] name = "uu_tr" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "nom", @@ -964,7 +964,7 @@ dependencies = [ [[package]] name = "uu_wc" -version = "0.0.27" +version = "0.0.28" dependencies = [ "bytecount", "clap", @@ -977,7 +977,7 @@ dependencies = [ [[package]] name = "uucore" -version = "0.0.27" +version = "0.0.28" dependencies = [ "clap", "dunce", @@ -1020,7 +1020,7 @@ dependencies = [ [[package]] name = "uucore_procs" -version = "0.0.27" +version = "0.0.28" dependencies = [ "proc-macro2", "quote", @@ -1029,7 +1029,7 @@ dependencies = [ [[package]] name = "uuhelp_parser" -version = "0.0.27" +version = "0.0.28" [[package]] name = "wasi" From b9da6087934677149551677d5b4d3673dd10c32a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 10:23:34 +0000 Subject: [PATCH 012/179] chore(deps): update rust crate serde to v1.0.215 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b4eab1299..a6c9d1bc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2138,9 +2138,9 @@ checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] @@ -2156,9 +2156,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" 
dependencies = [ "proc-macro2", "quote", From 1e47325ba252040db5b2b073007f845675c47ce2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 10:57:11 +0000 Subject: [PATCH 013/179] fix(deps): update rust crate libfuzzer-sys to v0.4.8 --- fuzz/Cargo.lock | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 5db73ba31..0bd6826a2 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -422,13 +422,12 @@ checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libfuzzer-sys" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7" +checksum = "9b9569d2f74e257076d8c6bfa73fb505b46b851e51ddaecc825944aa3bed17fa" dependencies = [ "arbitrary", "cc", - "once_cell", ] [[package]] From 0d086edda8655c5af0762202355ab7faa1659174 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 10:58:30 +0000 Subject: [PATCH 014/179] chore(deps): update davidanson/markdownlint-cli2-action action to v18 --- .github/workflows/CICD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index c8993b121..ef7c96626 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -139,7 +139,7 @@ jobs: shell: bash run: | RUSTDOCFLAGS="-Dwarnings" cargo doc ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-deps --workspace --document-private-items - - uses: DavidAnson/markdownlint-cli2-action@v17 + - uses: DavidAnson/markdownlint-cli2-action@v18 with: fix: "true" globs: | From 2f5e7f66a73bfe2f73a2fb3d09b54a5d799fd63e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 11:53:24 +0000 Subject: [PATCH 
015/179] fix(deps): update rust crate tempfile to v3.14.0 --- Cargo.lock | 18 +++++++++--------- fuzz/Cargo.lock | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6c9d1bc1..432dd0bcb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1795,7 +1795,7 @@ dependencies = [ "bitflags 2.6.0", "hex", "procfs-core", - "rustix 0.38.37", + "rustix 0.38.40", ] [[package]] @@ -2060,9 +2060,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ "bitflags 2.6.0", "errno", @@ -2315,14 +2315,14 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", "once_cell", - "rustix 0.38.37", + "rustix 0.38.40", "windows-sys 0.59.0", ] @@ -2342,7 +2342,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" dependencies = [ - "rustix 0.38.37", + "rustix 0.38.40", "windows-sys 0.59.0", ] @@ -3718,7 +3718,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -3986,7 +3986,7 @@ checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", "linux-raw-sys 0.4.14", - "rustix 0.38.37", + 
"rustix 0.38.40", ] [[package]] diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 0bd6826a2..4903e424d 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -708,9 +708,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ "bitflags 2.5.0", "errno", @@ -770,9 +770,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", From 2ac5ad61623ebe875e6d10d26ac9aa07fd527d7d Mon Sep 17 00:00:00 2001 From: Banyc <36535895+Banyc@users.noreply.github.com> Date: Mon, 18 Nov 2024 14:44:08 +0800 Subject: [PATCH 016/179] fix(deps): crates import compatible uucore --- Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a6881abfb..2534d4664 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -354,10 +354,10 @@ blake3 = "1.5.1" sm3 = "0.4.2" digest = "0.10.7" -uucore = { version = ">=0.0.19", package = "uucore", path = "src/uucore" } -uucore_procs = { version = ">=0.0.19", package = "uucore_procs", path = "src/uucore_procs" } -uu_ls = { version = ">=0.0.18", path = "src/uu/ls" } -uu_base32 = { version = ">=0.0.18", path = "src/uu/base32" } +uucore = { version = "0.0.28", package = "uucore", path = "src/uucore" } +uucore_procs = { version = "0.0.28", package = "uucore_procs", path = "src/uucore_procs" } +uu_ls = { version = "0.0.28", path = "src/uu/ls" } +uu_base32 = { version = "0.0.28", path = "src/uu/base32" } [dependencies] clap = { workspace = true } From 
412d4f4f15f25fe907762b6d3f94072539d596cd Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 18 Nov 2024 09:20:51 +0100 Subject: [PATCH 017/179] Bump MSRV to 1.77 --- .clippy.toml | 2 +- .github/workflows/CICD.yml | 2 +- Cargo.toml | 2 +- README.md | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.clippy.toml b/.clippy.toml index 89fd1cccd..72e8c35cf 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -1,4 +1,4 @@ -msrv = "1.70.0" +msrv = "1.77.0" cognitive-complexity-threshold = 24 missing-docs-in-crate-items = true check-private-items = true diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 4694f2a3a..6c7b50995 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -11,7 +11,7 @@ env: PROJECT_NAME: coreutils PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_AUTH: "uutils" - RUST_MIN_SRV: "1.70.0" + RUST_MIN_SRV: "1.77.0" # * style job configuration STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis diff --git a/Cargo.toml b/Cargo.toml index a6881abfb..539f5002e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ repository = "https://github.com/uutils/coreutils" readme = "README.md" keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] categories = ["command-line-utilities"] -rust-version = "1.70.0" +rust-version = "1.77.0" edition = "2021" build = "build.rs" diff --git a/README.md b/README.md index 22081c689..9f7d1c2ae 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils) [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils) -![MSRV](https://img.shields.io/badge/MSRV-1.70.0-brightgreen) +![MSRV](https://img.shields.io/badge/MSRV-1.77.0-brightgreen) @@ -70,7 +70,7 @@ the 
[coreutils docs](https://github.com/uutils/uutils.github.io) repository. ### Rust Version uutils follows Rust's release channels and is tested against stable, beta and -nightly. The current Minimum Supported Rust Version (MSRV) is `1.70.0`. +nightly. The current Minimum Supported Rust Version (MSRV) is `1.77.0`. ## Building From 3281d3ef557639571abe9ebb297e2567555a81dd Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 18 Nov 2024 09:45:44 +0100 Subject: [PATCH 018/179] tee: fix warning from ref_as_ptr lint in test --- tests/by-util/test_tee.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_tee.rs b/tests/by-util/test_tee.rs index c32759ed4..4f2437ace 100644 --- a/tests/by-util/test_tee.rs +++ b/tests/by-util/test_tee.rs @@ -172,7 +172,7 @@ mod linux_only { let mut fds: [c_int; 2] = [0, 0]; assert!( - (unsafe { libc::pipe(&mut fds as *mut c_int) } == 0), + (unsafe { libc::pipe(std::ptr::from_mut::(&mut fds[0])) } == 0), "Failed to create pipe" ); From 2e85198758edb44defcdc99ad96c5be26f1af1e9 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 09:09:47 +0000 Subject: [PATCH 019/179] chore(deps): update rust crate thiserror to v1.0.69 --- Cargo.lock | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 432dd0bcb..ba1007c26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -182,7 +182,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -615,7 +615,7 @@ dependencies = [ "lazy_static", "proc-macro2", "regex", - "syn 2.0.86", + "syn 2.0.87", "unicode-xid", ] @@ -627,7 +627,7 @@ checksum = "3e1a2532e4ed4ea13031c13bc7bc0dbca4aae32df48e9d77f0d1e743179f2ea1" dependencies = [ "lazy_static", "proc-macro2", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -642,7 +642,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "syn 
2.0.86", + "syn 2.0.87", ] [[package]] @@ -796,7 +796,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -823,7 +823,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -1046,7 +1046,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -1565,7 +1565,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -1765,7 +1765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550" dependencies = [ "proc-macro2", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2014,7 +2014,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.86", + "syn 2.0.87", "unicode-ident", ] @@ -2162,7 +2162,7 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2298,9 +2298,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.86" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -2360,22 +2360,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 
dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -3644,7 +3644,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -3666,7 +3666,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4019,7 +4019,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] From 5fe4dee5c429c0438f658097d9f73fff936736e5 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 15:05:57 +0000 Subject: [PATCH 020/179] chore(deps): update rust crate bstr to v1.11.0 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba1007c26..c95745548 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -259,9 +259,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +checksum = "1a68f1f47cdf0ec8ee4b941b2eee2a80cb796db73118c0dd09ac63fbe405be22" dependencies = [ "memchr", "regex-automata", From f572124f1e883c71c060efc530ee4118d7f09351 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 06:18:07 +0000 Subject: [PATCH 021/179] chore(deps): update rust 
crate clap_mangen to v0.2.24 --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c95745548..1ed175eed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,9 +378,9 @@ checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" [[package]] name = "clap_mangen" -version = "0.2.9" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0f09a0ca8f0dd8ac92c546b426f466ef19828185c6d504c80c48c9c2768ed9" +checksum = "fbae9cbfdc5d4fa8711c09bd7b83f644cb48281ac35bf97af3e47b0675864bdf" dependencies = [ "clap", "roff", @@ -3718,7 +3718,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] From 76477f2ed2b008f269a763186214683c834a34e8 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 19 Nov 2024 09:47:14 +0100 Subject: [PATCH 022/179] Bump crossterm from 0.27.0 to 0.28.1 --- Cargo.lock | 33 +++++++++++++++++++++++---------- Cargo.toml | 2 +- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ed175eed..54b03d6ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -703,16 +703,16 @@ checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crossterm" -version = "0.27.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ "bitflags 2.6.0", "crossterm_winapi", "filedescriptor", - "libc", - "mio", + "mio 1.0.2", "parking_lot", + "rustix 0.38.40", "signal-hook", "signal-hook-mio", "winapi", @@ -1140,9 +1140,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.2" +version = 
"0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -1435,6 +1435,19 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi", + "libc", + "log", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "nix" version = "0.29.0" @@ -1470,7 +1483,7 @@ dependencies = [ "inotify", "kqueue", "libc", - "mio", + "mio 0.8.11", "walkdir", "windows-sys 0.45.0", ] @@ -2215,12 +2228,12 @@ dependencies = [ [[package]] name = "signal-hook-mio" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" +checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" dependencies = [ "libc", - "mio", + "mio 1.0.2", "signal-hook", ] diff --git a/Cargo.toml b/Cargo.toml index a5d45a09b..f7b180231 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -281,7 +281,7 @@ clap_complete = "4.4" clap_mangen = "0.2" compare = "0.1.0" coz = { version = "0.1.3" } -crossterm = ">=0.27.0" +crossterm = "0.28.1" ctrlc = { version = "3.4.4", features = ["termination"] } dns-lookup = { version = "2.0.4" } exacl = "0.12.0" From 6257cf1793b8627f9a69c83f8e310abd423fce13 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 19 Nov 2024 09:47:49 +0100 Subject: [PATCH 023/179] deny.toml: add mio to skip list --- deny.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deny.toml b/deny.toml index 9fefc7727..1d7524ce2 100644 --- a/deny.toml +++ b/deny.toml @@ -104,6 +104,8 @@ skip = [ { name = "terminal_size", version = "0.2.6" }, # ansi-width, console, 
os_display { name = "unicode-width", version = "0.1.13" }, + # notify + { name = "mio", version = "0.8.11" }, ] # spell-checker: enable From d2fc3914ff436975801a1b57ff5a662d844593a1 Mon Sep 17 00:00:00 2001 From: Jesse Schalken Date: Mon, 28 Oct 2024 20:24:35 +1100 Subject: [PATCH 024/179] du: use metadata from DirEntry where possible --- src/uu/du/src/du.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index a35e9f77e..cfa53cad4 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -12,7 +12,7 @@ use std::error::Error; use std::fmt::Display; #[cfg(not(windows))] use std::fs::Metadata; -use std::fs::{self, File}; +use std::fs::{self, DirEntry, File}; use std::io::{BufRead, BufReader}; #[cfg(not(windows))] use std::os::unix::fs::MetadataExt; @@ -138,7 +138,11 @@ struct Stat { } impl Stat { - fn new(path: &Path, options: &TraversalOptions) -> std::io::Result { + fn new( + path: &Path, + dir_entry: Option<&DirEntry>, + options: &TraversalOptions, + ) -> std::io::Result { // Determine whether to dereference (follow) the symbolic link let should_dereference = match &options.dereference { Deref::All => true, @@ -149,8 +153,11 @@ impl Stat { let metadata = if should_dereference { // Get metadata, following symbolic links if necessary fs::metadata(path) + } else if let Some(dir_entry) = dir_entry { + // Get metadata directly from the DirEntry, which is faster on Windows + dir_entry.metadata() } else { - // Get metadata without following symbolic links + // Get metadata from the filesystem without following symbolic links fs::symlink_metadata(path) }?; @@ -319,7 +326,7 @@ fn du( 'file_loop: for f in read { match f { Ok(entry) => { - match Stat::new(&entry.path(), options) { + match Stat::new(&entry.path(), Some(&entry), options) { Ok(this_stat) => { // We have an exclude list for pattern in &options.excludes { @@ -765,7 +772,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } 
// Check existence of path provided in argument - if let Ok(stat) = Stat::new(&path, &traversal_options) { + if let Ok(stat) = Stat::new(&path, None, &traversal_options) { // Kick off the computation of disk usage from the initial path let mut seen_inodes: HashSet = HashSet::new(); if let Some(inode) = stat.inode { From 28e9a880773510fd2f8790a0cbb2fba4bc1581ee Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 20 Nov 2024 09:10:39 +0100 Subject: [PATCH 025/179] du: use div_ceil() from std --- src/uu/du/src/du.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index a35e9f77e..3681668f0 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -555,7 +555,7 @@ impl StatPrinter { size, uucore::format::human::SizeFormat::Binary, ), - SizeFormat::BlockSize(block_size) => div_ceil(size, block_size).to_string(), + SizeFormat::BlockSize(block_size) => size.div_ceil(block_size).to_string(), } } @@ -576,13 +576,6 @@ impl StatPrinter { } } -// This can be replaced with u64::div_ceil once it is stabilized. -// This implementation approach is optimized for when `b` is a constant, -// particularly a power of two. 
-pub fn div_ceil(a: u64, b: u64) -> u64 { - (a + b - 1) / b -} - // Read file paths from the specified file, separated by null characters fn read_files_from(file_name: &str) -> Result, std::io::Error> { let reader: Box = if file_name == "-" { From 7fb0f8a29dea1666537c4905d082d3a8b526125e Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 20 Nov 2024 09:13:32 +0100 Subject: [PATCH 026/179] sum: use div_ceil() from std --- src/uu/sum/src/sum.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/uu/sum/src/sum.rs b/src/uu/sum/src/sum.rs index d1f383351..bae288d80 100644 --- a/src/uu/sum/src/sum.rs +++ b/src/uu/sum/src/sum.rs @@ -16,13 +16,6 @@ use uucore::{format_usage, help_about, help_usage, show}; const USAGE: &str = help_usage!("sum.md"); const ABOUT: &str = help_about!("sum.md"); -// This can be replaced with usize::div_ceil once it is stabilized. -// This implementation approach is optimized for when `b` is a constant, -// particularly a power of two. -const fn div_ceil(a: usize, b: usize) -> usize { - (a + b - 1) / b -} - fn bsd_sum(mut reader: Box) -> (usize, u16) { let mut buf = [0; 4096]; let mut bytes_read = 0; @@ -41,7 +34,7 @@ fn bsd_sum(mut reader: Box) -> (usize, u16) { } // Report blocks read in terms of 1024-byte blocks. - let blocks_read = div_ceil(bytes_read, 1024); + let blocks_read = bytes_read.div_ceil(1024); (blocks_read, checksum) } @@ -66,7 +59,7 @@ fn sysv_sum(mut reader: Box) -> (usize, u16) { ret = (ret & 0xffff) + (ret >> 16); // Report blocks read in terms of 512-byte blocks. 
- let blocks_read = div_ceil(bytes_read, 512); + let blocks_read = bytes_read.div_ceil(512); (blocks_read, ret as u16) } From fc2f73b16cc112f34d16f6725a5b652e611f46d1 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 20 Nov 2024 09:17:14 +0100 Subject: [PATCH 027/179] cksum: use div_ceil() from std --- src/uu/cksum/src/cksum.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 2392660ee..e96d2de6f 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -22,7 +22,7 @@ use uucore::{ format_usage, help_about, help_section, help_usage, line_ending::LineEnding, os_str_as_bytes, show, - sum::{div_ceil, Digest}, + sum::Digest, }; const USAGE: &str = help_usage!("cksum.md"); @@ -124,7 +124,7 @@ where format!( "{} {}{}", sum.parse::().unwrap(), - div_ceil(sz, options.output_bits), + sz.div_ceil(options.output_bits), if not_file { "" } else { " " } ), !not_file, @@ -134,7 +134,7 @@ where format!( "{:0bsd_width$} {:bsd_width$}{}", sum.parse::().unwrap(), - div_ceil(sz, options.output_bits), + sz.div_ceil(options.output_bits), if not_file { "" } else { " " } ), !not_file, From cfe2c9f6da56e9047d05a78657d7f8d0a1ad20ab Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 20 Nov 2024 09:20:41 +0100 Subject: [PATCH 028/179] uucore: remove div_ceil() from sum feature --- src/uucore/src/lib/features/sum.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/uucore/src/lib/features/sum.rs b/src/uucore/src/lib/features/sum.rs index 086c6ca9d..1baff7f79 100644 --- a/src/uucore/src/lib/features/sum.rs +++ b/src/uucore/src/lib/features/sum.rs @@ -207,13 +207,6 @@ impl Digest for CRC { } } -// This can be replaced with usize::div_ceil once it is stabilized. -// This implementation approach is optimized for when `b` is a constant, -// particularly a power of two. 
-pub fn div_ceil(a: usize, b: usize) -> usize { - (a + b - 1) / b -} - pub struct BSD { state: u16, } From 7ffe3d49efab4e0ca9cd7fdeb49319637cd857b8 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 16 Nov 2024 23:15:21 +0100 Subject: [PATCH 029/179] mv: fix the output of an error message + adjust a GNU test Should make tests/mv/dup-source.sh pass --- src/uu/mv/src/mv.rs | 2 +- tests/by-util/test_mv.rs | 20 ++++++++++++++++++++ util/gnu-patches/tests_dup_source.patch | 13 +++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 util/gnu-patches/tests_dup_source.patch diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index c57f2527e..9d8452b1e 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -488,7 +488,7 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options) format!( "cannot move '{}' to a subdirectory of itself, '{}/{}'", sourcepath.display(), - target_dir.display(), + uucore::fs::normalize_path(target_dir).display(), canonicalized_target_dir.components().last().map_or_else( || target_dir.display().to_string(), |dir| { PathBuf::from(dir.as_os_str()).display().to_string() } diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index d8bc49e8e..6f2693a86 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -1732,3 +1732,23 @@ fn test_mv_error_msg_with_multiple_sources_that_does_not_exist() { .stderr_contains("mv: cannot stat 'a': No such file or directory") .stderr_contains("mv: cannot stat 'b/': No such file or directory"); } + +#[test] +fn test_mv_error_cant_move_itself() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.mkdir("b"); + scene + .ucmd() + .arg("b") + .arg("b/") + .fails() + .stderr_contains("mv: cannot move 'b' to a subdirectory of itself, 'b/b'"); + scene + .ucmd() + .arg("./b") + .arg("b") + .arg("b/") + .fails() + .stderr_contains("mv: cannot move 'b' to a subdirectory of itself, 'b/b'"); +} diff --git 
a/util/gnu-patches/tests_dup_source.patch b/util/gnu-patches/tests_dup_source.patch new file mode 100644 index 000000000..44e33723b --- /dev/null +++ b/util/gnu-patches/tests_dup_source.patch @@ -0,0 +1,13 @@ +diff --git a/tests/mv/dup-source.sh b/tests/mv/dup-source.sh +index 7bcd82fc3..0f9005296 100755 +--- a/tests/mv/dup-source.sh ++++ b/tests/mv/dup-source.sh +@@ -83,7 +83,7 @@ $i: cannot stat 'a': No such file or directory + $i: cannot stat 'a': No such file or directory + $i: cannot stat 'b': No such file or directory + $i: cannot move './b' to a subdirectory of itself, 'b/b' +-$i: warning: source directory 'b' specified more than once ++$i: cannot move 'b' to a subdirectory of itself, 'b/b' + EOF + compare exp out || fail=1 + done From c986fb7d2e924baa1b27ec8e4c025ee104b07c85 Mon Sep 17 00:00:00 2001 From: steinwand6 <57711907+steinwand6@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:11:04 +0900 Subject: [PATCH 030/179] seq: add overflow checks when parsing exponents (#6858) * seq: remove ignore flag from test_invalid_float_point_fail_properly(#6235) * seq: prevent overflow in parse_exponent_no_decimal * seq: add tests for invalid floating point arguments * seq: add overflow checks when parsing decimal with exponent * seq: add overflow checks --- src/uu/seq/src/numberparse.rs | 29 ++++++++++++++++++++--------- tests/by-util/test_seq.rs | 14 ++++++++++++-- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 5a5c64bb9..adbaccc11 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -106,16 +106,20 @@ fn parse_exponent_no_decimal(s: &str, j: usize) -> Result 0 { - 2usize + exponent as usize + (2usize) + .checked_add(exponent as usize) + .ok_or(ParseNumberError::Float)? 
} else { 2usize } } else { - let total = j as i64 + exponent; + let total = (j as i64) + .checked_add(exponent) + .ok_or(ParseNumberError::Float)?; let result = if total < 1 { 1 } else { - total.try_into().unwrap() + total.try_into().map_err(|_| ParseNumberError::Float)? }; if x.sign() == Sign::Minus { result + 1 @@ -207,7 +211,9 @@ fn parse_decimal_and_exponent( let integral_part: f64 = s[..j].parse().map_err(|_| ParseNumberError::Float)?; if integral_part.is_sign_negative() { if exponent > 0 { - 2usize + exponent as usize + 2usize + .checked_add(exponent as usize) + .ok_or(ParseNumberError::Float)? } else { 2usize } @@ -217,15 +223,20 @@ fn parse_decimal_and_exponent( }; // Special case: if the string is "-.1e2", we need to treat it // as if it were "-0.1e2". - let total = if s.starts_with("-.") { - i as i64 + exponent + 1 - } else { - i as i64 + exponent + let total = { + let total = (i as i64) + .checked_add(exponent) + .ok_or(ParseNumberError::Float)?; + if s.starts_with("-.") { + total.checked_add(1).ok_or(ParseNumberError::Float)? + } else { + total + } }; if total < minimum as i64 { minimum } else { - total.try_into().unwrap() + total.try_into().map_err(|_| ParseNumberError::Float)? 
} }; diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index a8bd1fb83..c14d30629 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -777,12 +777,22 @@ fn test_undefined() { } #[test] -#[ignore = "Need issue #6235 to be fixed"] fn test_invalid_float_point_fail_properly() { new_ucmd!() .args(&["66000e000000000000000000000000000000000000000000000000000009223372036854775807"]) .fails() - .stdout_only(""); // might need to be updated + .no_stdout() + .usage_error("invalid floating point argument: '66000e000000000000000000000000000000000000000000000000000009223372036854775807'"); + new_ucmd!() + .args(&["-1.1e9223372036854775807"]) + .fails() + .no_stdout() + .usage_error("invalid floating point argument: '-1.1e9223372036854775807'"); + new_ucmd!() + .args(&["-.1e9223372036854775807"]) + .fails() + .no_stdout() + .usage_error("invalid floating point argument: '-.1e9223372036854775807'"); } #[test] From 76d14ed4841d2d8342625577cf9b4a25ce691566 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 20 Nov 2024 07:46:12 -0500 Subject: [PATCH 031/179] du: fix the count with --inodes --- src/uu/du/src/du.rs | 11 +++++++++-- tests/by-util/test_du.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 49303a82c..a2d2082e6 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -346,14 +346,21 @@ fn du( } if let Some(inode) = this_stat.inode { - if seen_inodes.contains(&inode) { - if options.count_links { + // Check if the inode has been seen before and if we should skip it + if seen_inodes.contains(&inode) + && (!options.count_links || !options.all) + { + // If `count_links` is enabled and `all` is not, increment the inode count + if options.count_links && !options.all { my_stat.inodes += 1; } + // Skip further processing for this inode continue; } + // Mark this inode as seen seen_inodes.insert(inode); } + if this_stat.is_dir { if 
options.one_file_system { if let (Some(this_inode), Some(my_inode)) = diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index ef6179e02..862d3581c 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -546,6 +546,33 @@ fn test_du_inodes_with_count_links() { } } +#[cfg(not(target_os = "android"))] +#[test] +fn test_du_inodes_with_count_links_all() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir("d"); + at.mkdir("d/d"); + at.touch("d/f"); + at.hard_link("d/f", "d/h"); + + let result = ts.ucmd().arg("--inodes").arg("-al").arg("d").succeeds(); + result.no_stderr(); + + let mut result_seq: Vec = result + .stdout_str() + .split('\n') + .filter(|x| !x.is_empty()) + .map(|x| x.parse().unwrap()) + .collect(); + result_seq.sort_unstable(); + #[cfg(windows)] + assert_eq!(result_seq, ["1\td\\d", "1\td\\f", "1\td\\h", "4\td"]); + #[cfg(not(windows))] + assert_eq!(result_seq, ["1\td/d", "1\td/f", "1\td/h", "4\td"]); +} + #[test] fn test_du_h_flag_empty_file() { new_ucmd!() From 1b2778b819ed68bf8a5f462cf0e50e3990f911ef Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 20 Nov 2024 07:46:53 -0500 Subject: [PATCH 032/179] du: fix the size display with --inodes --- src/uu/du/src/du.rs | 12 ++++++++---- tests/by-util/test_du.rs | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index a2d2082e6..e7b00838e 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -557,9 +557,6 @@ impl StatPrinter { } fn convert_size(&self, size: u64) -> String { - if self.inodes { - return size.to_string(); - } match self.size_format { SizeFormat::HumanDecimal => uucore::format::human::human_readable( size, @@ -569,7 +566,14 @@ impl StatPrinter { size, uucore::format::human::SizeFormat::Binary, ), - SizeFormat::BlockSize(block_size) => size.div_ceil(block_size).to_string(), + SizeFormat::BlockSize(block_size) => { + if self.inodes { + // we 
ignore block size (-B) with --inodes + size.to_string() + } else { + size.div_ceil(block_size).to_string() + } + } } } diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 862d3581c..af9718a4e 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -1198,3 +1198,25 @@ fn test_invalid_time_style() { .succeeds() .stdout_does_not_contain("du: invalid argument 'banana' for 'time style'"); } + +#[test] +fn test_human_size() { + use std::fs::File; + + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + let dir = at.plus_as_string("d"); + at.mkdir(&dir); + + for i in 1..=1023 { + let file_path = format!("{dir}/file{i}"); + File::create(&file_path).expect("Failed to create file"); + } + + ts.ucmd() + .arg("--inodes") + .arg("-h") + .arg(&dir) + .succeeds() + .stdout_contains(format!("1.0K {dir}")); +} From 06e01324a185e2ed310e2f6e77dcd6c49e04e324 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 06:08:18 +0000 Subject: [PATCH 033/179] fix(deps): update rust crate proc-macro2 to v1.0.91 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54b03d6ca..e699d20d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1792,9 +1792,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "307e3004becf10f5a6e0d59d20f3cd28231b0e0827a96cd3e0ce6d14bc1e4bb3" dependencies = [ "unicode-ident", ] From 9f07bf880944e777b218b43e435e9f8f03eab159 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 28 Mar 2024 16:02:54 +0100 Subject: [PATCH 034/179] mv: remove "sleep" in tests --- tests/by-util/test_mv.rs | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git 
a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index 6f2693a86..6ab989ee4 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -6,8 +6,7 @@ // spell-checker:ignore mydir use crate::common::util::TestScenario; use filetime::FileTime; -use std::thread::sleep; -use std::time::Duration; +use std::io::Write; #[test] fn test_mv_invalid_arg() { @@ -974,9 +973,9 @@ fn test_mv_arg_update_older_dest_not_older() { let old_content = "file1 content\n"; let new_content = "file2 content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -1001,9 +1000,9 @@ fn test_mv_arg_update_none_then_all() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -1029,9 +1028,9 @@ fn test_mv_arg_update_all_then_none() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -1055,9 +1054,9 @@ fn test_mv_arg_update_older_dest_older() { let old_content = "file1 content\n"; let new_content = "file2 content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -1081,9 +1080,9 @@ fn test_mv_arg_update_short_overwrite() { let old_content = "file1 content\n"; let new_content = "file2 content\n"; - at.write(old, old_content); - - 
sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -1107,9 +1106,9 @@ fn test_mv_arg_update_short_no_overwrite() { let old_content = "file1 content\n"; let new_content = "file2 content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); From bdffbb044b706bd35f7766299439ceff373e7b06 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:24:37 +0000 Subject: [PATCH 035/179] fix(deps): update rust crate proc-macro2 to v1.0.92 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e699d20d8..9fe65293a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1792,9 +1792,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.91" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "307e3004becf10f5a6e0d59d20f3cd28231b0e0827a96cd3e0ce6d14bc1e4bb3" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] From 6ddd4f6285f69b43d5833472540d9da0ba4207f0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 24 Nov 2024 01:17:34 +0000 Subject: [PATCH 036/179] chore(deps): update rust crate fts-sys to v0.2.13 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9fe65293a..42ccfc0f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -961,9 +961,9 @@ dependencies = [ [[package]] name = "fts-sys" -version = "0.2.11" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"28ab6a6dfd9184fe8a5097924dea6e7648f499121b3e933bb8486a17f817122e" +checksum = "c427b250eff90452a35afd79fdfcbcf4880e307225bc28bd36d9a2cd78bb6d90" dependencies = [ "bindgen", "libc", From cc3353ed7a454c6b95e453cf7ae6d5a49b8c2798 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 25 Nov 2024 09:03:26 +0100 Subject: [PATCH 037/179] Bump cpp & cpp_build from 0.5.9 to 0.5.10 --- Cargo.lock | 16 ++++++++-------- src/uu/stdbuf/src/libstdbuf/Cargo.toml | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 42ccfc0f2..10ca01c8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -597,18 +597,18 @@ dependencies = [ [[package]] name = "cpp" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa65869ef853e45c60e9828aa08cdd1398cb6e13f3911d9cb2a079b144fcd64" +checksum = "f36bcac3d8234c1fb813358e83d1bb6b0290a3d2b3b5efc6b88bfeaf9d8eec17" dependencies = [ "cpp_macros", ] [[package]] name = "cpp_build" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e361fae2caf9758164b24da3eedd7f7d7451be30d90d8e7b5d2be29a2f0cf5b" +checksum = "27f8638c97fbd79cc6fc80b616e0e74b49bac21014faed590bbc89b7e2676c90" dependencies = [ "cc", "cpp_common", @@ -621,9 +621,9 @@ dependencies = [ [[package]] name = "cpp_common" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e1a2532e4ed4ea13031c13bc7bc0dbca4aae32df48e9d77f0d1e743179f2ea1" +checksum = "25fcfea2ee05889597d35e986c2ad0169694320ae5cc8f6d2640a4bb8a884560" dependencies = [ "lazy_static", "proc-macro2", @@ -632,9 +632,9 @@ dependencies = [ [[package]] name = "cpp_macros" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47ec9cc90633446f779ef481a9ce5a0077107dd5b87016440448d908625a83fd" +checksum = 
"d156158fe86e274820f5a53bc9edb0885a6e7113909497aa8d883b69dd171871" dependencies = [ "aho-corasick", "byteorder", diff --git a/src/uu/stdbuf/src/libstdbuf/Cargo.toml b/src/uu/stdbuf/src/libstdbuf/Cargo.toml index ff9de77fc..67a7e903e 100644 --- a/src/uu/stdbuf/src/libstdbuf/Cargo.toml +++ b/src/uu/stdbuf/src/libstdbuf/Cargo.toml @@ -20,8 +20,8 @@ crate-type = [ ] # XXX: note: the rlib is just to prevent Cargo from spitting out a warning [dependencies] -cpp = "0.5.9" +cpp = "0.5.10" libc = { workspace = true } [build-dependencies] -cpp_build = "0.5.9" +cpp_build = "0.5.10" From 91dc89c3bab4b6694b3debf2a411aad9a8f92ebc Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 25 Nov 2024 09:28:35 +0100 Subject: [PATCH 038/179] Bump thiserror from 1.0.69 to 2.0.3 --- Cargo.lock | 44 ++++++++++++++++++++++++++++++++------------ Cargo.toml | 2 +- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 42ccfc0f2..31798d689 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -912,7 +912,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7199d965852c3bac31f779ef99cbb4537f80e952e2d6aa0ffeb30cce00f4f46e" dependencies = [ "libc", - "thiserror", + "thiserror 1.0.69", "winapi", ] @@ -2128,7 +2128,7 @@ dependencies = [ "once_cell", "reference-counted-singleton", "selinux-sys", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2377,7 +2377,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] @@ -2391,6 +2400,17 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "thiserror-impl" 
+version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "time" version = "0.3.36" @@ -2524,7 +2544,7 @@ checksum = "e24c654e19afaa6b8f3877ece5d3bed849c2719c56f6752b18ca7da4fcc6e85a" dependencies = [ "cfg-if", "libc", - "thiserror", + "thiserror 1.0.69", "time", "utmp-classic-raw", "zerocopy", @@ -2590,7 +2610,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror", + "thiserror 2.0.3", "uucore", ] @@ -2602,7 +2622,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror", + "thiserror 2.0.3", "uucore", ] @@ -2679,7 +2699,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", - "thiserror", + "thiserror 2.0.3", "uucore", ] @@ -3195,7 +3215,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror", + "thiserror 2.0.3", "uucore", ] @@ -3474,7 +3494,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror", + "thiserror 2.0.3", "utmp-classic", "uucore", ] @@ -3505,7 +3525,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror", + "thiserror 2.0.3", "unicode-width 0.1.13", "uucore", ] @@ -3566,7 +3586,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror", + "thiserror 2.0.3", "time", "uucore_procs", "walkdir", @@ -4048,5 +4068,5 @@ dependencies = [ "flate2", "indexmap", "num_enum", - "thiserror", + "thiserror 1.0.69", ] diff --git a/Cargo.toml b/Cargo.toml index f7b180231..caa233802 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -332,7 +332,7 @@ tempfile = "3.10.1" uutils_term_grid = "0.6" terminal_size = "0.4.0" textwrap = { version = "0.16.1", features = ["terminal_size"] } -thiserror = "1.0.59" +thiserror = "2.0.3" time = { version = "0.3.36" } unicode-segmentation = "1.11.0" unicode-width = "0.1.12" From c38897b101ff405207d572cda5df9a281b2cbfd4 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 25 Nov 2024 
09:32:01 +0100 Subject: [PATCH 039/179] deny.toml: add thiserror & thiserror-impl to skip list --- deny.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deny.toml b/deny.toml index 1d7524ce2..d64a2d33a 100644 --- a/deny.toml +++ b/deny.toml @@ -106,6 +106,10 @@ skip = [ { name = "unicode-width", version = "0.1.13" }, # notify { name = "mio", version = "0.8.11" }, + # various crates + { name = "thiserror", version = "1.0.69" }, + # thiserror + { name = "thiserror-impl", version = "1.0.69" }, ] # spell-checker: enable From a81bd33b6b39188deb3fe3827e1e84ae7171f754 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 22:58:46 +0000 Subject: [PATCH 040/179] fix(deps): update rust crate libc to v0.2.165 --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 604ccebd5..5d13b9c4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.164" +version = "0.2.165" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "fcb4d3d38eab6c5239a362fa8bae48c03baf980a6e7079f063942d563ef3533e" [[package]] name = "libloading" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 4903e424d..724e0db7e 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.164" +version = "0.2.165" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "fcb4d3d38eab6c5239a362fa8bae48c03baf980a6e7079f063942d563ef3533e" [[package]] name = "libfuzzer-sys" @@ -600,9 +600,9 @@ checksum = 
"5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.83" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -759,9 +759,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.65" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -793,18 +793,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", From a3b740355057027bc556b02f9791a49317870927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 3 Nov 2024 12:29:30 +0100 Subject: [PATCH 041/179] feat(checksum): improve FileCheckError variants to be meaningful --- src/uucore/src/lib/features/checksum.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 
e7a0a2653..160644046 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -107,14 +107,16 @@ impl From for LineCheckError { } /// Represents an error that was encountered when processing a checksum file. -#[allow(clippy::enum_variant_names)] enum FileCheckError { /// a generic UError was encountered in sub-functions UError(Box), - /// the error does not stop the processing of next files - NonCriticalError, - /// the error must stop the run of the program - CriticalError, + /// the checksum file is improperly formatted. + ImproperlyFormatted, + /// reading of the checksum file failed + CantOpenChecksumFile, + /// Algorithm detection was unsuccessful. + /// Either none is provided, or there is a conflict. + AlgoDetectionError, } impl From> for FileCheckError { @@ -735,7 +737,7 @@ fn process_checksum_file( // Could not read the file, show the error and continue to the next file show_error!("{e}"); set_exit_code(1); - return Err(FileCheckError::NonCriticalError); + return Err(FileCheckError::CantOpenChecksumFile); } } }; @@ -749,7 +751,7 @@ fn process_checksum_file( }; show_error!("{e}"); set_exit_code(1); - return Err(FileCheckError::NonCriticalError); + return Err(FileCheckError::AlgoDetectionError); }; for (i, line) in lines.iter().enumerate() { @@ -791,7 +793,7 @@ fn process_checksum_file( .into()); } set_exit_code(1); - return Err(FileCheckError::CriticalError); + return Err(FileCheckError::ImproperlyFormatted); } // if any incorrectly formatted line, show it @@ -839,8 +841,8 @@ where use FileCheckError::*; match process_checksum_file(filename_input, algo_name_input, length_input, opts) { Err(UError(e)) => return Err(e), - Err(CriticalError) => break, - Err(NonCriticalError) | Ok(_) => continue, + Err(ImproperlyFormatted) => break, + Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue, } } From 20dfe2dc10d80e2a7b3f4f6edb036b1c22c2b005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= 
Date: Sun, 17 Nov 2024 12:06:37 +0100 Subject: [PATCH 042/179] test(cksum): remove duplicate testcase --- tests/by-util/test_cksum.rs | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index ee1e05292..86de7ea95 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -1251,33 +1251,6 @@ fn test_several_files_error_mgmt() { .stderr_contains("incorrect: no properly "); } -#[cfg(target_os = "linux")] -#[test] -fn test_non_utf8_filename() { - use std::ffi::OsString; - use std::os::unix::ffi::OsStringExt; - - let scene = TestScenario::new(util_name!()); - let at = &scene.fixtures; - let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec()); - - at.touch(&filename); - - scene - .ucmd() - .arg(&filename) - .succeeds() - .stdout_is_bytes(b"4294967295 0 funky\xffname\n") - .no_stderr(); - scene - .ucmd() - .arg("-asha256") - .arg(filename) - .succeeds() - .stdout_is_bytes(b"SHA256 (funky\xffname) = e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n") - .no_stderr(); -} - #[test] fn test_check_comment_line() { // A comment in a checksum file shall be discarded unnoticed. @@ -1458,7 +1431,6 @@ mod check_utf8 { .no_stderr(); } - #[cfg(target_os = "linux")] #[test] fn test_check_non_utf8_filename() { use std::{ffi::OsString, os::unix::ffi::OsStringExt}; From f3763ef190eb5497e10a1fa89bc16028ff35fa4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 17 Nov 2024 14:09:25 +0100 Subject: [PATCH 043/179] feat(checksum): simplify get_expected_checksum - Rename the function to emphasize its goal - Do not pass the filename anymore, as it is only used to create an error, that may be done in the scope calling the function - Change the return type to Option, as the error is made in the outer scope - Don't try to decode the base64 string as UTF8 string. This most oftenly fails and is wrong. 
- Get rid of the `bytes_to_hex` function, as it provides the same functionality as `hex::encode` --- src/uucore/src/lib/features/checksum.rs | 56 ++++++++----------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 160644046..8dce955fc 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -441,43 +441,15 @@ fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> { None } -// Converts bytes to a hexadecimal string -fn bytes_to_hex(bytes: &[u8]) -> String { - use std::fmt::Write; - bytes - .iter() - .fold(String::with_capacity(bytes.len() * 2), |mut hex, byte| { - write!(hex, "{byte:02x}").unwrap(); - hex - }) -} +/// Extract the expected digest from the checksum string +fn get_expected_digest_as_hexa_string(caps: &Captures, chosen_regex: &Regex) -> Option { + // Unwraps are safe, ensured by regex. + let ck = caps.name("checksum").unwrap().as_bytes(); -fn get_expected_checksum( - filename: &[u8], - caps: &Captures, - chosen_regex: &Regex, -) -> UResult { if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 { - // Unwrap is safe, ensured by regex - let ck = caps.name("checksum").unwrap().as_bytes(); - match BASE64.decode(ck) { - Ok(decoded_bytes) => { - match std::str::from_utf8(&decoded_bytes) { - Ok(decoded_str) => Ok(decoded_str.to_string()), - Err(_) => Ok(bytes_to_hex(&decoded_bytes)), // Handle as raw bytes if not valid UTF-8 - } - } - Err(_) => Err(Box::new( - ChecksumError::NoProperlyFormattedChecksumLinesFound { - filename: String::from_utf8_lossy(filename).to_string(), - }, - )), - } + BASE64.decode(ck).map(hex::encode).ok() } else { - // Unwraps are safe, ensured by regex. 
- Ok(str::from_utf8(caps.name("checksum").unwrap().as_bytes()) - .unwrap() - .to_string()) + Some(str::from_utf8(ck).unwrap().to_string()) } } @@ -631,7 +603,13 @@ fn process_checksum_line( filename_to_check = &filename_to_check[1..]; } - let expected_checksum = get_expected_checksum(filename_to_check, &caps, chosen_regex)?; + let expected_checksum = get_expected_digest_as_hexa_string(&caps, chosen_regex).ok_or( + LineCheckError::UError(Box::new( + ChecksumError::NoProperlyFormattedChecksumLinesFound { + filename: String::from_utf8_lossy(filename_to_check).to_string(), + }, + )), + )?; // If the algo_name is provided, we use it, otherwise we try to detect it let (algo_name, length) = if is_algo_based_format { @@ -1250,13 +1228,13 @@ mod tests { } #[test] - fn test_get_expected_checksum() { + fn test_get_expected_digest() { let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(); let caps = re .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") .unwrap(); - let result = get_expected_checksum(b"filename", &caps, &re); + let result = get_expected_digest_as_hexa_string(&caps, &re); assert_eq!( result.unwrap(), @@ -1271,9 +1249,9 @@ mod tests { .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU") .unwrap(); - let result = get_expected_checksum(b"filename", &caps, &re); + let result = get_expected_digest_as_hexa_string(&caps, &re); - assert!(result.is_err()); + assert!(result.is_none()); } #[test] From a0af49f2d8576e8e17ce9653abb5bd492c70d6db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 17 Nov 2024 15:52:11 +0100 Subject: [PATCH 044/179] feat(checksum): get rid of the properly_formatted variable --- src/uucore/src/lib/features/checksum.rs | 78 ++++++++++--------------- 1 file changed, 30 insertions(+), 48 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 8dce955fc..386da1f71 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ 
b/src/uucore/src/lib/features/checksum.rs @@ -174,8 +174,6 @@ pub enum ChecksumError { CombineMultipleAlgorithms, #[error("Needs an algorithm to hash with.\nUse --help for more information.")] NeedAlgorithmToHash, - #[error("{filename}: no properly formatted checksum lines found")] - NoProperlyFormattedChecksumLinesFound { filename: String }, } impl UError for ChecksumError { @@ -241,6 +239,12 @@ fn cksum_output(res: &ChecksumResult, status: bool) { } } +/// Print a "no properly formatted lines" message in stderr +#[inline] +fn log_no_properly_formatted(filename: String) { + show_error!("{filename}: no properly formatted checksum lines found"); +} + /// Represents the different outcomes that can happen to a file /// that is being checked. #[derive(Debug, Clone, Copy)] @@ -442,7 +446,7 @@ fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> { } /// Extract the expected digest from the checksum string -fn get_expected_digest_as_hexa_string(caps: &Captures, chosen_regex: &Regex) -> Option { +fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> Option { // Unwraps are safe, ensured by regex. let ck = caps.name("checksum").unwrap().as_bytes(); @@ -528,8 +532,6 @@ fn get_input_file(filename: &OsStr) -> UResult> { fn identify_algo_name_and_length( caps: &Captures, algo_name_input: Option<&str>, - res: &mut ChecksumResult, - properly_formatted: &mut bool, ) -> Option<(String, Option)> { // When the algo-based format is matched, extract details from regex captures let algorithm = caps @@ -543,14 +545,11 @@ fn identify_algo_name_and_length( // (for example SHA1 (f) = d...) 
// Also handle the case cksum -s sm3 but the file contains other formats if algo_name_input.is_some() && algo_name_input != Some(&algorithm) { - res.bad_format += 1; - *properly_formatted = false; return None; } if !SUPPORTED_ALGORITHMS.contains(&algorithm.as_str()) { // Not supported algo, leave early - *properly_formatted = false; return None; } @@ -562,7 +561,6 @@ fn identify_algo_name_and_length( if bits_value % 8 == 0 { Some(Some(bits_value / 8)) } else { - *properly_formatted = false; None // Return None to signal a divisibility issue } })?; @@ -583,16 +581,12 @@ fn process_checksum_line( i: usize, chosen_regex: &Regex, is_algo_based_format: bool, - res: &mut ChecksumResult, cli_algo_name: Option<&str>, cli_algo_length: Option, - properly_formatted: &mut bool, opts: ChecksumOptions, ) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; if let Some(caps) = chosen_regex.captures(line_bytes) { - *properly_formatted = true; - let mut filename_to_check = caps.name("filename").unwrap().as_bytes(); if filename_to_check.starts_with(b"*") @@ -603,18 +597,13 @@ fn process_checksum_line( filename_to_check = &filename_to_check[1..]; } - let expected_checksum = get_expected_digest_as_hexa_string(&caps, chosen_regex).ok_or( - LineCheckError::UError(Box::new( - ChecksumError::NoProperlyFormattedChecksumLinesFound { - filename: String::from_utf8_lossy(filename_to_check).to_string(), - }, - )), - )?; + let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex) + .ok_or(LineCheckError::ImproperlyFormatted)?; // If the algo_name is provided, we use it, otherwise we try to detect it let (algo_name, length) = if is_algo_based_format { - identify_algo_name_and_length(&caps, cli_algo_name, res, properly_formatted) - .unwrap_or((String::new(), None)) + identify_algo_name_and_length(&caps, cli_algo_name) + .ok_or(LineCheckError::ImproperlyFormatted)? 
} else if let Some(a) = cli_algo_name { // When a specific algorithm name is input, use it and use the provided bits // except when dealing with blake2b, where we will detect the length @@ -628,16 +617,9 @@ fn process_checksum_line( } } else { // Default case if no algorithm is specified and non-algo based format is matched - (String::new(), None) + return Err(LineCheckError::ImproperlyFormatted); }; - if algo_name.is_empty() { - // we haven't been able to detect the algo name. No point to continue - *properly_formatted = false; - - // TODO: return error? - return Err(LineCheckError::ImproperlyFormatted); - } let mut algo = detect_algo(&algo_name, length)?; let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check); @@ -689,7 +671,6 @@ fn process_checksum_line( ); } - res.bad_format += 1; Err(LineCheckError::ImproperlyFormatted) } } @@ -701,8 +682,9 @@ fn process_checksum_file( opts: ChecksumOptions, ) -> Result<(), FileCheckError> { let mut correct_format = 0; - let mut properly_formatted = false; + let mut properly_formatted_lines = 0; let mut res = ChecksumResult::default(); + let input_is_stdin = filename_input == OsStr::new("-"); let file: Box = if input_is_stdin { @@ -724,10 +706,7 @@ fn process_checksum_file( let lines = read_os_string_lines(reader).collect::>(); let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else { - let e = ChecksumError::NoProperlyFormattedChecksumLinesFound { - filename: get_filename_for_output(filename_input, input_is_stdin), - }; - show_error!("{e}"); + log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin)); set_exit_code(1); return Err(FileCheckError::AlgoDetectionError); }; @@ -739,21 +718,27 @@ fn process_checksum_file( i, &chosen_regex, is_algo_based_format, - &mut res, cli_algo_name, cli_algo_length, - &mut properly_formatted, opts, ) { - Ok(()) => correct_format += 1, - Err(LineCheckError::DigestMismatch) => res.failed_cksum += 1, + Ok(()) => { + 
correct_format += 1; + properly_formatted_lines += 1 + } + Err(LineCheckError::DigestMismatch) => { + res.failed_cksum += 1; + properly_formatted_lines += 1 + } Err(LineCheckError::UError(e)) => return Err(e.into()), Err(LineCheckError::Skipped) => continue, - Err(LineCheckError::ImproperlyFormatted) => (), + Err(LineCheckError::ImproperlyFormatted) => res.bad_format += 1, Err(LineCheckError::CantOpenFile | LineCheckError::FileIsDirectory) => { + properly_formatted_lines += 1; res.failed_open_file += 1 } Err(LineCheckError::FileNotFound) => { + properly_formatted_lines += 1; if !opts.ignore_missing { res.failed_open_file += 1 } @@ -763,12 +748,9 @@ fn process_checksum_file( // not a single line correctly formatted found // return an error - if !properly_formatted { + if properly_formatted_lines == 0 { if !opts.status { - return Err(ChecksumError::NoProperlyFormattedChecksumLinesFound { - filename: get_filename_for_output(filename_input, input_is_stdin), - } - .into()); + log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin)); } set_exit_code(1); return Err(FileCheckError::ImproperlyFormatted); @@ -1234,7 +1216,7 @@ mod tests { .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") .unwrap(); - let result = get_expected_digest_as_hexa_string(&caps, &re); + let result = get_expected_digest_as_hex_string(&caps, &re); assert_eq!( result.unwrap(), @@ -1249,7 +1231,7 @@ mod tests { .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU") .unwrap(); - let result = get_expected_digest_as_hexa_string(&caps, &re); + let result = get_expected_digest_as_hex_string(&caps, &re); assert!(result.is_none()); } From 7c4724edc32dd4ecfd7d16051eb074fb3bcc0ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 17 Nov 2024 16:28:53 +0100 Subject: [PATCH 045/179] feat(checksum): refactor ChecksumResult to include more counters in it - Add comments to explain what each field is counting --- 
src/uucore/src/lib/features/checksum.rs | 71 ++++++++++++++----------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 386da1f71..8c435afed 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -68,11 +68,27 @@ pub struct HashAlgorithm { pub bits: usize, } +/// This structure holds the count of checksum test lines' outcomes. #[derive(Default)] struct ChecksumResult { - pub bad_format: i32, - pub failed_cksum: i32, - pub failed_open_file: i32, + /// Number of lines in the file where the computed checksum MATCHES + /// the expectation. + pub correct: u32, + /// Number of lines in the file where the computed checksum DIFFERS + /// from the expectation. + pub failed_cksum: u32, + pub failed_open_file: u32, + /// Number of improperly formatted lines. + pub bad_format: u32, + /// Total number of non-empty, non-comment lines. + pub total: u32, +} + +impl ChecksumResult { + #[inline] + fn total_properly_formatted(&self) -> u32 { + self.total - self.bad_format + } } /// Represents a reason for which the processing of a checksum line @@ -681,8 +697,6 @@ fn process_checksum_file( cli_algo_length: Option, opts: ChecksumOptions, ) -> Result<(), FileCheckError> { - let mut correct_format = 0; - let mut properly_formatted_lines = 0; let mut res = ChecksumResult::default(); let input_is_stdin = filename_input == OsStr::new("-"); @@ -712,7 +726,7 @@ fn process_checksum_file( }; for (i, line) in lines.iter().enumerate() { - match process_checksum_line( + let line_result = process_checksum_line( filename_input, line, i, @@ -721,34 +735,31 @@ fn process_checksum_file( cli_algo_name, cli_algo_length, opts, - ) { - Ok(()) => { - correct_format += 1; - properly_formatted_lines += 1 - } - Err(LineCheckError::DigestMismatch) => { - res.failed_cksum += 1; - properly_formatted_lines += 1 - } - Err(LineCheckError::UError(e)) => return 
Err(e.into()), - Err(LineCheckError::Skipped) => continue, - Err(LineCheckError::ImproperlyFormatted) => res.bad_format += 1, - Err(LineCheckError::CantOpenFile | LineCheckError::FileIsDirectory) => { - properly_formatted_lines += 1; - res.failed_open_file += 1 - } - Err(LineCheckError::FileNotFound) => { - properly_formatted_lines += 1; - if !opts.ignore_missing { - res.failed_open_file += 1 - } - } + ); + + // Match a first time to elude critical UErrors, and increment the total + // in all cases except on skipped. + use LineCheckError::*; + match line_result { + Err(UError(e)) => return Err(e.into()), + Err(Skipped) => (), + _ => res.total += 1, + } + + // Match a second time to update the right field of `res`. + match line_result { + Ok(()) => res.correct += 1, + Err(DigestMismatch) => res.failed_cksum += 1, + Err(ImproperlyFormatted) => res.bad_format += 1, + Err(CantOpenFile | FileIsDirectory) => res.failed_open_file += 1, + Err(FileNotFound) if !opts.ignore_missing => res.failed_open_file += 1, + _ => continue, }; } // not a single line correctly formatted found // return an error - if properly_formatted_lines == 0 { + if res.total_properly_formatted() == 0 { if !opts.status { log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin)); } @@ -759,7 +770,7 @@ fn process_checksum_file( // if any incorrectly formatted line, show it cksum_output(&res, opts.status); - if opts.ignore_missing && correct_format == 0 { + if opts.ignore_missing && res.correct == 0 { // we have only bad format // and we had ignore-missing eprintln!( From ba7c02860e30120d0a64aaf17d97bb0b3d2a8ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 17 Nov 2024 23:16:16 +0100 Subject: [PATCH 046/179] feat(checksum): odd number of hexa characters is wrong formatting --- src/uucore/src/lib/features/checksum.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/checksum.rs 
b/src/uucore/src/lib/features/checksum.rs index 8c435afed..1fbf201e6 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -468,8 +468,12 @@ fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> O if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 { BASE64.decode(ck).map(hex::encode).ok() - } else { + } else if ck.len() % 2 == 0 { Some(str::from_utf8(ck).unwrap().to_string()) + } else { + // If the length of the digest is not a multiple of 2, then it + // must be improperly formatted (1 hex digit is 2 characters) + None } } From 8c4f595f2414d9dc0aa97e75386bba6f2b76ff6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sun, 17 Nov 2024 16:58:17 +0100 Subject: [PATCH 047/179] test(cksum): rework test for improperly formatted keeps processing --- tests/by-util/test_cksum.rs | 76 +++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index 86de7ea95..bf74de9cc 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -1403,12 +1403,13 @@ fn test_check_trailing_space_fails() { /// in checksum files. /// These tests are excluded from Windows because it does not provide any safe /// conversion between `OsString` and byte sequences for non-utf-8 strings. -#[cfg(not(windows))] mod check_utf8 { - use super::*; + // This test should pass on linux and macos. + #[cfg(not(windows))] #[test] fn test_check_non_utf8_comment() { + use super::*; let hashes = b"MD5 (empty) = 1B2M2Y8AsgTpgAmY7PhCfg==\n\ # Comment with a non utf8 char: >>\xff<<\n\ @@ -1431,8 +1432,12 @@ mod check_utf8 { .no_stderr(); } + // This test should pass on linux. Windows and macos will fail to + // create a file which name contains '\xff'. 
+ #[cfg(target_os = "linux")] #[test] fn test_check_non_utf8_filename() { + use super::*; use std::{ffi::OsString, os::unix::ffi::OsStringExt}; let scene = TestScenario::new(util_name!()); @@ -1569,35 +1574,68 @@ fn test_check_mix_hex_base64() { .stdout_only("foo1.dat: OK\nfoo2.dat: OK\n"); } -#[ignore = "not yet implemented"] +/// This test ensures that an improperly formatted base64 checksum in a file +/// does not interrupt the processing of next lines. #[test] -fn test_check_incorrectly_formatted_checksum_does_not_stop_processing() { - // The first line contains an incorrectly formatted checksum that can't be - // correctly decoded. This must not prevent the program from looking at the - // rest of the file. - let lines = [ - "BLAKE2b-56 (foo1) = GFYEQ7HhAw=", // Should be 2 '=' at the end - "BLAKE2b-56 (foo2) = 18560443b1e103", // OK - ]; - +fn test_check_incorrectly_formatted_checksum_keeps_processing_b64() { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; + at.touch("f"); - at.write("foo1", "foo"); - at.write("foo2", "foo"); - at.write("sum", &lines.join("\n")); + let good_ck = "MD5 (f) = 1B2M2Y8AsgTpgAmY7PhCfg=="; // OK + let bad_ck = "MD5 (f) = 1B2M2Y8AsgTpgAmY7PhCfg="; // Missing last '=' + // Good then Bad scene .ucmd() .arg("--check") - .arg(at.subdir.join("sum")) + .pipe_in([good_ck, bad_ck].join("\n").as_bytes().to_vec()) .succeeds() - .stderr_contains("1 line is improperly formatted") - .stdout_contains("foo2: OK"); + .stdout_contains("f: OK") + .stderr_contains("cksum: WARNING: 1 line is improperly formatted"); + + // Bad then Good + scene + .ucmd() + .arg("--check") + .pipe_in([bad_ck, good_ck].join("\n").as_bytes().to_vec()) + .succeeds() + .stdout_contains("f: OK") + .stderr_contains("cksum: WARNING: 1 line is improperly formatted"); +} + +/// This test ensures that an improperly formatted hexadecimal checksum in a +/// file does not interrupt the processing of next lines. 
+#[test] +fn test_check_incorrectly_formatted_checksum_keeps_processing_hex() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("f"); + + let good_ck = "MD5 (f) = d41d8cd98f00b204e9800998ecf8427e"; // OK + let bad_ck = "MD5 (f) = d41d8cd98f00b204e9800998ecf8427"; // Missing last + + // Good then Bad + scene + .ucmd() + .arg("--check") + .pipe_in([good_ck, bad_ck].join("\n").as_bytes().to_vec()) + .succeeds() + .stdout_contains("f: OK") + .stderr_contains("cksum: WARNING: 1 line is improperly formatted"); + + // Bad then Good + scene + .ucmd() + .arg("--check") + .pipe_in([bad_ck, good_ck].join("\n").as_bytes().to_vec()) + .succeeds() + .stdout_contains("f: OK") + .stderr_contains("cksum: WARNING: 1 line is improperly formatted"); } /// This module reimplements the cksum-base64.pl GNU test. -mod cksum_base64 { +mod gnu_cksum_base64 { use super::*; use crate::common::util::log_info; From cfc66f9f6fe8df12b3378c6d6416fe28689ff217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Tue, 26 Nov 2024 01:46:35 +0100 Subject: [PATCH 048/179] chore(checksum): fix clippy warnings in tests --- src/uucore/src/lib/features/checksum.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 1fbf201e6..f7228830b 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -1056,7 +1056,7 @@ mod tests { ]; for (input, expected) in test_cases { - let captures = algo_based_regex.captures(*input); + let captures = algo_based_regex.captures(input); match expected { Some((algo, bits, filename, checksum)) => { assert!(captures.is_some()); @@ -1206,7 +1206,7 @@ mod tests { // Test leading space before checksum line let lines_algo_based_leading_space = - vec![" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"] + [" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"] .iter() .map(|s| 
OsString::from(s.to_string())) .collect::>(); @@ -1216,7 +1216,7 @@ mod tests { // Test trailing space after checksum line (should fail) let lines_algo_based_leading_space = - vec!["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "] + ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "] .iter() .map(|s| OsString::from(s.to_string())) .collect::>(); From c8b0c8b612f97f29b6d801715ed9bffd26b6e1fd Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 26 Nov 2024 10:35:13 +0100 Subject: [PATCH 049/179] cp: remove some sleep() calls in tests --- tests/by-util/test_cp.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index 156daec1f..7a0889b0f 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -8,6 +8,7 @@ use crate::common::util::TestScenario; #[cfg(not(windows))] use std::fs::set_permissions; +use std::io::Write; #[cfg(not(windows))] use std::os::unix::fs; @@ -447,9 +448,9 @@ fn test_cp_arg_update_older_dest_older_than_src() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -473,9 +474,9 @@ fn test_cp_arg_update_short_no_overwrite() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -499,9 +500,9 @@ fn test_cp_arg_update_short_overwrite() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + 
f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -526,9 +527,9 @@ fn test_cp_arg_update_none_then_all() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); @@ -554,9 +555,9 @@ fn test_cp_arg_update_all_then_none() { let old_content = "old content\n"; let new_content = "new content\n"; - at.write(old, old_content); - - sleep(Duration::from_secs(1)); + let mut f = at.make_file(old); + f.write_all(old_content.as_bytes()).unwrap(); + f.set_modified(std::time::UNIX_EPOCH).unwrap(); at.write(new, new_content); From 869253379066824ecc83fad9cde64bd1b408dc5f Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 26 Nov 2024 15:32:53 +0100 Subject: [PATCH 050/179] uucore/perms: use ORs instead of match (fix todo) --- src/uucore/src/lib/features/perms.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index ebb97042e..3623e9e61 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -23,7 +23,7 @@ use std::fs::Metadata; use std::os::unix::fs::MetadataExt; use std::os::unix::ffi::OsStrExt; -use std::path::{Path, MAIN_SEPARATOR_STR}; +use std::path::{Path, MAIN_SEPARATOR}; /// The various level of verbosity #[derive(PartialEq, Eq, Clone, Debug)] @@ -214,23 +214,13 @@ fn is_root(path: &Path, would_traverse_symlink: bool) -> bool { // We cannot check path.is_dir() here, as this would resolve symlinks, // which we need to avoid here. // All directory-ish paths match "*/", except ".", "..", "*/.", and "*/..". 
- let looks_like_dir = match path.as_os_str().to_str() { - // If it contains special character, prefer to err on the side of safety, i.e. forbidding the chown operation: - None => false, - Some(".") | Some("..") => true, - Some(path_str) => { - (path_str.ends_with(MAIN_SEPARATOR_STR)) - || (path_str.ends_with(&format!("{MAIN_SEPARATOR_STR}."))) - || (path_str.ends_with(&format!("{MAIN_SEPARATOR_STR}.."))) - } - }; - // TODO: Once we reach MSRV 1.74.0, replace this abomination by something simpler, e.g. this: - // let path_bytes = path.as_os_str().as_encoded_bytes(); - // let looks_like_dir = path_bytes == [b'.'] - // || path_bytes == [b'.', b'.'] - // || path_bytes.ends_with(&[MAIN_SEPARATOR as u8]) - // || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.']) - // || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.', b'.']); + let path_bytes = path.as_os_str().as_encoded_bytes(); + let looks_like_dir = path_bytes == [b'.'] + || path_bytes == [b'.', b'.'] + || path_bytes.ends_with(&[MAIN_SEPARATOR as u8]) + || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.']) + || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.', b'.']); + if !looks_like_dir { return false; } From 3e1328c81acde33957d06b1d908ae2732d0cc053 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 02:09:06 +0000 Subject: [PATCH 051/179] fix(deps): update rust crate libc to v0.2.166 --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d13b9c4b..b6848c409 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.165" +version = "0.2.166" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb4d3d38eab6c5239a362fa8bae48c03baf980a6e7079f063942d563ef3533e" +checksum = 
"c2ccc108bbc0b1331bd061864e7cd823c0cab660bbe6970e66e2c0614decde36" [[package]] name = "libloading" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 724e0db7e..d5372d79b 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.165" +version = "0.2.166" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb4d3d38eab6c5239a362fa8bae48c03baf980a6e7079f063942d563ef3533e" +checksum = "c2ccc108bbc0b1331bd061864e7cd823c0cab660bbe6970e66e2c0614decde36" [[package]] name = "libfuzzer-sys" From 069ec76f5b0117cabc4d1af794caceeb3c849b7f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 04:27:39 +0000 Subject: [PATCH 052/179] chore(deps): update rust crate blake3 to v1.5.5 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d13b9c4b..0b4540df2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -237,9 +237,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ "arrayref", "arrayvec", @@ -433,9 +433,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "core-foundation-sys" From ab9e5cb8a780f75e8529fd8805a197d996361d26 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 27 Nov 2024 09:29:22 +0100 Subject: [PATCH 053/179] env: add missing cfg attributes to tests --- tests/by-util/test_env.rs | 11 
++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/by-util/test_env.rs b/tests/by-util/test_env.rs index 208feab6d..8c5b43b2d 100644 --- a/tests/by-util/test_env.rs +++ b/tests/by-util/test_env.rs @@ -10,6 +10,7 @@ use crate::common::util::TestScenario; use crate::common::util::UChild; #[cfg(unix)] use nix::sys::signal::Signal; +#[cfg(feature = "echo")] use regex::Regex; use std::env; use std::path::Path; @@ -98,6 +99,7 @@ fn test_if_windows_batch_files_can_be_executed() { assert!(result.stdout_str().contains("Hello Windows World!")); } +#[cfg(feature = "echo")] #[test] fn test_debug_1() { let ts = TestScenario::new(util_name!()); @@ -118,6 +120,7 @@ fn test_debug_1() { ); } +#[cfg(feature = "echo")] #[test] fn test_debug_2() { let ts = TestScenario::new(util_name!()); @@ -144,6 +147,7 @@ fn test_debug_2() { ); } +#[cfg(feature = "echo")] #[test] fn test_debug1_part_of_string_arg() { let ts = TestScenario::new(util_name!()); @@ -165,6 +169,7 @@ fn test_debug1_part_of_string_arg() { ); } +#[cfg(feature = "echo")] #[test] fn test_debug2_part_of_string_arg() { let ts = TestScenario::new(util_name!()); @@ -651,7 +656,7 @@ fn test_env_with_empty_executable_double_quotes() { } #[test] -#[cfg(unix)] +#[cfg(all(unix, feature = "dirname", feature = "echo"))] fn test_env_overwrite_arg0() { let ts = TestScenario::new(util_name!()); @@ -675,7 +680,7 @@ fn test_env_overwrite_arg0() { } #[test] -#[cfg(unix)] +#[cfg(all(unix, feature = "echo"))] fn test_env_arg_argv0_overwrite() { let ts = TestScenario::new(util_name!()); @@ -723,7 +728,7 @@ fn test_env_arg_argv0_overwrite() { } #[test] -#[cfg(unix)] +#[cfg(all(unix, feature = "echo"))] fn test_env_arg_argv0_overwrite_mixed_with_string_args() { let ts = TestScenario::new(util_name!()); From 75de5a0613aec8d5b5d26b80467d5d0d9cb7d99c Mon Sep 17 00:00:00 2001 From: Peng Zijun <2200012909@stu.pku.edu.cn> Date: Thu, 28 Nov 2024 16:14:16 +0800 Subject: [PATCH 054/179] tr: Add ambiguous octal escape warning 
(#6886) * tr: Add ambiguous octal escape warning, issue #6821 * tr: Make code cleaner --- src/uu/tr/src/operation.rs | 51 ++++++++++++++++++++++++++++++-------- tests/by-util/test_tr.rs | 4 +-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index fc01a8360..035f09972 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -16,6 +16,7 @@ use nom::{ IResult, }; use std::{ + char, collections::{HashMap, HashSet}, error::Error, fmt::{Debug, Display}, @@ -23,6 +24,7 @@ use std::{ ops::Not, }; use uucore::error::UError; +use uucore::show_warning; #[derive(Debug, Clone)] pub enum BadSequence { @@ -293,7 +295,9 @@ impl Sequence { Self::parse_class, Self::parse_char_equal, // NOTE: This must be the last one - map(Self::parse_backslash_or_char, |s| Ok(Self::Char(s))), + map(Self::parse_backslash_or_char_with_warning, |s| { + Ok(Self::Char(s)) + }), )))(input) .map(|(_, r)| r) .unwrap() @@ -302,10 +306,16 @@ impl Sequence { } fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> { + // For `parse_char_range`, `parse_char_star`, `parse_char_repeat`, `parse_char_equal`. + // Because in these patterns, there's no ambiguous cases. 
+ preceded(tag("\\"), Self::parse_octal_up_to_three_digits)(input) + } + + fn parse_octal_with_warning(input: &[u8]) -> IResult<&[u8], u8> { preceded( tag("\\"), alt(( - Self::parse_octal_up_to_three_digits, + Self::parse_octal_up_to_three_digits_with_warning, // Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be // parsed as as a byte // See test `test_multibyte_octal_sequence` @@ -319,16 +329,29 @@ impl Sequence { recognize(many_m_n(1, 3, one_of("01234567"))), |out: &[u8]| { let str_to_parse = std::str::from_utf8(out).unwrap(); + u8::from_str_radix(str_to_parse, 8).ok() + }, + )(input) + } - match u8::from_str_radix(str_to_parse, 8) { - Ok(ue) => Some(ue), - Err(_pa) => { - // TODO - // A warning needs to be printed here - // See https://github.com/uutils/coreutils/issues/6821 - None - } + fn parse_octal_up_to_three_digits_with_warning(input: &[u8]) -> IResult<&[u8], u8> { + map_opt( + recognize(many_m_n(1, 3, one_of("01234567"))), + |out: &[u8]| { + let str_to_parse = std::str::from_utf8(out).unwrap(); + let result = u8::from_str_radix(str_to_parse, 8).ok(); + if result.is_none() { + let origin_octal: &str = std::str::from_utf8(input).unwrap(); + let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap(); + let outstand_char: char = char::from_u32(input[2] as u32).unwrap(); + show_warning!( + "the ambiguous octal escape \\{} is being\n interpreted as the 2-byte sequence \\0{}, {}", + origin_octal, + actual_octal_tail, + outstand_char + ); } + result }, )(input) } @@ -360,6 +383,14 @@ impl Sequence { alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input) } + fn parse_backslash_or_char_with_warning(input: &[u8]) -> IResult<&[u8], u8> { + alt(( + Self::parse_octal_with_warning, + Self::parse_backslash, + Self::single_char, + ))(input) + } + fn single_char(input: &[u8]) -> IResult<&[u8], u8> { take(1usize)(input).map(|(l, a)| (l, a[0])) } diff --git a/tests/by-util/test_tr.rs 
b/tests/by-util/test_tr.rs index ebd7635e4..705f40834 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -1494,9 +1494,7 @@ fn test_multibyte_octal_sequence() { .args(&["-d", r"\501"]) .pipe_in("(1Ł)") .succeeds() - // TODO - // A warning needs to be printed here - // See https://github.com/uutils/coreutils/issues/6821 + .stderr_is("tr: warning: the ambiguous octal escape \\501 is being\n interpreted as the 2-byte sequence \\050, 1\n") .stdout_is("Ł)"); } From 29de3ee43c3277273b343119c877209c1e40383c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 18:13:49 +0100 Subject: [PATCH 055/179] run rustfmt on src/uu/shuf/src/shuf.rs --- src/uu/shuf/src/shuf.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs index 260b5130c..15b158b0c 100644 --- a/src/uu/shuf/src/shuf.rs +++ b/src/uu/shuf/src/shuf.rs @@ -279,7 +279,10 @@ impl<'a> Shufable for Vec<&'a [u8]> { // this is safe. (**self).choose(rng).unwrap() } - type PartialShuffleIterator<'b> = std::iter::Copied> where Self: 'b; + type PartialShuffleIterator<'b> + = std::iter::Copied> + where + Self: 'b; fn partial_shuffle<'b>( &'b mut self, rng: &'b mut WrappedRng, @@ -298,7 +301,10 @@ impl Shufable for RangeInclusive { fn choose(&self, rng: &mut WrappedRng) -> usize { rng.gen_range(self.clone()) } - type PartialShuffleIterator<'b> = NonrepeatingIterator<'b> where Self: 'b; + type PartialShuffleIterator<'b> + = NonrepeatingIterator<'b> + where + Self: 'b; fn partial_shuffle<'b>( &'b mut self, rng: &'b mut WrappedRng, From cfb0b95b62c2b415a3b72640af023bb14675be23 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 19:06:10 +0100 Subject: [PATCH 056/179] clippy: fix 'empty line after doc comment' --- src/uu/dd/src/numbers.rs | 2 +- src/uu/df/src/df.rs | 2 -- src/uu/timeout/src/timeout.rs | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/uu/dd/src/numbers.rs 
b/src/uu/dd/src/numbers.rs index 8a6fa5a7a..c29668c89 100644 --- a/src/uu/dd/src/numbers.rs +++ b/src/uu/dd/src/numbers.rs @@ -2,8 +2,8 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -/// Functions for formatting a number as a magnitude and a unit suffix. +/// Functions for formatting a number as a magnitude and a unit suffix. /// The first ten powers of 1024. const IEC_BASES: [u128; 10] = [ 1, diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index 517f8a31f..8ef84a463 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -311,7 +311,6 @@ fn is_best(previous: &[MountInfo], mi: &MountInfo) -> bool { /// /// Finally, if there are duplicate entries, the one with the shorter /// path is kept. - fn filter_mount_list(vmi: Vec, opt: &Options) -> Vec { let mut result = vec![]; for mi in vmi { @@ -331,7 +330,6 @@ fn filter_mount_list(vmi: Vec, opt: &Options) -> Vec { /// /// `opt` excludes certain filesystems from consideration and allows for the synchronization of filesystems before running; see /// [`Options`] for more information. - fn get_all_filesystems(opt: &Options) -> UResult> { // Run a sync call before any operation if so instructed. if opt.sync { diff --git a/src/uu/timeout/src/timeout.rs b/src/uu/timeout/src/timeout.rs index 19016900a..2ba93769a 100644 --- a/src/uu/timeout/src/timeout.rs +++ b/src/uu/timeout/src/timeout.rs @@ -288,7 +288,6 @@ fn preserve_signal_info(signal: libc::c_int) -> libc::c_int { } /// TODO: Improve exit codes, and make them consistent with the GNU Coreutils exit codes. 
- fn timeout( cmd: &[String], duration: Duration, From 4d3902426a946533fecf020572864964983544bd Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 19:17:20 +0100 Subject: [PATCH 057/179] clippy: fix unneeded 'return' statement --- src/uu/env/src/string_parser.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/uu/env/src/string_parser.rs b/src/uu/env/src/string_parser.rs index 0ea4a3c0c..5cc8d77a1 100644 --- a/src/uu/env/src/string_parser.rs +++ b/src/uu/env/src/string_parser.rs @@ -114,10 +114,9 @@ impl<'a> StringParser<'a> { } pub fn peek_chunk(&self) -> Option> { - return self - .get_chunk_with_length_at(self.pointer) + self.get_chunk_with_length_at(self.pointer) .ok() - .map(|(chunk, _)| chunk); + .map(|(chunk, _)| chunk) } pub fn consume_chunk(&mut self) -> Result, Error> { From 41a3695b3fe5678a9be82f1585d104249d7e737d Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 18:05:15 +0100 Subject: [PATCH 058/179] uucore: Fix a clippy warning The following explicit lifetimes could be elided: 'a --- src/uu/csplit/src/csplit.rs | 4 ++-- src/uu/cut/src/matcher.rs | 2 +- src/uu/cut/src/searcher.rs | 2 +- src/uu/date/src/date.rs | 4 ++-- src/uu/dd/src/dd.rs | 6 +++--- src/uu/env/src/variable_parser.rs | 2 +- src/uu/fmt/src/linebreak.rs | 2 +- src/uu/fmt/src/parasplit.rs | 12 ++++++------ src/uu/join/src/join.rs | 2 +- src/uu/od/src/inputdecoder.rs | 8 ++++---- src/uu/od/src/multifilereader.rs | 6 +++--- src/uu/shuf/src/shuf.rs | 4 ++-- src/uu/sort/src/merge.rs | 4 ++-- src/uu/split/src/filenames.rs | 2 +- src/uu/split/src/split.rs | 6 +++--- src/uu/tail/src/chunks.rs | 2 +- src/uu/wc/src/utf8/read.rs | 4 ++-- src/uucore/src/lib/features/sum.rs | 2 +- 18 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/uu/csplit/src/csplit.rs b/src/uu/csplit/src/csplit.rs index 9e132b704..2054e6cff 100644 --- a/src/uu/csplit/src/csplit.rs +++ b/src/uu/csplit/src/csplit.rs @@ -197,7 +197,7 @@ struct 
SplitWriter<'a> { dev_null: bool, } -impl<'a> Drop for SplitWriter<'a> { +impl Drop for SplitWriter<'_> { fn drop(&mut self) { if self.options.elide_empty_files && self.size == 0 { let file_name = self.options.split_name.get(self.counter); @@ -206,7 +206,7 @@ impl<'a> Drop for SplitWriter<'a> { } } -impl<'a> SplitWriter<'a> { +impl SplitWriter<'_> { fn new(options: &CsplitOptions) -> SplitWriter { SplitWriter { options, diff --git a/src/uu/cut/src/matcher.rs b/src/uu/cut/src/matcher.rs index 953e083b1..bb0c44d5b 100644 --- a/src/uu/cut/src/matcher.rs +++ b/src/uu/cut/src/matcher.rs @@ -23,7 +23,7 @@ impl<'a> ExactMatcher<'a> { } } -impl<'a> Matcher for ExactMatcher<'a> { +impl Matcher for ExactMatcher<'_> { fn next_match(&self, haystack: &[u8]) -> Option<(usize, usize)> { let mut pos = 0usize; loop { diff --git a/src/uu/cut/src/searcher.rs b/src/uu/cut/src/searcher.rs index 21424790e..41c12cf6e 100644 --- a/src/uu/cut/src/searcher.rs +++ b/src/uu/cut/src/searcher.rs @@ -27,7 +27,7 @@ impl<'a, 'b, M: Matcher> Searcher<'a, 'b, M> { // Iterate over field delimiters // Returns (first, last) positions of each sequence, where `haystack[first..last]` // corresponds to the delimiter. 
-impl<'a, 'b, M: Matcher> Iterator for Searcher<'a, 'b, M> { +impl Iterator for Searcher<'_, '_, M> { type Item = (usize, usize); fn next(&mut self) -> Option { diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 9c7d86564..766e79bd4 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -103,7 +103,7 @@ enum Iso8601Format { Ns, } -impl<'a> From<&'a str> for Iso8601Format { +impl From<&str> for Iso8601Format { fn from(s: &str) -> Self { match s { HOURS => Self::Hours, @@ -123,7 +123,7 @@ enum Rfc3339Format { Ns, } -impl<'a> From<&'a str> for Rfc3339Format { +impl From<&str> for Rfc3339Format { fn from(s: &str) -> Self { match s { DATE => Self::Date, diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 24fab1e2f..ca8c2a8b5 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -424,7 +424,7 @@ fn make_linux_iflags(iflags: &IFlags) -> Option { } } -impl<'a> Read for Input<'a> { +impl Read for Input<'_> { fn read(&mut self, buf: &mut [u8]) -> io::Result { let mut base_idx = 0; let target_len = buf.len(); @@ -447,7 +447,7 @@ impl<'a> Read for Input<'a> { } } -impl<'a> Input<'a> { +impl Input<'_> { /// Discard the system file cache for the given portion of the input. /// /// `offset` and `len` specify a contiguous portion of the input. 
@@ -928,7 +928,7 @@ enum BlockWriter<'a> { Unbuffered(Output<'a>), } -impl<'a> BlockWriter<'a> { +impl BlockWriter<'_> { fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) { match self { Self::Unbuffered(o) => o.discard_cache(offset, len), diff --git a/src/uu/env/src/variable_parser.rs b/src/uu/env/src/variable_parser.rs index f225d4945..d08c9f0dc 100644 --- a/src/uu/env/src/variable_parser.rs +++ b/src/uu/env/src/variable_parser.rs @@ -11,7 +11,7 @@ pub struct VariableParser<'a, 'b> { pub parser: &'b mut StringParser<'a>, } -impl<'a, 'b> VariableParser<'a, 'b> { +impl<'a> VariableParser<'a, '_> { fn get_current_char(&self) -> Option { self.parser.peek().ok() } diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index aa1477eba..05d01d1a3 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -20,7 +20,7 @@ struct BreakArgs<'a> { ostream: &'a mut BufWriter, } -impl<'a> BreakArgs<'a> { +impl BreakArgs<'_> { fn compute_width(&self, winfo: &WordInfo, posn: usize, fresh: bool) -> usize { if fresh { 0 diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index 1ae8ea34f..8aa18c4c9 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -73,7 +73,7 @@ pub struct FileLines<'a> { lines: Lines<&'a mut FileOrStdReader>, } -impl<'a> FileLines<'a> { +impl FileLines<'_> { fn new<'b>(opts: &'b FmtOptions, lines: Lines<&'b mut FileOrStdReader>) -> FileLines<'b> { FileLines { opts, lines } } @@ -144,7 +144,7 @@ impl<'a> FileLines<'a> { } } -impl<'a> Iterator for FileLines<'a> { +impl Iterator for FileLines<'_> { type Item = Line; fn next(&mut self) -> Option { @@ -232,7 +232,7 @@ pub struct ParagraphStream<'a> { opts: &'a FmtOptions, } -impl<'a> ParagraphStream<'a> { +impl ParagraphStream<'_> { pub fn new<'b>(opts: &'b FmtOptions, reader: &'b mut FileOrStdReader) -> ParagraphStream<'b> { let lines = FileLines::new(opts, reader.lines()).peekable(); // at the beginning of the file, we might 
find mail headers @@ -273,7 +273,7 @@ impl<'a> ParagraphStream<'a> { } } -impl<'a> Iterator for ParagraphStream<'a> { +impl Iterator for ParagraphStream<'_> { type Item = Result; #[allow(clippy::cognitive_complexity)] @@ -491,7 +491,7 @@ struct WordSplit<'a> { prev_punct: bool, } -impl<'a> WordSplit<'a> { +impl WordSplit<'_> { fn analyze_tabs(&self, string: &str) -> (Option, usize, Option) { // given a string, determine (length before tab) and (printed length after first tab) // if there are no tabs, beforetab = -1 and aftertab is the printed length @@ -517,7 +517,7 @@ impl<'a> WordSplit<'a> { } } -impl<'a> WordSplit<'a> { +impl WordSplit<'_> { fn new<'b>(opts: &'b FmtOptions, string: &'b str) -> WordSplit<'b> { // wordsplits *must* start at a non-whitespace character let trim_string = string.trim_start(); diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index e7bc7da69..f01f75b71 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -109,7 +109,7 @@ struct MultiByteSep<'a> { finder: Finder<'a>, } -impl<'a> Separator for MultiByteSep<'a> { +impl Separator for MultiByteSep<'_> { fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)> { let mut field_ranges = Vec::with_capacity(len_guess); let mut last_end = 0; diff --git a/src/uu/od/src/inputdecoder.rs b/src/uu/od/src/inputdecoder.rs index 62117d546..44ad29228 100644 --- a/src/uu/od/src/inputdecoder.rs +++ b/src/uu/od/src/inputdecoder.rs @@ -33,7 +33,7 @@ where byte_order: ByteOrder, } -impl<'a, I> InputDecoder<'a, I> { +impl InputDecoder<'_, I> { /// Creates a new `InputDecoder` with an allocated buffer of `normal_length` + `peek_length` bytes. /// `byte_order` determines how to read multibyte formats from the buffer. 
pub fn new( @@ -55,7 +55,7 @@ impl<'a, I> InputDecoder<'a, I> { } } -impl<'a, I> InputDecoder<'a, I> +impl InputDecoder<'_, I> where I: PeekRead, { @@ -81,7 +81,7 @@ where } } -impl<'a, I> HasError for InputDecoder<'a, I> +impl HasError for InputDecoder<'_, I> where I: HasError, { @@ -103,7 +103,7 @@ pub struct MemoryDecoder<'a> { byte_order: ByteOrder, } -impl<'a> MemoryDecoder<'a> { +impl MemoryDecoder<'_> { /// Set a part of the internal buffer to zero. /// access to the whole buffer is possible, not just to the valid data. pub fn zero_out_buffer(&mut self, start: usize, end: usize) { diff --git a/src/uu/od/src/multifilereader.rs b/src/uu/od/src/multifilereader.rs index 813ef029f..34cd251ac 100644 --- a/src/uu/od/src/multifilereader.rs +++ b/src/uu/od/src/multifilereader.rs @@ -28,7 +28,7 @@ pub trait HasError { fn has_error(&self) -> bool; } -impl<'b> MultifileReader<'b> { +impl MultifileReader<'_> { pub fn new(fnames: Vec) -> MultifileReader { let mut mf = MultifileReader { ni: fnames, @@ -76,7 +76,7 @@ impl<'b> MultifileReader<'b> { } } -impl<'b> io::Read for MultifileReader<'b> { +impl io::Read for MultifileReader<'_> { // Fill buf with bytes read from the list of files // Returns Ok() // Handles io errors itself, thus always returns OK @@ -113,7 +113,7 @@ impl<'b> io::Read for MultifileReader<'b> { } } -impl<'b> HasError for MultifileReader<'b> { +impl HasError for MultifileReader<'_> { fn has_error(&self) -> bool { self.any_err } diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs index 15b158b0c..2d8023448 100644 --- a/src/uu/shuf/src/shuf.rs +++ b/src/uu/shuf/src/shuf.rs @@ -380,7 +380,7 @@ impl<'a> NonrepeatingIterator<'a> { } } -impl<'a> Iterator for NonrepeatingIterator<'a> { +impl Iterator for NonrepeatingIterator<'_> { type Item = usize; fn next(&mut self) -> Option { @@ -407,7 +407,7 @@ trait Writable { fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>; } -impl<'a> Writable for &'a [u8] { +impl Writable for &[u8] { fn 
write_all_to(&self, output: &mut impl Write) -> Result<(), Error> { output.write_all(self) } diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index c0457ffa4..d6872ec80 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -267,7 +267,7 @@ pub struct FileMerger<'a> { reader_join_handle: JoinHandle>, } -impl<'a> FileMerger<'a> { +impl FileMerger<'_> { /// Write the merged contents to the output file. pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> { let mut out = output.into_write(); @@ -341,7 +341,7 @@ struct FileComparator<'a> { settings: &'a GlobalSettings, } -impl<'a> Compare for FileComparator<'a> { +impl Compare for FileComparator<'_> { fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering { let mut cmp = compare_by( &a.current_chunk.lines()[a.line_idx], diff --git a/src/uu/split/src/filenames.rs b/src/uu/split/src/filenames.rs index d2ce1beb3..9e899a417 100644 --- a/src/uu/split/src/filenames.rs +++ b/src/uu/split/src/filenames.rs @@ -341,7 +341,7 @@ impl<'a> FilenameIterator<'a> { } } -impl<'a> Iterator for FilenameIterator<'a> { +impl Iterator for FilenameIterator<'_> { type Item = String; fn next(&mut self) -> Option { diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 11fa04184..86fded1d5 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -748,7 +748,7 @@ impl<'a> ByteChunkWriter<'a> { } } -impl<'a> Write for ByteChunkWriter<'a> { +impl Write for ByteChunkWriter<'_> { /// Implements `--bytes=SIZE` fn write(&mut self, mut buf: &[u8]) -> std::io::Result { // If the length of `buf` exceeds the number of bytes remaining @@ -872,7 +872,7 @@ impl<'a> LineChunkWriter<'a> { } } -impl<'a> Write for LineChunkWriter<'a> { +impl Write for LineChunkWriter<'_> { /// Implements `--lines=NUMBER` fn write(&mut self, buf: &[u8]) -> std::io::Result { // If the number of lines in `buf` exceeds the number of lines @@ -978,7 +978,7 @@ impl<'a> 
LineBytesChunkWriter<'a> { } } -impl<'a> Write for LineBytesChunkWriter<'a> { +impl Write for LineBytesChunkWriter<'_> { /// Write as many lines to a chunk as possible without /// exceeding the byte limit. If a single line has more bytes /// than the limit, then fill an entire single chunk with those diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs index 636de7a90..2c80ac0ac 100644 --- a/src/uu/tail/src/chunks.rs +++ b/src/uu/tail/src/chunks.rs @@ -64,7 +64,7 @@ impl<'a> ReverseChunks<'a> { } } -impl<'a> Iterator for ReverseChunks<'a> { +impl Iterator for ReverseChunks<'_> { type Item = Vec; fn next(&mut self) -> Option { diff --git a/src/uu/wc/src/utf8/read.rs b/src/uu/wc/src/utf8/read.rs index 819b0a689..9515cdc9f 100644 --- a/src/uu/wc/src/utf8/read.rs +++ b/src/uu/wc/src/utf8/read.rs @@ -27,7 +27,7 @@ pub enum BufReadDecoderError<'a> { Io(io::Error), } -impl<'a> fmt::Display for BufReadDecoderError<'a> { +impl fmt::Display for BufReadDecoderError<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { BufReadDecoderError::InvalidByteSequence(bytes) => { @@ -38,7 +38,7 @@ impl<'a> fmt::Display for BufReadDecoderError<'a> { } } -impl<'a> Error for BufReadDecoderError<'a> { +impl Error for BufReadDecoderError<'_> { fn source(&self) -> Option<&(dyn Error + 'static)> { match *self { BufReadDecoderError::InvalidByteSequence(_) => None, diff --git a/src/uucore/src/lib/features/sum.rs b/src/uucore/src/lib/features/sum.rs index 1baff7f79..df9e1673d 100644 --- a/src/uucore/src/lib/features/sum.rs +++ b/src/uucore/src/lib/features/sum.rs @@ -403,7 +403,7 @@ impl<'a> DigestWriter<'a> { } } -impl<'a> Write for DigestWriter<'a> { +impl Write for DigestWriter<'_> { #[cfg(not(windows))] fn write(&mut self, buf: &[u8]) -> std::io::Result { self.digest.hash_update(buf); From a3a4457a4435fe559de667fd59343be1c4b2a244 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 21:27:49 +0100 Subject: [PATCH 059/179] clippy: spawned 
process is never 'wait()'ed on --- tests/by-util/test_dd.rs | 10 ++++++---- tests/by-util/test_env.rs | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index e1e55054a..64ca7603b 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -1658,13 +1658,14 @@ fn test_reading_partial_blocks_from_fifo() { // Start different processes to write to the FIFO, with a small // pause in between. let mut writer_command = Command::new("sh"); - writer_command + let _ = writer_command .args([ "-c", &format!("(printf \"ab\"; sleep 0.1; printf \"cd\") > {fifoname}"), ]) .spawn() - .unwrap(); + .unwrap() + .wait(); let output = child.wait_with_output().unwrap(); assert_eq!(output.stdout, b"abcd"); @@ -1701,13 +1702,14 @@ fn test_reading_partial_blocks_from_fifo_unbuffered() { // Start different processes to write to the FIFO, with a small // pause in between. let mut writer_command = Command::new("sh"); - writer_command + let _ = writer_command .args([ "-c", &format!("(printf \"ab\"; sleep 0.1; printf \"cd\") > {fifoname}"), ]) .spawn() - .unwrap(); + .unwrap() + .wait(); let output = child.wait_with_output().unwrap(); assert_eq!(output.stdout, b"abcd"); diff --git a/tests/by-util/test_env.rs b/tests/by-util/test_env.rs index 8c5b43b2d..a1b13e020 100644 --- a/tests/by-util/test_env.rs +++ b/tests/by-util/test_env.rs @@ -36,13 +36,14 @@ impl Target { Self { child } } fn send_signal(&mut self, signal: Signal) { - Command::new("kill") + let _ = Command::new("kill") .args(&[ format!("-{}", signal as i32), format!("{}", self.child.id()), ]) .spawn() - .expect("failed to send signal"); + .expect("failed to send signal") + .wait(); self.child.delay(100); } fn is_alive(&mut self) -> bool { From c0840dd43f7764cdcdbe3c839767835cfba29fde Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 21:29:23 +0100 Subject: [PATCH 060/179] clippy: unneeded 'return' statement --- 
tests/by-util/test_ls.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 0c0d8e3a8..3b2d46b39 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -1329,10 +1329,10 @@ fn test_ls_long_symlink_color() { Some(captures) => { dbg!(captures.get(1).unwrap().as_str().to_string()); dbg!(captures.get(2).unwrap().as_str().to_string()); - return ( + ( captures.get(1).unwrap().as_str().to_string(), captures.get(2).unwrap().as_str().to_string(), - ); + ) } None => (String::new(), input.to_string()), } From 9d404e5ee83cd341b225baa3a860697a7ae919aa Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 21:29:57 +0100 Subject: [PATCH 061/179] clippy: it is more idiomatic to use 'Option<&T>' instead of '&Option' --- tests/common/util.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/common/util.rs b/tests/common/util.rs index 87c937492..844618def 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -75,7 +75,7 @@ pub fn is_ci() -> bool { } /// Read a test scenario fixture, returning its bytes -fn read_scenario_fixture>(tmpd: &Option>, file_rel_path: S) -> Vec { +fn read_scenario_fixture>(tmpd: Option<&Rc>, file_rel_path: S) -> Vec { let tmpdir_path = tmpd.as_ref().unwrap().as_ref().path(); AtPath::new(tmpdir_path).read_bytes(file_rel_path.as_ref().to_str().unwrap()) } @@ -517,7 +517,7 @@ impl CmdResult { /// like `stdout_is()`, but expects the contents of the file at the provided relative path #[track_caller] pub fn stdout_is_fixture>(&self, file_rel_path: T) -> &Self { - let contents = read_scenario_fixture(&self.tmpd, file_rel_path); + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); self.stdout_is(String::from_utf8(contents).unwrap()) } @@ -539,7 +539,7 @@ impl CmdResult { /// ``` #[track_caller] pub fn stdout_is_fixture_bytes>(&self, file_rel_path: T) -> &Self { - let contents = 
read_scenario_fixture(&self.tmpd, file_rel_path); + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); self.stdout_is_bytes(contents) } @@ -552,7 +552,7 @@ impl CmdResult { template_vars: &[(&str, &str)], ) -> &Self { let mut contents = - String::from_utf8(read_scenario_fixture(&self.tmpd, file_rel_path)).unwrap(); + String::from_utf8(read_scenario_fixture(self.tmpd.as_ref(), file_rel_path)).unwrap(); for kv in template_vars { contents = contents.replace(kv.0, kv.1); } @@ -566,7 +566,8 @@ impl CmdResult { file_rel_path: T, template_vars: &[Vec<(String, String)>], ) { - let contents = String::from_utf8(read_scenario_fixture(&self.tmpd, file_rel_path)).unwrap(); + let contents = + String::from_utf8(read_scenario_fixture(self.tmpd.as_ref(), file_rel_path)).unwrap(); let possible_values = template_vars.iter().map(|vars| { let mut contents = contents.clone(); for kv in vars { @@ -604,7 +605,7 @@ impl CmdResult { /// Like `stdout_is_fixture`, but for stderr #[track_caller] pub fn stderr_is_fixture>(&self, file_rel_path: T) -> &Self { - let contents = read_scenario_fixture(&self.tmpd, file_rel_path); + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); self.stderr_is(String::from_utf8(contents).unwrap()) } @@ -629,7 +630,7 @@ impl CmdResult { /// like `stdout_only()`, but expects the contents of the file at the provided relative path #[track_caller] pub fn stdout_only_fixture>(&self, file_rel_path: T) -> &Self { - let contents = read_scenario_fixture(&self.tmpd, file_rel_path); + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); self.stdout_only_bytes(contents) } @@ -1384,7 +1385,7 @@ impl UCommand { /// like `pipe_in()`, but uses the contents of the file at the provided relative path as the piped in data pub fn pipe_in_fixture>(&mut self, file_rel_path: S) -> &mut Self { - let contents = read_scenario_fixture(&self.tmpd, file_rel_path); + let contents = read_scenario_fixture(self.tmpd.as_ref(), 
file_rel_path); self.pipe_in(contents) } From 8df608cf974f2ffe9b0eca69edae81b40cb8d237 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 28 Nov 2024 21:29:00 +0100 Subject: [PATCH 062/179] clippy: used underscore-prefixed item --- tests/by-util/test_du.rs | 102 +++++++++++++++------------------------ 1 file changed, 38 insertions(+), 64 deletions(-) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index af9718a4e..84e3b5050 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -7,7 +7,7 @@ #[cfg(not(windows))] use regex::Regex; -#[cfg(any(target_os = "linux", target_os = "android"))] +#[cfg(not(target_os = "windows"))] use crate::common::util::expected_result; use crate::common::util::TestScenario; @@ -36,11 +36,11 @@ fn test_du_basics() { return; } } - _du_basics(result.stdout_str()); + du_basics(result.stdout_str()); } #[cfg(target_vendor = "apple")] -fn _du_basics(s: &str) { +fn du_basics(s: &str) { let answer = concat!( "4\t./subdir/deeper/deeper_dir\n", "8\t./subdir/deeper\n", @@ -52,7 +52,7 @@ fn _du_basics(s: &str) { } #[cfg(target_os = "windows")] -fn _du_basics(s: &str) { +fn du_basics(s: &str) { let answer = concat!( "0\t.\\subdir\\deeper\\deeper_dir\n", "0\t.\\subdir\\deeper\n", @@ -64,7 +64,7 @@ fn _du_basics(s: &str) { } #[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"),))] -fn _du_basics(s: &str) { +fn du_basics(s: &str) { let answer = concat!( "8\t./subdir/deeper/deeper_dir\n", "16\t./subdir/deeper\n", @@ -95,19 +95,19 @@ fn test_du_basics_subdir() { return; } } - _du_basics_subdir(result.stdout_str()); + du_basics_subdir(result.stdout_str()); } #[cfg(target_vendor = "apple")] -fn _du_basics_subdir(s: &str) { +fn du_basics_subdir(s: &str) { assert_eq!(s, "4\tsubdir/deeper/deeper_dir\n8\tsubdir/deeper\n"); } #[cfg(target_os = "windows")] -fn _du_basics_subdir(s: &str) { +fn du_basics_subdir(s: &str) { assert_eq!(s, "0\tsubdir/deeper\\deeper_dir\n0\tsubdir/deeper\n"); } #[cfg(target_os = 
"freebsd")] -fn _du_basics_subdir(s: &str) { +fn du_basics_subdir(s: &str) { assert_eq!(s, "8\tsubdir/deeper/deeper_dir\n16\tsubdir/deeper\n"); } #[cfg(all( @@ -115,7 +115,7 @@ fn _du_basics_subdir(s: &str) { not(target_os = "windows"), not(target_os = "freebsd") ))] -fn _du_basics_subdir(s: &str) { +fn du_basics_subdir(s: &str) { // MS-WSL linux has altered expected output if uucore::os::is_wsl_1() { assert_eq!(s, "0\tsubdir/deeper\n"); @@ -206,20 +206,20 @@ fn test_du_soft_link() { return; } } - _du_soft_link(result.stdout_str()); + du_soft_link(result.stdout_str()); } #[cfg(target_vendor = "apple")] -fn _du_soft_link(s: &str) { +fn du_soft_link(s: &str) { // 'macos' host variants may have `du` output variation for soft links assert!((s == "12\tsubdir/links\n") || (s == "16\tsubdir/links\n")); } #[cfg(target_os = "windows")] -fn _du_soft_link(s: &str) { +fn du_soft_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n"); } #[cfg(target_os = "freebsd")] -fn _du_soft_link(s: &str) { +fn du_soft_link(s: &str) { assert_eq!(s, "16\tsubdir/links\n"); } #[cfg(all( @@ -227,7 +227,7 @@ fn _du_soft_link(s: &str) { not(target_os = "windows"), not(target_os = "freebsd") ))] -fn _du_soft_link(s: &str) { +fn du_soft_link(s: &str) { // MS-WSL linux has altered expected output if uucore::os::is_wsl_1() { assert_eq!(s, "8\tsubdir/links\n"); @@ -255,19 +255,19 @@ fn test_du_hard_link() { } } // We do not double count hard links as the inodes are identical - _du_hard_link(result.stdout_str()); + du_hard_link(result.stdout_str()); } #[cfg(target_vendor = "apple")] -fn _du_hard_link(s: &str) { +fn du_hard_link(s: &str) { assert_eq!(s, "12\tsubdir/links\n"); } #[cfg(target_os = "windows")] -fn _du_hard_link(s: &str) { +fn du_hard_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n"); } #[cfg(target_os = "freebsd")] -fn _du_hard_link(s: &str) { +fn du_hard_link(s: &str) { assert_eq!(s, "16\tsubdir/links\n"); } #[cfg(all( @@ -275,7 +275,7 @@ fn _du_hard_link(s: &str) { not(target_os = 
"windows"), not(target_os = "freebsd") ))] -fn _du_hard_link(s: &str) { +fn du_hard_link(s: &str) { // MS-WSL linux has altered expected output if uucore::os::is_wsl_1() { assert_eq!(s, "8\tsubdir/links\n"); @@ -299,19 +299,19 @@ fn test_du_d_flag() { return; } } - _du_d_flag(result.stdout_str()); + du_d_flag(result.stdout_str()); } #[cfg(target_vendor = "apple")] -fn _du_d_flag(s: &str) { +fn du_d_flag(s: &str) { assert_eq!(s, "20\t./subdir\n24\t.\n"); } #[cfg(target_os = "windows")] -fn _du_d_flag(s: &str) { +fn du_d_flag(s: &str) { assert_eq!(s, "8\t.\\subdir\n8\t.\n"); } #[cfg(target_os = "freebsd")] -fn _du_d_flag(s: &str) { +fn du_d_flag(s: &str) { assert_eq!(s, "36\t./subdir\n44\t.\n"); } #[cfg(all( @@ -319,7 +319,7 @@ fn _du_d_flag(s: &str) { not(target_os = "windows"), not(target_os = "freebsd") ))] -fn _du_d_flag(s: &str) { +fn du_d_flag(s: &str) { // MS-WSL linux has altered expected output if uucore::os::is_wsl_1() { assert_eq!(s, "8\t./subdir\n8\t.\n"); @@ -348,7 +348,7 @@ fn test_du_dereference() { } } - _du_dereference(result.stdout_str()); + du_dereference(result.stdout_str()); } #[cfg(not(windows))] @@ -376,15 +376,15 @@ fn test_du_dereference_args() { } #[cfg(target_vendor = "apple")] -fn _du_dereference(s: &str) { +fn du_dereference(s: &str) { assert_eq!(s, "4\tsubdir/links/deeper_dir\n16\tsubdir/links\n"); } #[cfg(target_os = "windows")] -fn _du_dereference(s: &str) { +fn du_dereference(s: &str) { assert_eq!(s, "0\tsubdir/links\\deeper_dir\n8\tsubdir/links\n"); } #[cfg(target_os = "freebsd")] -fn _du_dereference(s: &str) { +fn du_dereference(s: &str) { assert_eq!(s, "8\tsubdir/links/deeper_dir\n24\tsubdir/links\n"); } #[cfg(all( @@ -392,7 +392,7 @@ fn _du_dereference(s: &str) { not(target_os = "windows"), not(target_os = "freebsd") ))] -fn _du_dereference(s: &str) { +fn du_dereference(s: &str) { // MS-WSL linux has altered expected output if uucore::os::is_wsl_1() { assert_eq!(s, "0\tsubdir/links/deeper_dir\n8\tsubdir/links\n"); @@ -454,20 
+454,15 @@ fn test_du_inodes_basic() { let ts = TestScenario::new(util_name!()); let result = ts.ucmd().arg("--inodes").succeeds(); - #[cfg(any(target_os = "linux", target_os = "android"))] + #[cfg(not(target_os = "windows"))] { let result_reference = unwrap_or_return!(expected_result(&ts, &["--inodes"])); assert_eq!(result.stdout_str(), result_reference.stdout_str()); } - #[cfg(not(any(target_os = "linux", target_os = "android")))] - _du_inodes_basic(result.stdout_str()); -} - -#[cfg(target_os = "windows")] -fn _du_inodes_basic(s: &str) { + #[cfg(target_os = "windows")] assert_eq!( - s, + result.stdout_str(), concat!( "2\t.\\subdir\\deeper\\deeper_dir\n", "4\t.\\subdir\\deeper\n", @@ -478,20 +473,6 @@ fn _du_inodes_basic(s: &str) { ); } -#[cfg(not(target_os = "windows"))] -fn _du_inodes_basic(s: &str) { - assert_eq!( - s, - concat!( - "2\t./subdir/deeper/deeper_dir\n", - "4\t./subdir/deeper\n", - "3\t./subdir/links\n", - "8\t./subdir\n", - "11\t.\n", - ) - ); -} - #[test] fn test_du_inodes() { let ts = TestScenario::new(util_name!()); @@ -706,8 +687,10 @@ fn test_du_no_permission() { return; } } - - _du_no_permission(result.stdout_str()); + #[cfg(not(target_vendor = "apple"))] + assert_eq!(result.stdout_str(), "4\tsubdir/links\n"); + #[cfg(target_vendor = "apple")] + assert_eq!(result.stdout_str(), "0\tsubdir/links\n"); } #[cfg(not(target_os = "windows"))] @@ -725,15 +708,6 @@ fn test_du_no_exec_permission() { result.stderr_contains("du: cannot access 'd/no-x/y': Permission denied"); } -#[cfg(target_vendor = "apple")] -fn _du_no_permission(s: &str) { - assert_eq!(s, "0\tsubdir/links\n"); -} -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] -fn _du_no_permission(s: &str) { - assert_eq!(s, "4\tsubdir/links\n"); -} - #[test] #[cfg(not(target_os = "openbsd"))] fn test_du_one_file_system() { @@ -749,7 +723,7 @@ fn test_du_one_file_system() { return; } } - _du_basics_subdir(result.stdout_str()); + du_basics_subdir(result.stdout_str()); } #[test] 
From ffbc682b92be7b53f2794fb846f6952235c5012e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 29 Nov 2024 09:19:30 +0100 Subject: [PATCH 063/179] Use the other comment syntax as it is not related Co-authored-by: Daniel Hofstetter --- src/uu/dd/src/numbers.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uu/dd/src/numbers.rs b/src/uu/dd/src/numbers.rs index c29668c89..d0ee2d90b 100644 --- a/src/uu/dd/src/numbers.rs +++ b/src/uu/dd/src/numbers.rs @@ -3,7 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -/// Functions for formatting a number as a magnitude and a unit suffix. +//! Functions for formatting a number as a magnitude and a unit suffix. + /// The first ten powers of 1024. const IEC_BASES: [u128; 10] = [ 1, From 95bd50e09a8ced6b49eeafd66d436bbbdbf10742 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 24 Nov 2024 13:12:55 +0100 Subject: [PATCH 064/179] du: deduplicate the input Should fix: tests/du/hard-link.sh --- src/uu/du/src/du.rs | 24 ++++++++++------- tests/by-util/test_du.rs | 58 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index e7b00838e..2d36679f0 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -649,6 +649,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let summarize = matches.get_flag(options::SUMMARIZE); + let count_links = matches.get_flag(options::COUNT_LINKS); + let max_depth = parse_depth( matches .get_one::(options::MAX_DEPTH) @@ -669,15 +671,19 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } read_files_from(file_from)? 
- } else { - match matches.get_one::(options::FILE) { - Some(_) => matches - .get_many::(options::FILE) - .unwrap() - .map(PathBuf::from) - .collect(), - None => vec![PathBuf::from(".")], + } else if let Some(files) = matches.get_many::(options::FILE) { + let files = files.map(PathBuf::from); + if count_links { + files.collect() + } else { + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + files + .filter(|path| seen.insert(path.clone())) + .collect::>() } + } else { + vec![PathBuf::from(".")] }; let time = matches.contains_id(options::TIME).then(|| { @@ -719,7 +725,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } else { Deref::None }, - count_links: matches.get_flag(options::COUNT_LINKS), + count_links, verbose: matches.get_flag(options::VERBOSE), excludes: build_exclude_patterns(&matches)?, }; diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 84e3b5050..ecbf58b11 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -1194,3 +1194,61 @@ fn test_human_size() { .succeeds() .stdout_contains(format!("1.0K {dir}")); } + +#[cfg(not(target_os = "android"))] +#[test] +fn test_du_deduplicated_input_args() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir("d"); + at.mkdir("d/d"); + at.touch("d/f"); + at.hard_link("d/f", "d/h"); + + let result = ts + .ucmd() + .arg("--inodes") + .arg("d") + .arg("d") + .arg("d") + .succeeds(); + result.no_stderr(); + + let result_seq: Vec = result + .stdout_str() + .lines() + .map(|x| x.parse().unwrap()) + .collect(); + #[cfg(windows)] + assert_eq!(result_seq, ["1\td\\d", "3\td"]); + #[cfg(not(windows))] + assert_eq!(result_seq, ["1\td/d", "3\td"]); +} + +#[cfg(not(target_os = "android"))] +#[test] +fn test_du_no_deduplicated_input_args() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir("d"); + at.touch("d/d"); + + let result = ts + .ucmd() + .arg("--inodes") + .arg("-l") + 
.arg("d") + .arg("d") + .arg("d") + .succeeds(); + result.no_stderr(); + + let result_seq: Vec = result + .stdout_str() + .lines() + .map(|x| x.parse().unwrap()) + .collect(); + assert_eq!(result_seq, ["2\td", "2\td", "2\td"]); +} From 865da0caada8208eb2c956aa9d34c0fb7c67382b Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 29 Nov 2024 10:47:14 +0100 Subject: [PATCH 065/179] uudoc,chcon: fix needless_lifetimes warnings --- src/bin/uudoc.rs | 2 +- src/uu/chcon/src/chcon.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs index 8ea11ed4d..f2c325e32 100644 --- a/src/bin/uudoc.rs +++ b/src/bin/uudoc.rs @@ -172,7 +172,7 @@ struct MDWriter<'a, 'b> { markdown: Option, } -impl<'a, 'b> MDWriter<'a, 'b> { +impl MDWriter<'_, '_> { /// # Errors /// Returns an error if the writer fails. fn markdown(&mut self) -> io::Result<()> { diff --git a/src/uu/chcon/src/chcon.rs b/src/uu/chcon/src/chcon.rs index 1a804bd3b..c8d1c4017 100644 --- a/src/uu/chcon/src/chcon.rs +++ b/src/uu/chcon/src/chcon.rs @@ -777,7 +777,7 @@ enum SELinuxSecurityContext<'t> { String(Option), } -impl<'t> SELinuxSecurityContext<'t> { +impl SELinuxSecurityContext<'_> { fn to_c_string(&self) -> Result>> { match self { Self::File(context) => context From 270525a02f49bbd7050929beceef014a382f6c4d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 29 Nov 2024 10:14:38 +0000 Subject: [PATCH 066/179] chore(deps): update dawidd6/action-download-artifact action to v7 --- .github/workflows/CICD.yml | 4 ++-- .github/workflows/GnuTests.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 6c7b50995..1875c512d 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -397,14 +397,14 @@ jobs: --arg multisize "$SIZE_MULTI" \ '{($date): { sha: $sha, size: $size, multisize: $multisize, }}' > size-result.json - 
name: Download the previous individual size result - uses: dawidd6/action-download-artifact@v6 + uses: dawidd6/action-download-artifact@v7 with: workflow: CICD.yml name: individual-size-result repo: uutils/coreutils path: dl - name: Download the previous size result - uses: dawidd6/action-download-artifact@v6 + uses: dawidd6/action-download-artifact@v7 with: workflow: CICD.yml name: size-result diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index 113cb1e97..b47b43596 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -86,7 +86,7 @@ jobs: working-directory: ${{ steps.vars.outputs.path_GNU }} - name: Retrieve reference artifacts - uses: dawidd6/action-download-artifact@v6 + uses: dawidd6/action-download-artifact@v7 # ref: continue-on-error: true ## don't break the build for missing reference artifacts (may be expired or just not generated yet) with: From 4f7f85da4795fbaec133189e3226057011d98f91 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 29 Nov 2024 10:45:33 +0000 Subject: [PATCH 067/179] fix(deps): update rust crate libc to v0.2.167 --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca91ec309..c5e96cef2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.166" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ccc108bbc0b1331bd061864e7cd823c0cab660bbe6970e66e2c0614decde36" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libloading" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index d5372d79b..a300d8b65 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "libc" -version = 
"0.2.166" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ccc108bbc0b1331bd061864e7cd823c0cab660bbe6970e66e2c0614decde36" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libfuzzer-sys" From 1365f6c025b6154b78ef9c8f7eef36b4d198236d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 29 Nov 2024 21:46:48 +0000 Subject: [PATCH 068/179] chore(deps): update rust crate terminal_size to v0.4.1 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5e96cef2..ef1704f01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2351,9 +2351,9 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" +checksum = "5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9" dependencies = [ "rustix 0.38.40", "windows-sys 0.59.0", @@ -2984,7 +2984,7 @@ dependencies = [ "number_prefix", "once_cell", "selinux", - "terminal_size 0.4.0", + "terminal_size 0.4.1", "uucore", "uutils_term_grid", ] @@ -3751,7 +3751,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] From 0cae322dfa8db31937a041efd686c56f356ef7c2 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 14 Nov 2024 21:37:20 +0100 Subject: [PATCH 069/179] comm: generate an error if the input is a directory tested by tests/misc/read-errors --- src/uu/comm/src/comm.rs | 8 +++++--- tests/by-util/test_comm.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/uu/comm/src/comm.rs 
b/src/uu/comm/src/comm.rs index cae405865..ae57b8bf8 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -6,9 +6,8 @@ // spell-checker:ignore (ToDO) delim mkdelim use std::cmp::Ordering; -use std::fs::File; +use std::fs::{metadata, File}; use std::io::{self, stdin, BufRead, BufReader, Stdin}; -use std::path::Path; use uucore::error::{FromIo, UResult, USimpleError}; use uucore::line_ending::LineEnding; use uucore::{format_usage, help_about, help_usage}; @@ -130,7 +129,10 @@ fn open_file(name: &str, line_ending: LineEnding) -> io::Result { if name == "-" { Ok(LineReader::new(Input::Stdin(stdin()), line_ending)) } else { - let f = File::open(Path::new(name))?; + if metadata(name)?.is_dir() { + return Err(io::Error::new(io::ErrorKind::Other, "Is a directory")); + } + let f = File::open(name)?; Ok(LineReader::new( Input::FileIn(BufReader::new(f)), line_ending, diff --git a/tests/by-util/test_comm.rs b/tests/by-util/test_comm.rs index 2dc385ef3..b62febf50 100644 --- a/tests/by-util/test_comm.rs +++ b/tests/by-util/test_comm.rs @@ -292,3 +292,36 @@ fn test_no_such_file() { .fails() .stderr_only("comm: bogus_file_1: No such file or directory\n"); } + +#[test] +fn test_is_dir() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + scene + .ucmd() + .args(&[".", "."]) + .fails() + .stderr_only("comm: .: Is a directory\n"); + + at.mkdir("dir"); + scene + .ucmd() + .args(&["dir", "."]) + .fails() + .stderr_only("comm: dir: Is a directory\n"); + + at.touch("file"); + scene + .ucmd() + .args(&[".", "file"]) + .fails() + .stderr_only("comm: .: Is a directory\n"); + + at.touch("file"); + scene + .ucmd() + .args(&["file", "."]) + .fails() + .stderr_only("comm: .: Is a directory\n"); +} From 2ad3c452303752e22b81d34b454515eca0b3eef7 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 30 Nov 2024 10:38:46 +0100 Subject: [PATCH 070/179] tr: generate an error if the input is a directory (#6855) * tr: generate an error if the input is a 
directory tested by tests/misc/read-errors * tr: improve the test * tr: take the commentinto account --- src/uu/tr/src/operation.rs | 24 +++++++++++++++++------- src/uu/tr/src/tr.rs | 10 +++++----- tests/by-util/test_tr.rs | 9 +++++++++ 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 035f09972..6a1bf9391 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -23,7 +23,7 @@ use std::{ io::{BufRead, Write}, ops::Not, }; -use uucore::error::UError; +use uucore::error::{UError, UResult, USimpleError}; use uucore::show_warning; #[derive(Debug, Clone)] @@ -608,7 +608,7 @@ impl SymbolTranslator for SqueezeOperation { } } -pub fn translate_input(input: &mut R, output: &mut W, mut translator: T) +pub fn translate_input(input: &mut R, output: &mut W, mut translator: T) -> UResult<()> where T: SymbolTranslator, R: BufRead, @@ -616,15 +616,25 @@ where { let mut buf = Vec::new(); let mut output_buf = Vec::new(); + while let Ok(length) = input.read_until(b'\n', &mut buf) { if length == 0 { - break; - } else { - let filtered = buf.iter().filter_map(|c| translator.translate(*c)); - output_buf.extend(filtered); - output.write_all(&output_buf).unwrap(); + break; // EOF reached } + + let filtered = buf.iter().filter_map(|&c| translator.translate(c)); + output_buf.extend(filtered); + + if let Err(e) = output.write_all(&output_buf) { + return Err(USimpleError::new( + 1, + format!("{}: write error: {}", uucore::util_name(), e), + )); + } + buf.clear(); output_buf.clear(); } + + Ok(()) } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 67998d26d..ff85002e7 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -132,24 +132,24 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let delete_op = DeleteOperation::new(set1); let squeeze_op = SqueezeOperation::new(set2); let op = delete_op.chain(squeeze_op); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); 
+ translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; } else { let op = DeleteOperation::new(set1); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; } } else if squeeze_flag { if sets_len < 2 { let op = SqueezeOperation::new(set1); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; } else { let translate_op = TranslateOperation::new(set1, set2.clone())?; let squeeze_op = SqueezeOperation::new(set2); let op = translate_op.chain(squeeze_op); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; } } else { let op = TranslateOperation::new(set1, set2)?; - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; } Ok(()) } diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 705f40834..f8fcafce3 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -13,6 +13,15 @@ fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1); } +#[test] +fn test_invalid_input() { + new_ucmd!() + .args(&["1", "1", "<", "."]) + .fails() + .code_is(1) + .stderr_contains("tr: extra operand '<'"); +} + #[test] fn test_to_upper() { new_ucmd!() From 6487347bc6d212edb72d332844b49701627e6641 Mon Sep 17 00:00:00 2001 From: Jesse Schalken Date: Mon, 28 Oct 2024 20:25:46 +1100 Subject: [PATCH 071/179] Reuse existing metadata instead of calling path.is_dir() again --- src/uu/du/src/du.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 2d36679f0..d4bec77ef 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -171,7 +171,7 @@ impl Stat { Ok(Self { path: path.to_path_buf(), is_dir: metadata.is_dir(), - size: if path.is_dir() { 0 } else { metadata.len() }, + 
size: if metadata.is_dir() { 0 } else { metadata.len() }, blocks: metadata.blocks(), inodes: 1, inode: Some(file_info), @@ -189,7 +189,7 @@ impl Stat { Ok(Self { path: path.to_path_buf(), is_dir: metadata.is_dir(), - size: if path.is_dir() { 0 } else { metadata.len() }, + size: if metadata.is_dir() { 0 } else { metadata.len() }, blocks: size_on_disk / 1024 * 2, inodes: 1, inode: file_info, From 4f422c1a3a49f91722ee320f1c070dd3492fe593 Mon Sep 17 00:00:00 2001 From: Simone Ragusa Date: Sat, 21 Sep 2024 15:59:34 +0200 Subject: [PATCH 072/179] uucore: add alacritty to the list of terminals that support colors Any value of TERM with glob pattern `alacritty*` will be matched. Fixes #6722 --- src/uucore/src/lib/features/colors.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index f05739431..f8cbc9ebf 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -13,6 +13,7 @@ /// restrict following config to systems with matching environment variables. 
pub static TERMS: &[&str] = &[ "Eterm", + "alacritty*", "ansi", "*color*", "con[0-9]*x[0-9]*", From f0b7d322d1e29e088708d959ee412b7781630184 Mon Sep 17 00:00:00 2001 From: Simone Ragusa Date: Sat, 21 Sep 2024 22:08:46 +0200 Subject: [PATCH 073/179] dircolors: patch test fixture to include alacritty support --- src/uu/dircolors/README.md | 8 +++++++- src/uu/dircolors/alacritty-supports-colors.patch | 12 ++++++++++++ tests/fixtures/dircolors/internal.expected | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 src/uu/dircolors/alacritty-supports-colors.patch diff --git a/src/uu/dircolors/README.md b/src/uu/dircolors/README.md index ce8aa965f..f4ec5d675 100644 --- a/src/uu/dircolors/README.md +++ b/src/uu/dircolors/README.md @@ -9,10 +9,16 @@ dircolors -b > /PATH_TO_COREUTILS/tests/fixtures/dircolors/bash_def.expected dircolors -c > /PATH_TO_COREUTILS/tests/fixtures/dircolors/csh_def.expected ``` +Apply the patches to include more terminals that support colors: + +```shell +git apply /PATH_TO_COREUTILS/src/uu/dircolors/alacritty-supports-colors.patch +``` + Run the tests: ```shell cargo test --features "dircolors" --no-default-features ``` -Edit `/PATH_TO_COREUTILS/src/uu/dircolors/src/colors.rs` until the tests pass. +Edit `/PATH_TO_COREUTILS/src/uu/dircolors/src/dircolors.rs` until the tests pass. diff --git a/src/uu/dircolors/alacritty-supports-colors.patch b/src/uu/dircolors/alacritty-supports-colors.patch new file mode 100644 index 000000000..c6f022423 --- /dev/null +++ b/src/uu/dircolors/alacritty-supports-colors.patch @@ -0,0 +1,12 @@ +diff --git a/tests/fixtures/dircolors/internal.expected b/tests/fixtures/dircolors/internal.expected +index e151973f2..01dae4273 100644 +--- a/tests/fixtures/dircolors/internal.expected ++++ b/tests/fixtures/dircolors/internal.expected +@@ -7,6 +7,7 @@ + # restrict following config to systems with matching environment variables. 
+ COLORTERM ?* + TERM Eterm ++TERM alacritty* + TERM ansi + TERM *color* + TERM con[0-9]*x[0-9]* diff --git a/tests/fixtures/dircolors/internal.expected b/tests/fixtures/dircolors/internal.expected index e151973f2..01dae4273 100644 --- a/tests/fixtures/dircolors/internal.expected +++ b/tests/fixtures/dircolors/internal.expected @@ -7,6 +7,7 @@ # restrict following config to systems with matching environment variables. COLORTERM ?* TERM Eterm +TERM alacritty* TERM ansi TERM *color* TERM con[0-9]*x[0-9]* From 2fef5be8f757ebe6cd3e1b9d9a2b791addcbdf64 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 13 May 2024 19:39:45 +0200 Subject: [PATCH 074/179] more: reduce memory usage a bit --- src/uu/more/src/more.rs | 47 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 0b8c838f2..f397d5273 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -447,7 +447,7 @@ struct Pager<'a> { upper_mark: usize, // The number of rows that fit on the screen content_rows: u16, - lines: Vec, + lines: Vec<&'a str>, next_file: Option<&'a str>, line_count: usize, silent: bool, @@ -456,7 +456,7 @@ struct Pager<'a> { } impl<'a> Pager<'a> { - fn new(rows: u16, lines: Vec, next_file: Option<&'a str>, options: &Options) -> Self { + fn new(rows: u16, lines: Vec<&'a str>, next_file: Option<&'a str>, options: &Options) -> Self { let line_count = lines.len(); Self { upper_mark: options.from_line, @@ -608,7 +608,7 @@ impl<'a> Pager<'a> { } } -fn search_pattern_in_file(lines: &[String], pattern: &Option) -> Option { +fn search_pattern_in_file(lines: &[&str], pattern: &Option) -> Option { let pattern = pattern.clone().unwrap_or_default(); if lines.is_empty() || pattern.is_empty() { return None; @@ -630,8 +630,10 @@ fn paging_add_back_message(options: &Options, stdout: &mut std::io::Stdout) -> U } // Break the lines on the cols of the terminal -fn break_buff(buff: &str, cols: 
usize) -> Vec { - let mut lines = Vec::with_capacity(buff.lines().count()); +fn break_buff(buff: &str, cols: usize) -> Vec<&str> { + // We _could_ do a precise with_capacity here, but that would require scanning the + // whole buffer. Just guess a value instead. + let mut lines = Vec::with_capacity(2048); for l in buff.lines() { lines.append(&mut break_line(l, cols)); @@ -639,11 +641,11 @@ fn break_buff(buff: &str, cols: usize) -> Vec { lines } -fn break_line(line: &str, cols: usize) -> Vec { +fn break_line(line: &str, cols: usize) -> Vec<&str> { let width = UnicodeWidthStr::width(line); let mut lines = Vec::new(); if width < cols { - lines.push(line.to_string()); + lines.push(line); return lines; } @@ -655,14 +657,14 @@ fn break_line(line: &str, cols: usize) -> Vec { total_width += width; if total_width > cols { - lines.push(line[last_index..index].to_string()); + lines.push(&line[last_index..index]); last_index = index; total_width = width; } } if last_index != line.len() { - lines.push(line[last_index..].to_string()); + lines.push(&line[last_index..]); } lines } @@ -727,29 +729,16 @@ mod tests { #[test] fn test_search_pattern_empty_pattern() { - let lines = vec![String::from("line1"), String::from("line2")]; + let lines = vec!["line1", "line2"]; let pattern = None; assert_eq!(None, search_pattern_in_file(&lines, &pattern)); } #[test] fn test_search_pattern_found_pattern() { - let lines = vec![ - String::from("line1"), - String::from("line2"), - String::from("pattern"), - ]; - let lines2 = vec![ - String::from("line1"), - String::from("line2"), - String::from("pattern"), - String::from("pattern2"), - ]; - let lines3 = vec![ - String::from("line1"), - String::from("line2"), - String::from("other_pattern"), - ]; + let lines = vec!["line1", "line2", "pattern"]; + let lines2 = vec!["line1", "line2", "pattern", "pattern2"]; + let lines3 = vec!["line1", "line2", "other_pattern"]; let pattern = Some(String::from("pattern")); assert_eq!(2, search_pattern_in_file(&lines, 
&pattern).unwrap()); assert_eq!(2, search_pattern_in_file(&lines2, &pattern).unwrap()); @@ -758,11 +747,7 @@ mod tests { #[test] fn test_search_pattern_not_found_pattern() { - let lines = vec![ - String::from("line1"), - String::from("line2"), - String::from("something"), - ]; + let lines = vec!["line1", "line2", "something"]; let pattern = Some(String::from("pattern")); assert_eq!(None, search_pattern_in_file(&lines, &pattern)); } From b2510feb4663a284843e6f5f794ed015b0410dd7 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 14 May 2024 12:30:48 +0200 Subject: [PATCH 075/179] clean up use of u16s and patterns --- src/uu/more/src/more.rs | 53 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index f397d5273..987ed4a58 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -98,10 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { println!("{panic_info}"); })); - let matches = match uu_app().try_get_matches_from(args) { - Ok(m) => m, - Err(e) => return Err(e.into()), - }; + let matches = uu_app().try_get_matches_from(args)?; let mut options = Options::from(&matches); @@ -308,12 +305,12 @@ fn more( rows = number; } - let lines = break_buff(buff, usize::from(cols)); + let lines = break_buff(buff, cols as usize); let mut pager = Pager::new(rows, lines, next_file, options); - if options.pattern.is_some() { - match search_pattern_in_file(&pager.lines, &options.pattern) { + if let Some(pat) = options.pattern.as_ref() { + match search_pattern_in_file(&pager.lines, pat) { Some(number) => pager.upper_mark = number, None => { execute!(stdout, terminal::Clear(terminal::ClearType::CurrentLine))?; @@ -446,7 +443,7 @@ struct Pager<'a> { // The current line at the top of the screen upper_mark: usize, // The number of rows that fit on the screen - content_rows: u16, + content_rows: usize, lines: Vec<&'a str>, next_file: Option<&'a str>, 
line_count: usize, @@ -460,7 +457,7 @@ impl<'a> Pager<'a> { let line_count = lines.len(); Self { upper_mark: options.from_line, - content_rows: rows.saturating_sub(1), + content_rows: rows.saturating_sub(1) as usize, lines, next_file, line_count, @@ -481,10 +478,10 @@ impl<'a> Pager<'a> { // the upper mark must not grow past top of the screen at the end of the open file. if self .upper_mark - .saturating_add(self.content_rows as usize * 2) - .ge(&self.line_count) + .saturating_add(self.content_rows * 2) + >= self.line_count { - self.upper_mark = self.line_count - self.content_rows as usize; + self.upper_mark = self.line_count - self.content_rows; return; } @@ -492,10 +489,9 @@ impl<'a> Pager<'a> { } fn page_up(&mut self) { - let content_row_usize: usize = self.content_rows.into(); self.upper_mark = self .upper_mark - .saturating_sub(content_row_usize.saturating_add(self.line_squeezed)); + .saturating_sub(self.content_rows.saturating_add(self.line_squeezed)); if self.squeeze { let iter = self.lines.iter().take(self.upper_mark).rev(); @@ -520,7 +516,7 @@ impl<'a> Pager<'a> { // TODO: Deal with column size changes. 
fn page_resize(&mut self, _: u16, row: u16, option_line: Option) { if option_line.is_none() { - self.content_rows = row.saturating_sub(1); + self.content_rows = row.saturating_sub(1) as usize; }; } @@ -541,7 +537,7 @@ impl<'a> Pager<'a> { let mut displayed_lines = Vec::new(); let mut iter = self.lines.iter().skip(self.upper_mark); - while displayed_lines.len() < self.content_rows as usize { + while displayed_lines.len() < self.content_rows { match iter.next() { Some(line) => { if self.squeeze { @@ -608,13 +604,12 @@ impl<'a> Pager<'a> { } } -fn search_pattern_in_file(lines: &[&str], pattern: &Option) -> Option { - let pattern = pattern.clone().unwrap_or_default(); +fn search_pattern_in_file(lines: &[&str], pattern: &str) -> Option { if lines.is_empty() || pattern.is_empty() { return None; } for (line_number, line) in lines.iter().enumerate() { - if line.contains(pattern.as_str()) { + if line.contains(pattern) { return Some(line_number); } } @@ -723,15 +718,15 @@ mod tests { #[test] fn test_search_pattern_empty_lines() { let lines = vec![]; - let pattern = Some(String::from("pattern")); - assert_eq!(None, search_pattern_in_file(&lines, &pattern)); + let pattern = "pattern"; + assert_eq!(None, search_pattern_in_file(&lines, pattern)); } #[test] fn test_search_pattern_empty_pattern() { let lines = vec!["line1", "line2"]; - let pattern = None; - assert_eq!(None, search_pattern_in_file(&lines, &pattern)); + let pattern = ""; + assert_eq!(None, search_pattern_in_file(&lines, pattern)); } #[test] @@ -739,16 +734,16 @@ mod tests { let lines = vec!["line1", "line2", "pattern"]; let lines2 = vec!["line1", "line2", "pattern", "pattern2"]; let lines3 = vec!["line1", "line2", "other_pattern"]; - let pattern = Some(String::from("pattern")); - assert_eq!(2, search_pattern_in_file(&lines, &pattern).unwrap()); - assert_eq!(2, search_pattern_in_file(&lines2, &pattern).unwrap()); - assert_eq!(2, search_pattern_in_file(&lines3, &pattern).unwrap()); + let pattern = "pattern"; + 
assert_eq!(2, search_pattern_in_file(&lines, pattern).unwrap()); + assert_eq!(2, search_pattern_in_file(&lines2, pattern).unwrap()); + assert_eq!(2, search_pattern_in_file(&lines3, pattern).unwrap()); } #[test] fn test_search_pattern_not_found_pattern() { let lines = vec!["line1", "line2", "something"]; - let pattern = Some(String::from("pattern")); - assert_eq!(None, search_pattern_in_file(&lines, &pattern)); + let pattern = "pattern"; + assert_eq!(None, search_pattern_in_file(&lines, pattern)); } } From 6b32c30d57cf60a09179bc5b294f430a864804a2 Mon Sep 17 00:00:00 2001 From: hamflx Date: Sun, 24 Mar 2024 17:15:34 +0800 Subject: [PATCH 076/179] mv: fix invalid numbered backup path --- src/uucore/src/lib/features/backup_control.rs | 58 ++++++++++++++++--- tests/by-util/test_mv.rs | 24 ++++++++ 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/src/uucore/src/lib/features/backup_control.rs b/src/uucore/src/lib/features/backup_control.rs index 9086acb19..4b4f7aa93 100644 --- a/src/uucore/src/lib/features/backup_control.rs +++ b/src/uucore/src/lib/features/backup_control.rs @@ -421,25 +421,29 @@ pub fn get_backup_path( } fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { - let mut p = path.to_string_lossy().into_owned(); - p.push_str(suffix); - PathBuf::from(p) + let mut file_name = path.file_name().unwrap_or_default().to_os_string(); + file_name.push(suffix); + path.with_file_name(file_name) } fn numbered_backup_path(path: &Path) -> PathBuf { + let file_name = path.file_name().unwrap_or_default(); for i in 1_u64.. 
{ - let path_str = &format!("{}.~{}~", path.to_string_lossy(), i); - let path = Path::new(path_str); + let mut numbered_file_name = file_name.to_os_string(); + numbered_file_name.push(format!(".~{}~", i)); + let path = path.with_file_name(numbered_file_name); if !path.exists() { - return path.to_path_buf(); + return path; } } panic!("cannot create backup") } fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { - let test_path_str = &format!("{}.~1~", path.to_string_lossy()); - let test_path = Path::new(test_path_str); + let file_name = path.file_name().unwrap_or_default(); + let mut numbered_file_name = file_name.to_os_string(); + numbered_file_name.push(".~1~"); + let test_path = path.with_file_name(numbered_file_name); if test_path.exists() { numbered_backup_path(path) } else { @@ -660,6 +664,44 @@ mod tests { let result = determine_backup_suffix(&matches); assert_eq!(result, "-v"); } + + #[test] + fn test_numbered_backup_path() { + assert_eq!(numbered_backup_path(&Path::new("")), PathBuf::from(".~1~")); + assert_eq!( + numbered_backup_path(&Path::new("/")), + PathBuf::from("/.~1~") + ); + assert_eq!( + numbered_backup_path(&Path::new("/hello/world")), + PathBuf::from("/hello/world.~1~") + ); + assert_eq!( + numbered_backup_path(&Path::new("/hello/world/")), + PathBuf::from("/hello/world.~1~") + ); + } + + #[test] + fn test_simple_backup_path() { + assert_eq!( + simple_backup_path(&Path::new(""), ".bak"), + PathBuf::from(".bak") + ); + assert_eq!( + simple_backup_path(&Path::new("/"), ".bak"), + PathBuf::from("/.bak") + ); + assert_eq!( + simple_backup_path(&Path::new("/hello/world"), ".bak"), + PathBuf::from("/hello/world.bak") + ); + assert_eq!( + simple_backup_path(&Path::new("/hello/world/"), ".bak"), + PathBuf::from("/hello/world.bak") + ); + } + #[test] fn test_source_is_target_backup() { let source = Path::new("data.txt.bak"); diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index 6ab989ee4..ac64fae7e 100644 --- 
a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -571,6 +571,30 @@ fn test_mv_simple_backup() { assert!(at.file_exists(format!("{file_b}~"))); } +#[test] +fn test_mv_simple_backup_for_directory() { + let (at, mut ucmd) = at_and_ucmd!(); + let dir_a = "test_mv_simple_backup_dir_a"; + let dir_b = "test_mv_simple_backup_dir_b"; + + at.mkdir(dir_a); + at.mkdir(dir_b); + at.touch(format!("{dir_a}/file_a")); + at.touch(format!("{dir_b}/file_b")); + ucmd.arg("-T") + .arg("-b") + .arg(dir_a) + .arg(dir_b) + .succeeds() + .no_stderr(); + + assert!(!at.dir_exists(dir_a)); + assert!(at.dir_exists(dir_b)); + assert!(at.dir_exists(&format!("{dir_b}~"))); + assert!(at.file_exists(format!("{dir_b}/file_a"))); + assert!(at.file_exists(format!("{dir_b}~/file_b"))); +} + #[test] fn test_mv_simple_backup_with_file_extension() { let (at, mut ucmd) = at_and_ucmd!(); From 2e4b7c8cb70f81e42f95bf89a525d1c5d9c19765 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 5 Jan 2024 11:10:37 +0100 Subject: [PATCH 077/179] mv: show "same file" error for "mv d/f d" --- src/uu/mv/src/mv.rs | 8 ++++++++ tests/by-util/test_mv.rs | 18 +++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 9d8452b1e..20a22043c 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -314,6 +314,7 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> ) .into()); } + if source.symlink_metadata().is_err() { return Err(if path_ends_with_terminator(source) { MvError::CannotStatNotADirectory(source.quote().to_string()).into() @@ -336,6 +337,13 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> } } + if source.parent() == Some(target) { + return Err( + // use source twice to match GNU's error message + MvError::SameFile(source.quote().to_string(), source.quote().to_string()).into(), + ); + } + let target_is_dir = target.is_dir(); let source_is_dir = source.is_dir(); 
diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index 6ab989ee4..562e24754 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -402,7 +402,23 @@ fn test_mv_same_file() { ucmd.arg(file_a) .arg(file_a) .fails() - .stderr_is(format!("mv: '{file_a}' and '{file_a}' are the same file\n",)); + .stderr_is(format!("mv: '{file_a}' and '{file_a}' are the same file\n")); +} + +#[test] +fn test_mv_file_to_same_dir() { + let (at, mut ucmd) = at_and_ucmd!(); + let file = "a"; + let dir = "dir"; + let path = &format!("{dir}/{file}"); + + at.mkdir(dir); + at.touch(path); + + ucmd.arg(path) + .arg(dir) + .fails() + .stderr_is(format!("mv: '{path}' and '{path}' are the same file\n")); } #[test] From 2d81463399c3b0b7afb08f6dfa12d608437ec91a Mon Sep 17 00:00:00 2001 From: Christian Legnitto Date: Mon, 2 Dec 2024 10:50:48 -0400 Subject: [PATCH 078/179] Make EscapedChar and friends pub --- src/uucore/src/lib/features/format/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 6d7a2ee30..24dd1daaa 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -48,7 +48,7 @@ use std::{ use crate::error::UError; -use self::{ +pub use self::{ escape::{parse_escape_code, EscapedChar}, num_format::Formatter, }; From 2799b288e3791485456d291dc80641801732e2e8 Mon Sep 17 00:00:00 2001 From: Julian <61988360+just-an-engineer@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:04:36 -0500 Subject: [PATCH 079/179] tail: fix issue #6543 (--pid when reading from stdin) (#6582) --------- Co-authored-by: just-an-engineer Co-authored-by: Sylvestre Ledru --- src/uu/tail/src/tail.rs | 14 +++++---- tests/by-util/test_tail.rs | 59 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index edac4b151..a48da6b31 100644 --- 
a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -65,13 +65,15 @@ fn uu_tail(settings: &Settings) -> UResult<()> { // Add `path` and `reader` to `files` map if `--follow` is selected. for input in &settings.inputs.clone() { match input.kind() { - InputKind::File(path) if cfg!(not(unix)) || path != &PathBuf::from(text::DEV_STDIN) => { - tail_file(settings, &mut printer, input, path, &mut observer, 0)?; - } - // File points to /dev/stdin here - InputKind::File(_) | InputKind::Stdin => { + InputKind::Stdin => { tail_stdin(settings, &mut printer, input, &mut observer)?; } + InputKind::File(path) if cfg!(unix) && path == &PathBuf::from(text::DEV_STDIN) => { + tail_stdin(settings, &mut printer, input, &mut observer)?; + } + InputKind::File(path) => { + tail_file(settings, &mut printer, input, path, &mut observer, 0)?; + } } } @@ -85,7 +87,7 @@ fn uu_tail(settings: &Settings) -> UResult<()> { the input file is not a FIFO, pipe, or regular file, it is unspecified whether or not the -f option shall be ignored. 
*/ - if !settings.has_only_stdin() { + if !settings.has_only_stdin() || settings.pid != 0 { follow::follow(observer, settings)?; } } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 4c7c52c7c..885e50ad3 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -6,6 +6,7 @@ // spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez MULT // spell-checker:ignore (libs) kqueue // spell-checker:ignore (jargon) tailable untailable datasame runneradmin tmpi +// spell-checker:ignore (cmd) taskkill #![allow( clippy::unicode_not_nfc, clippy::cast_lossless, @@ -4822,3 +4823,61 @@ fn test_obsolete_encoding_windows() { .stderr_is("tail: bad argument encoding: '-�b'\n") .code_is(1); } + +#[test] +#[cfg(not(target_vendor = "apple"))] // FIXME: for currently not working platforms +fn test_following_with_pid() { + use std::process::Command; + + let ts = TestScenario::new(util_name!()); + + #[cfg(not(windows))] + let mut sleep_command = Command::new("sleep") + .arg("999d") + .spawn() + .expect("failed to start sleep command"); + #[cfg(windows)] + let mut sleep_command = Command::new("powershell") + .arg("-Command") + .arg("Start-Sleep -Seconds 999") + .spawn() + .expect("failed to start sleep command"); + + let sleep_pid = sleep_command.id(); + + let at = &ts.fixtures; + at.touch("f"); + // when -f is specified, tail should die after + // the pid from --pid also dies + let mut child = ts + .ucmd() + .args(&[ + "--pid", + &sleep_pid.to_string(), + "-f", + at.plus("f").to_str().unwrap(), + ]) + .stderr_to_stdout() + .run_no_wait(); + child.make_assertion_with_delay(2000).is_alive(); + + #[cfg(not(windows))] + Command::new("kill") + .arg("-9") + .arg(sleep_pid.to_string()) + .output() + .expect("failed to kill sleep command"); + #[cfg(windows)] + Command::new("taskkill") + .arg("/PID") + .arg(sleep_pid.to_string()) + .arg("/F") + .output() + .expect("failed to kill sleep 
command"); + + let _ = sleep_command.wait(); + + child.make_assertion_with_delay(2000).is_not_alive(); + + child.kill(); +} From 9877085702d359dd04cade165231fc2fce64178e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 2 Dec 2024 20:39:39 +0100 Subject: [PATCH 080/179] Revert "mv: show "same file" error for `mv d/f d`" --- src/uu/mv/src/mv.rs | 8 -------- tests/by-util/test_mv.rs | 18 +----------------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 20a22043c..9d8452b1e 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -314,7 +314,6 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> ) .into()); } - if source.symlink_metadata().is_err() { return Err(if path_ends_with_terminator(source) { MvError::CannotStatNotADirectory(source.quote().to_string()).into() @@ -337,13 +336,6 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> } } - if source.parent() == Some(target) { - return Err( - // use source twice to match GNU's error message - MvError::SameFile(source.quote().to_string(), source.quote().to_string()).into(), - ); - } - let target_is_dir = target.is_dir(); let source_is_dir = source.is_dir(); diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index 562e24754..6ab989ee4 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -402,23 +402,7 @@ fn test_mv_same_file() { ucmd.arg(file_a) .arg(file_a) .fails() - .stderr_is(format!("mv: '{file_a}' and '{file_a}' are the same file\n")); -} - -#[test] -fn test_mv_file_to_same_dir() { - let (at, mut ucmd) = at_and_ucmd!(); - let file = "a"; - let dir = "dir"; - let path = &format!("{dir}/{file}"); - - at.mkdir(dir); - at.touch(path); - - ucmd.arg(path) - .arg(dir) - .fails() - .stderr_is(format!("mv: '{path}' and '{path}' are the same file\n")); + .stderr_is(format!("mv: '{file_a}' and '{file_a}' are the same file\n",)); } #[test] From 
abfedc3431aae1a2037b075c0c6841089ee9695d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 10:09:49 +0000 Subject: [PATCH 081/179] chore(deps): update rust crate time to v0.3.37 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ef1704f01..41b1a1224 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2413,9 +2413,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -2436,9 +2436,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", From 316f5f6e846b37c7eecf160f85274d8bfb30cd51 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:48:09 +0000 Subject: [PATCH 082/179] chore(deps): update rust crate thiserror to v2.0.4 --- Cargo.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41b1a1224..e58ac54d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2382,11 +2382,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.3" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +checksum = 
"2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490" dependencies = [ - "thiserror-impl 2.0.3", + "thiserror-impl 2.0.4", ] [[package]] @@ -2402,9 +2402,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.3" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" dependencies = [ "proc-macro2", "quote", @@ -2610,7 +2610,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror 2.0.3", + "thiserror 2.0.4", "uucore", ] @@ -2622,7 +2622,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror 2.0.3", + "thiserror 2.0.4", "uucore", ] @@ -2699,7 +2699,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", - "thiserror 2.0.3", + "thiserror 2.0.4", "uucore", ] @@ -3215,7 +3215,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror 2.0.3", + "thiserror 2.0.4", "uucore", ] @@ -3494,7 +3494,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror 2.0.3", + "thiserror 2.0.4", "utmp-classic", "uucore", ] @@ -3525,7 +3525,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror 2.0.3", + "thiserror 2.0.4", "unicode-width 0.1.13", "uucore", ] @@ -3586,7 +3586,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror 2.0.3", + "thiserror 2.0.4", "time", "uucore_procs", "walkdir", From 3e4221a4615e800d6010d564984b93d2d9e6abfb Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 2 Dec 2024 22:11:45 +0100 Subject: [PATCH 083/179] tr: generate an error for real if the input is a directory --- src/uu/tr/Cargo.toml | 2 +- src/uu/tr/src/tr.rs | 10 +++++++--- src/uucore/src/lib/features/fs.rs | 29 +++++++++++++++++++++++++++++ tests/by-util/test_tr.rs | 8 ++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/uu/tr/Cargo.toml b/src/uu/tr/Cargo.toml index 0787e4279..6378b7766 100644 --- 
a/src/uu/tr/Cargo.toml +++ b/src/uu/tr/Cargo.toml @@ -19,7 +19,7 @@ path = "src/tr.rs" [dependencies] nom = { workspace = true } clap = { workspace = true } -uucore = { workspace = true } +uucore = { workspace = true, features = ["fs"] } [[bin]] name = "tr" diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index ff85002e7..c226d2189 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -8,17 +8,17 @@ mod operation; mod unicode_table; +use crate::operation::DeleteOperation; use clap::{crate_version, value_parser, Arg, ArgAction, Command}; use operation::{ translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation, }; use std::ffi::OsString; use std::io::{stdin, stdout, BufWriter}; -use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show}; - -use crate::operation::DeleteOperation; use uucore::display::Quotable; use uucore::error::{UResult, USimpleError, UUsageError}; +use uucore::fs::is_stdin_directory; +use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show}; const ABOUT: &str = help_about!("tr.md"); const AFTER_HELP: &str = help_section!("after help", "tr.md"); @@ -126,6 +126,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { translating, )?; + if is_stdin_directory(&stdin) { + return Err(USimpleError::new(1, "read error: Is a directory")); + } + // '*_op' are the operations that need to be applied, in order. 
if delete_flag { if squeeze_flag { diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index e0c8ea79d..beb4d77a9 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -20,6 +20,7 @@ use std::ffi::{OsStr, OsString}; use std::fs; use std::fs::read_dir; use std::hash::Hash; +use std::io::Stdin; use std::io::{Error, ErrorKind, Result as IOResult}; #[cfg(unix)] use std::os::unix::{fs::MetadataExt, io::AsRawFd}; @@ -721,6 +722,34 @@ pub fn path_ends_with_terminator(path: &Path) -> bool { .map_or(false, |wide| wide == b'/'.into() || wide == b'\\'.into()) } +/// Checks if the standard input (stdin) is a directory. +/// +/// # Arguments +/// +/// * `stdin` - A reference to the standard input handle. +/// +/// # Returns +/// +/// * `bool` - Returns `true` if stdin is a directory, `false` otherwise. +pub fn is_stdin_directory(stdin: &Stdin) -> bool { + #[cfg(unix)] + { + use nix::sys::stat::fstat; + let mode = fstat(stdin.as_raw_fd()).unwrap().st_mode as mode_t; + has!(mode, S_IFDIR) + } + + #[cfg(windows)] + { + use std::os::windows::io::AsRawHandle; + let handle = stdin.as_raw_handle(); + if let Ok(metadata) = fs::metadata(format!("{}", handle as usize)) { + return metadata.is_dir(); + } + false + } +} + pub mod sane_blksize { #[cfg(not(target_os = "windows"))] diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index f8fcafce3..cd99f1c3a 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -20,6 +20,14 @@ fn test_invalid_input() { .fails() .code_is(1) .stderr_contains("tr: extra operand '<'"); + #[cfg(unix)] + new_ucmd!() + .args(&["1", "1"]) + // will test "tr 1 1 < ." 
+ .set_stdin(std::process::Stdio::from(std::fs::File::open(".").unwrap())) + .fails() + .code_is(1) + .stderr_contains("tr: read error: Is a directory"); } #[test] From c1f82b158cff27e76acb152732719bdb8a009121 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 3 Dec 2024 22:45:00 +0100 Subject: [PATCH 084/179] fix rustfmt+clippy --- src/uu/more/src/more.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index 987ed4a58..cb74e1176 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -469,23 +469,19 @@ impl<'a> Pager<'a> { fn should_close(&mut self) -> bool { self.upper_mark - .saturating_add(self.content_rows.into()) + .saturating_add(self.content_rows) .ge(&self.line_count) } fn page_down(&mut self) { // If the next page down position __after redraw__ is greater than the total line count, // the upper mark must not grow past top of the screen at the end of the open file. - if self - .upper_mark - .saturating_add(self.content_rows * 2) - >= self.line_count - { + if self.upper_mark.saturating_add(self.content_rows * 2) >= self.line_count { self.upper_mark = self.line_count - self.content_rows; return; } - self.upper_mark = self.upper_mark.saturating_add(self.content_rows.into()); + self.upper_mark = self.upper_mark.saturating_add(self.content_rows); } fn page_up(&mut self) { @@ -524,7 +520,7 @@ impl<'a> Pager<'a> { self.draw_lines(stdout); let lower_mark = self .line_count - .min(self.upper_mark.saturating_add(self.content_rows.into())); + .min(self.upper_mark.saturating_add(self.content_rows)); self.draw_prompt(stdout, lower_mark, wrong_key); stdout.flush().unwrap(); } From a16630fdedc472a7625a28ec2f380737d7902f86 Mon Sep 17 00:00:00 2001 From: Anirban Halder Date: Wed, 4 Dec 2024 03:21:03 +0530 Subject: [PATCH 085/179] rm: fix the usage of '/..' '/.' 
with -rf options fix the test tests/rm/r-4 --------- Co-authored-by: Julian Beltz Co-authored-by: Sylvestre Ledru --- Cargo.lock | 2 +- src/uu/rm/src/rm.rs | 88 ++++++++++++++++++++++++++++++++++++++-- tests/by-util/test_rm.rs | 62 ++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41b1a1224..60db20ca6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3751,7 +3751,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] diff --git a/src/uu/rm/src/rm.rs b/src/uu/rm/src/rm.rs index a89ba6db6..ad9c942a8 100644 --- a/src/uu/rm/src/rm.rs +++ b/src/uu/rm/src/rm.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (path) eacces inacc +// spell-checker:ignore (path) eacces inacc rm-r4 use clap::{builder::ValueParser, crate_version, parser::ValueSource, Arg, ArgAction, Command}; use std::collections::VecDeque; @@ -11,10 +11,15 @@ use std::ffi::{OsStr, OsString}; use std::fs::{self, File, Metadata}; use std::io::ErrorKind; use std::ops::BitOr; +#[cfg(not(windows))] +use std::os::unix::ffi::OsStrExt; +use std::path::MAIN_SEPARATOR; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::error::{UResult, USimpleError, UUsageError}; -use uucore::{format_usage, help_about, help_section, help_usage, prompt_yes, show_error}; +use uucore::{ + format_usage, help_about, help_section, help_usage, os_str_as_bytes, prompt_yes, show_error, +}; use walkdir::{DirEntry, WalkDir}; #[derive(Eq, PartialEq, Clone, Copy)] @@ -290,6 +295,7 @@ pub fn remove(files: &[&OsStr], options: &Options) -> bool { for filename in files { let file = Path::new(filename); + had_err = match file.symlink_metadata() { 
Ok(metadata) => { if metadata.is_dir() { @@ -300,6 +306,7 @@ pub fn remove(files: &[&OsStr], options: &Options) -> bool { remove_file(file, options) } } + Err(_e) => { // TODO: actually print out the specific error // TODO: When the error is not about missing files @@ -326,6 +333,15 @@ pub fn remove(files: &[&OsStr], options: &Options) -> bool { fn handle_dir(path: &Path, options: &Options) -> bool { let mut had_err = false; + let path = clean_trailing_slashes(path); + if path_is_current_or_parent_directory(path) { + show_error!( + "refusing to remove '.' or '..' directory: skipping '{}'", + path.display() + ); + return true; + } + let is_root = path.has_root() && path.parent().is_none(); if options.recursive && (!is_root || !options.preserve_root) { if options.interactive != InteractiveMode::Always && !options.verbose { @@ -396,7 +412,11 @@ fn handle_dir(path: &Path, options: &Options) -> bool { } else if options.dir && (!is_root || !options.preserve_root) { had_err = remove_dir(path, options).bitor(had_err); } else if options.recursive { - show_error!("could not remove directory {}", path.quote()); + show_error!( + "it is dangerous to operate recursively on '{}'", + MAIN_SEPARATOR + ); + show_error!("use --no-preserve-root to override this failsafe"); had_err = true; } else { show_error!( @@ -559,6 +579,20 @@ fn handle_writable_directory(path: &Path, options: &Options, metadata: &Metadata true } } +/// Checks if the path is referring to current or parent directory , if it is referring to current or any parent directory in the file tree e.g '/../..' , '../..' 
+fn path_is_current_or_parent_directory(path: &Path) -> bool { + let path_str = os_str_as_bytes(path.as_os_str()); + let dir_separator = MAIN_SEPARATOR as u8; + if let Ok(path_bytes) = path_str { + return path_bytes == ([b'.']) + || path_bytes == ([b'.', b'.']) + || path_bytes.ends_with(&[dir_separator, b'.']) + || path_bytes.ends_with(&[dir_separator, b'.', b'.']) + || path_bytes.ends_with(&[dir_separator, b'.', dir_separator]) + || path_bytes.ends_with(&[dir_separator, b'.', b'.', dir_separator]); + } + false +} // For windows we can use windows metadata trait and file attributes to see if a directory is readonly #[cfg(windows)] @@ -586,6 +620,40 @@ fn handle_writable_directory(path: &Path, options: &Options, metadata: &Metadata } } +/// Removes trailing slashes, for example 'd/../////' yield 'd/../' required to fix rm-r4 GNU test +fn clean_trailing_slashes(path: &Path) -> &Path { + let path_str = os_str_as_bytes(path.as_os_str()); + let dir_separator = MAIN_SEPARATOR as u8; + + if let Ok(path_bytes) = path_str { + let mut idx = if path_bytes.len() > 1 { + path_bytes.len() - 1 + } else { + return path; + }; + // Checks if element at the end is a '/' + if path_bytes[idx] == dir_separator { + for i in (1..path_bytes.len()).rev() { + // Will break at the start of the continuous sequence of '/', eg: "abc//////" , will break at + // "abc/", this will clean ////// to the root '/', so we have to be careful to not + // delete the root. 
+ if path_bytes[i - 1] != dir_separator { + idx = i; + break; + } + } + #[cfg(unix)] + return Path::new(OsStr::from_bytes(&path_bytes[0..=idx])); + + #[cfg(not(unix))] + // Unwrapping is fine here as os_str_as_bytes() would return an error on non unix + // systems with non utf-8 characters and thus bypass the if let Ok branch + return Path::new(std::str::from_utf8(&path_bytes[0..=idx]).unwrap()); + } + } + path +} + fn prompt_descend(path: &Path) -> bool { prompt_yes!("descend into directory {}?", path.quote()) } @@ -611,3 +679,17 @@ fn is_symlink_dir(metadata: &Metadata) -> bool { metadata.file_type().is_symlink() && ((metadata.file_attributes() & FILE_ATTRIBUTE_DIRECTORY) != 0) } + +mod tests { + + #[test] + // Testing whether path the `/////` collapses to `/` + fn test_collapsible_slash_path() { + use std::path::Path; + + use crate::clean_trailing_slashes; + let path = Path::new("/////"); + + assert_eq!(Path::new("/"), clean_trailing_slashes(path)); + } +} diff --git a/tests/by-util/test_rm.rs b/tests/by-util/test_rm.rs index f997688c8..b220926fe 100644 --- a/tests/by-util/test_rm.rs +++ b/tests/by-util/test_rm.rs @@ -677,6 +677,68 @@ fn test_remove_inaccessible_dir() { assert!(!at.dir_exists(dir_1)); } +#[test] +#[cfg(not(windows))] +fn test_rm_current_or_parent_dir_rm4() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir("d"); + + let answers = [ + "rm: refusing to remove '.' or '..' directory: skipping 'd/.'", + "rm: refusing to remove '.' or '..' directory: skipping 'd/./'", + "rm: refusing to remove '.' or '..' directory: skipping 'd/./'", + "rm: refusing to remove '.' or '..' directory: skipping 'd/..'", + "rm: refusing to remove '.' or '..' 
directory: skipping 'd/../'", + ]; + let std_err_str = ts + .ucmd() + .arg("-rf") + .arg("d/.") + .arg("d/./") + .arg("d/.////") + .arg("d/..") + .arg("d/../") + .fails() + .stderr_move_str(); + + for (idx, line) in std_err_str.lines().enumerate() { + assert_eq!(line, answers[idx]); + } +} + +#[test] +#[cfg(windows)] +fn test_rm_current_or_parent_dir_rm4_windows() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir("d"); + + let answers = [ + "rm: refusing to remove '.' or '..' directory: skipping 'd\\.'", + "rm: refusing to remove '.' or '..' directory: skipping 'd\\.\\'", + "rm: refusing to remove '.' or '..' directory: skipping 'd\\.\\'", + "rm: refusing to remove '.' or '..' directory: skipping 'd\\..'", + "rm: refusing to remove '.' or '..' directory: skipping 'd\\..\\'", + ]; + let std_err_str = ts + .ucmd() + .arg("-rf") + .arg("d\\.") + .arg("d\\.\\") + .arg("d\\.\\\\\\\\") + .arg("d\\..") + .arg("d\\..\\") + .fails() + .stderr_move_str(); + + for (idx, line) in std_err_str.lines().enumerate() { + assert_eq!(line, answers[idx]); + } +} + #[test] #[cfg(not(windows))] fn test_fifo_removal() { From 5b087e96246f1d10c471f9219fa932bacb91ae83 Mon Sep 17 00:00:00 2001 From: andreistan26 <48967297+andreistan26@users.noreply.github.com> Date: Wed, 4 Dec 2024 01:04:55 +0200 Subject: [PATCH 086/179] Fix `cut` when lines dont end with specified delim (#5844) Print lines without delimiters only when they end with specified line terminator('\n' by default or `\0` if `-s`) Signed-off-by: Andrei Stan Co-authored-by: Sylvestre Ledru --- src/uu/cut/src/cut.rs | 5 +---- tests/by-util/test_cut.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index cd6eb22d3..25bb73330 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -213,11 +213,8 @@ fn cut_fields_implicit_out_delim( let mut print_delim = false; if delim_search.peek().is_none() { - if !only_delimited { + 
if !only_delimited && line[line.len() - 1] == newline_char { out.write_all(line)?; - if line[line.len() - 1] != newline_char { - out.write_all(&[newline_char])?; - } } return Ok(true); diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 86d3ddf0f..6b376b0ca 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -282,6 +282,15 @@ fn test_multiple() { assert_eq!(result.stderr_str(), ""); } +#[test] +fn test_newline_delimited() { + new_ucmd!() + .args(&["-f", "1", "-d", "\n"]) + .pipe_in("a:1\nb:") + .succeeds() + .stdout_only_bytes("a:1\n"); +} + #[test] fn test_multiple_mode_args() { for args in [ From a5867bdf34cdee2b71891d5922c5ed424c4e8f33 Mon Sep 17 00:00:00 2001 From: Chen Chen Date: Sun, 28 Jul 2024 12:08:36 -0500 Subject: [PATCH 087/179] install: create destination file with safer modes before copy --- src/uu/install/src/install.rs | 56 ++++++++++++++++++++++++++--------- tests/by-util/test_install.rs | 38 ++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 14 deletions(-) diff --git a/src/uu/install/src/install.rs b/src/uu/install/src/install.rs index 331a50f67..8f5d381fe 100644 --- a/src/uu/install/src/install.rs +++ b/src/uu/install/src/install.rs @@ -13,9 +13,12 @@ use filetime::{set_file_times, FileTime}; use std::error::Error; use std::fmt::{Debug, Display}; use std::fs; +#[cfg(not(unix))] use std::fs::File; use std::os::unix::fs::MetadataExt; #[cfg(unix)] +use std::os::unix::fs::OpenOptionsExt; +#[cfg(unix)] use std::os::unix::prelude::OsStrExt; use std::path::{Path, PathBuf, MAIN_SEPARATOR}; use std::process; @@ -750,27 +753,52 @@ fn perform_backup(to: &Path, b: &Behavior) -> UResult> { fn copy_file(from: &Path, to: &Path) -> UResult<()> { // fs::copy fails if destination is a invalid symlink. // so lets just remove all existing files at destination before copy. - if let Err(e) = fs::remove_file(to) { - if e.kind() != std::io::ErrorKind::NotFound { - show_error!( - "Failed to remove existing file {}. 
Error: {:?}", - to.display(), - e - ); + let remove_destination = || { + if let Err(e) = fs::remove_file(to) { + if e.kind() != std::io::ErrorKind::NotFound { + show_error!( + "Failed to remove existing file {}. Error: {:?}", + to.display(), + e + ); + } } + }; + remove_destination(); + + // create the destination file first. Using safer mode on unix to avoid + // potential unsafe mode between time-of-creation and time-of-chmod. + #[cfg(unix)] + let creation = fs::OpenOptions::new() + .write(true) + .create_new(true) + .mode(0o600) + .open(to); + #[cfg(not(unix))] + let creation = File::create(to); + + if let Err(e) = creation { + show_error!( + "Failed to create destination file {}. Error: {:?}", + to.display(), + e + ); + return Err(InstallError::InstallFailed(from.to_path_buf(), to.to_path_buf(), e).into()); } - if from.as_os_str() == "/dev/null" { - /* workaround a limitation of fs::copy - * https://github.com/rust-lang/rust/issues/79390 - */ - if let Err(err) = File::create(to) { + // drop the file to close the fd of creation + drop(creation); + + /* workaround a limitation of fs::copy: skip copy if source is /dev/null + * https://github.com/rust-lang/rust/issues/79390 + */ + if from.as_os_str() != "/dev/null" { + if let Err(err) = fs::copy(from, to) { + remove_destination(); return Err( InstallError::InstallFailed(from.to_path_buf(), to.to_path_buf(), err).into(), ); } - } else if let Err(err) = fs::copy(from, to) { - return Err(InstallError::InstallFailed(from.to_path_buf(), to.to_path_buf(), err).into()); } Ok(()) } diff --git a/tests/by-util/test_install.rs b/tests/by-util/test_install.rs index f1e3302e1..6fa9cc62f 100644 --- a/tests/by-util/test_install.rs +++ b/tests/by-util/test_install.rs @@ -1717,3 +1717,41 @@ fn test_install_root_combined() { run_and_check(&["-Cv", "c", "d"], "d", 0, 0); run_and_check(&["-Cv", "c", "d"], "d", 0, 0); } + +#[cfg(all(unix, feature = "chmod"))] +#[test] +fn test_install_copy_failures() { + let scene = 
TestScenario::new(util_name!()); + + let at = &scene.fixtures; + + let file1 = "source_file"; + let file2 = "target_file"; + + at.touch(file1); + scene.ccmd("chmod").arg("000").arg(file1).succeeds(); + + // if source file is not readable, it will raise a permission error. + // since we create the file with mode 0600 before `fs::copy`, if the + // copy failed, the file should be removed. + scene + .ucmd() + .arg(file1) + .arg(file2) + .arg("--mode=400") + .fails() + .stderr_contains("permission denied"); + assert!(!at.file_exists(file2)); + + // if source file is good to copy, it should succeed and set the + // destination file permissions accordingly + scene.ccmd("chmod").arg("644").arg(file1).succeeds(); + scene + .ucmd() + .arg(file1) + .arg(file2) + .arg("--mode=400") + .succeeds(); + assert!(at.file_exists(file2)); + assert_eq!(0o100_400_u32, at.metadata(file2).permissions().mode()); +} From caf08dd279114ba6d7355cd90ea2fdd9050336df Mon Sep 17 00:00:00 2001 From: Andrew Liebenow Date: Fri, 11 Oct 2024 10:04:21 -0500 Subject: [PATCH 088/179] basenc: ignore Interrupted errors Mirror behavior of `std::io::Read`'s `read_to_end` function ([link][1]): continue reading when errors with kind `std::io::ErrorKind::Interrupted` are encountered. Also: clean up a few other things. 
[1]: https://doc.rust-lang.org/std/io/trait.Read.html#method.read_to_end --- src/uu/base32/src/base_common.rs | 34 ++++++++++++++++---------------- src/uu/paste/src/paste.rs | 10 +++++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index f6b88f551..130fe8626 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -160,7 +160,7 @@ pub fn get_input(config: &Config) -> UResult> { } } -pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { +pub fn handle_input(input: &mut dyn Read, format: Format, config: Config) -> UResult<()> { let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); @@ -377,13 +377,13 @@ pub mod fast_encode { } fn write_to_output( - line_wrapping_option: &mut Option, + line_wrapping: &mut Option, encoded_buffer: &mut VecDeque, output: &mut dyn Write, is_cleanup: bool, ) -> io::Result<()> { // Write all data in `encoded_buffer` to `output` - if let &mut Some(ref mut li) = line_wrapping_option { + if let &mut Some(ref mut li) = line_wrapping { write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?; } else { write_without_line_breaks(encoded_buffer, output, is_cleanup)?; @@ -393,9 +393,9 @@ pub mod fast_encode { } // End of helper functions - pub fn fast_encode( - input: &mut R, - mut output: W, + pub fn fast_encode( + input: &mut dyn Read, + output: &mut dyn Write, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, wrap: Option, ) -> UResult<()> { @@ -475,14 +475,14 @@ pub mod fast_encode { assert!(leftover_buffer.len() < encode_in_chunks_of_size); // Write all data in `encoded_buffer` to `output` - write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?; + write_to_output(&mut line_wrapping, &mut encoded_buffer, output, false)?; } Err(er) => { let 
kind = er.kind(); if kind == ErrorKind::Interrupted { - // TODO - // Retry reading? + // Retry reading + continue; } return Err(USimpleError::new(1, format_read_error(kind))); @@ -499,7 +499,7 @@ pub mod fast_encode { // Write all data in `encoded_buffer` to output // `is_cleanup` triggers special cleanup-only logic - write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, true)?; + write_to_output(&mut line_wrapping, &mut encoded_buffer, output, true)?; } Ok(()) @@ -606,9 +606,9 @@ pub mod fast_decode { } // End of helper functions - pub fn fast_decode( - input: &mut R, - mut output: &mut W, + pub fn fast_decode( + input: &mut dyn Read, + output: &mut dyn Write, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, ignore_garbage: bool, ) -> UResult<()> { @@ -711,14 +711,14 @@ pub mod fast_decode { assert!(leftover_buffer.len() < decode_in_chunks_of_size); // Write all data in `decoded_buffer` to `output` - write_to_output(&mut decoded_buffer, &mut output)?; + write_to_output(&mut decoded_buffer, output)?; } Err(er) => { let kind = er.kind(); if kind == ErrorKind::Interrupted { - // TODO - // Retry reading? 
+ // Retry reading + continue; } return Err(USimpleError::new(1, format_read_error(kind))); @@ -734,7 +734,7 @@ pub mod fast_decode { .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; // Write all data in `decoded_buffer` to `output` - write_to_output(&mut decoded_buffer, &mut output)?; + write_to_output(&mut decoded_buffer, output)?; } Ok(()) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index 9d2619781..456639ba9 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -200,7 +200,7 @@ fn parse_delimiters(delimiters: &str) -> UResult]>> { let mut add_single_char_delimiter = |vec: &mut Vec>, ch: char| { let delimiter_encoded = ch.encode_utf8(&mut buffer); - vec.push(Box::from(delimiter_encoded.as_bytes())); + vec.push(Box::<[u8]>::from(delimiter_encoded.as_bytes())); }; let mut vec = Vec::>::with_capacity(delimiters.len()); @@ -311,7 +311,7 @@ impl<'a> DelimiterState<'a> { DelimiterState::MultipleDelimiters { current_delimiter, .. } => current_delimiter.len(), - _ => { + DelimiterState::NoDelimiters => { return; } }; @@ -350,7 +350,7 @@ impl<'a> DelimiterState<'a> { *current_delimiter = bo; } - _ => {} + DelimiterState::NoDelimiters => {} } } } @@ -363,8 +363,8 @@ enum InputSource { impl InputSource { fn read_until(&mut self, byte: u8, buf: &mut Vec) -> UResult { let us = match self { - Self::File(bu) => bu.read_until(byte, buf)?, - Self::StandardInput(rc) => rc + InputSource::File(bu) => bu.read_until(byte, buf)?, + InputSource::StandardInput(rc) => rc .try_borrow() .map_err(|bo| USimpleError::new(1, format!("{bo}")))? 
.lock() From 0780e26914016a593f3ed6e1ff999afa7afbd508 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 4 Dec 2024 11:08:28 +0100 Subject: [PATCH 089/179] Revert "install: create destination file with safer modes before copy" --- src/uu/install/src/install.rs | 56 +++++++++-------------------------- tests/by-util/test_install.rs | 38 ------------------------ 2 files changed, 14 insertions(+), 80 deletions(-) diff --git a/src/uu/install/src/install.rs b/src/uu/install/src/install.rs index 8f5d381fe..331a50f67 100644 --- a/src/uu/install/src/install.rs +++ b/src/uu/install/src/install.rs @@ -13,12 +13,9 @@ use filetime::{set_file_times, FileTime}; use std::error::Error; use std::fmt::{Debug, Display}; use std::fs; -#[cfg(not(unix))] use std::fs::File; use std::os::unix::fs::MetadataExt; #[cfg(unix)] -use std::os::unix::fs::OpenOptionsExt; -#[cfg(unix)] use std::os::unix::prelude::OsStrExt; use std::path::{Path, PathBuf, MAIN_SEPARATOR}; use std::process; @@ -753,52 +750,27 @@ fn perform_backup(to: &Path, b: &Behavior) -> UResult> { fn copy_file(from: &Path, to: &Path) -> UResult<()> { // fs::copy fails if destination is a invalid symlink. // so lets just remove all existing files at destination before copy. - let remove_destination = || { - if let Err(e) = fs::remove_file(to) { - if e.kind() != std::io::ErrorKind::NotFound { - show_error!( - "Failed to remove existing file {}. Error: {:?}", - to.display(), - e - ); - } + if let Err(e) = fs::remove_file(to) { + if e.kind() != std::io::ErrorKind::NotFound { + show_error!( + "Failed to remove existing file {}. Error: {:?}", + to.display(), + e + ); } - }; - remove_destination(); - - // create the destination file first. Using safer mode on unix to avoid - // potential unsafe mode between time-of-creation and time-of-chmod. 
- #[cfg(unix)] - let creation = fs::OpenOptions::new() - .write(true) - .create_new(true) - .mode(0o600) - .open(to); - #[cfg(not(unix))] - let creation = File::create(to); - - if let Err(e) = creation { - show_error!( - "Failed to create destination file {}. Error: {:?}", - to.display(), - e - ); - return Err(InstallError::InstallFailed(from.to_path_buf(), to.to_path_buf(), e).into()); } - // drop the file to close the fd of creation - drop(creation); - - /* workaround a limitation of fs::copy: skip copy if source is /dev/null - * https://github.com/rust-lang/rust/issues/79390 - */ - if from.as_os_str() != "/dev/null" { - if let Err(err) = fs::copy(from, to) { - remove_destination(); + if from.as_os_str() == "/dev/null" { + /* workaround a limitation of fs::copy + * https://github.com/rust-lang/rust/issues/79390 + */ + if let Err(err) = File::create(to) { return Err( InstallError::InstallFailed(from.to_path_buf(), to.to_path_buf(), err).into(), ); } + } else if let Err(err) = fs::copy(from, to) { + return Err(InstallError::InstallFailed(from.to_path_buf(), to.to_path_buf(), err).into()); } Ok(()) } diff --git a/tests/by-util/test_install.rs b/tests/by-util/test_install.rs index 6fa9cc62f..f1e3302e1 100644 --- a/tests/by-util/test_install.rs +++ b/tests/by-util/test_install.rs @@ -1717,41 +1717,3 @@ fn test_install_root_combined() { run_and_check(&["-Cv", "c", "d"], "d", 0, 0); run_and_check(&["-Cv", "c", "d"], "d", 0, 0); } - -#[cfg(all(unix, feature = "chmod"))] -#[test] -fn test_install_copy_failures() { - let scene = TestScenario::new(util_name!()); - - let at = &scene.fixtures; - - let file1 = "source_file"; - let file2 = "target_file"; - - at.touch(file1); - scene.ccmd("chmod").arg("000").arg(file1).succeeds(); - - // if source file is not readable, it will raise a permission error. - // since we create the file with mode 0600 before `fs::copy`, if the - // copy failed, the file should be removed. 
- scene - .ucmd() - .arg(file1) - .arg(file2) - .arg("--mode=400") - .fails() - .stderr_contains("permission denied"); - assert!(!at.file_exists(file2)); - - // if source file is good to copy, it should succeed and set the - // destination file permissions accordingly - scene.ccmd("chmod").arg("644").arg(file1).succeeds(); - scene - .ucmd() - .arg(file1) - .arg(file2) - .arg("--mode=400") - .succeeds(); - assert!(at.file_exists(file2)); - assert_eq!(0o100_400_u32, at.metadata(file2).permissions().mode()); -} From de775caa9c2b7882f12401e8028b3cba7ab7a682 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 4 Dec 2024 13:23:03 +0100 Subject: [PATCH 090/179] alacritty: remove unused patch --- src/uu/dircolors/README.md | 6 ------ src/uu/dircolors/alacritty-supports-colors.patch | 12 ------------ 2 files changed, 18 deletions(-) delete mode 100644 src/uu/dircolors/alacritty-supports-colors.patch diff --git a/src/uu/dircolors/README.md b/src/uu/dircolors/README.md index f4ec5d675..62944d490 100644 --- a/src/uu/dircolors/README.md +++ b/src/uu/dircolors/README.md @@ -9,12 +9,6 @@ dircolors -b > /PATH_TO_COREUTILS/tests/fixtures/dircolors/bash_def.expected dircolors -c > /PATH_TO_COREUTILS/tests/fixtures/dircolors/csh_def.expected ``` -Apply the patches to include more terminals that support colors: - -```shell -git apply /PATH_TO_COREUTILS/src/uu/dircolors/alacritty-supports-colors.patch -``` - Run the tests: ```shell diff --git a/src/uu/dircolors/alacritty-supports-colors.patch b/src/uu/dircolors/alacritty-supports-colors.patch deleted file mode 100644 index c6f022423..000000000 --- a/src/uu/dircolors/alacritty-supports-colors.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/tests/fixtures/dircolors/internal.expected b/tests/fixtures/dircolors/internal.expected -index e151973f2..01dae4273 100644 ---- a/tests/fixtures/dircolors/internal.expected -+++ b/tests/fixtures/dircolors/internal.expected -@@ -7,6 +7,7 @@ - # restrict following config to systems with 
matching environment variables. - COLORTERM ?* - TERM Eterm -+TERM alacritty* - TERM ansi - TERM *color* - TERM con[0-9]*x[0-9]* From a6447241375976d9d7477de21eabe47461babbff Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 4 Dec 2024 13:25:24 +0100 Subject: [PATCH 091/179] uucore: add foot to the list of terminals that support colors --- src/uucore/src/lib/features/colors.rs | 1 + tests/fixtures/dircolors/internal.expected | 1 + 2 files changed, 2 insertions(+) diff --git a/src/uucore/src/lib/features/colors.rs b/src/uucore/src/lib/features/colors.rs index f8cbc9ebf..885ae2fe9 100644 --- a/src/uucore/src/lib/features/colors.rs +++ b/src/uucore/src/lib/features/colors.rs @@ -22,6 +22,7 @@ pub static TERMS: &[&str] = &[ "cygwin", "*direct*", "dtterm", + "foot", "gnome", "hurd", "jfbterm", diff --git a/tests/fixtures/dircolors/internal.expected b/tests/fixtures/dircolors/internal.expected index 01dae4273..feea46455 100644 --- a/tests/fixtures/dircolors/internal.expected +++ b/tests/fixtures/dircolors/internal.expected @@ -16,6 +16,7 @@ TERM console TERM cygwin TERM *direct* TERM dtterm +TERM foot TERM gnome TERM hurd TERM jfbterm From 93dfc933bd4d50a349913c4c1b1cf1850433630c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 4 Dec 2024 22:02:57 +0100 Subject: [PATCH 092/179] base32/base64: handle two corner cases * no padding * --wrap 0 + remove property_tests.rs, we don't need such tests as the code is already tests by test_base* (+ it is too dependant on the code structure) Should make base64.pl pass --- Cargo.lock | 83 +---- src/uu/base32/Cargo.toml | 5 - src/uu/base32/src/base32.rs | 3 +- src/uu/base32/src/base_common.rs | 131 ++++++-- src/uu/base32/tests/property_tests.rs | 430 -------------------------- tests/by-util/test_base64.rs | 33 ++ 6 files changed, 145 insertions(+), 540 deletions(-) delete mode 100644 src/uu/base32/tests/property_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 1a70d4c02..a526c8f98 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -185,21 +185,6 @@ dependencies = [ "syn 2.0.87", ] -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec", -] - -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bitflags" version = "1.3.2" @@ -1557,7 +1542,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", - "libm", ] [[package]] @@ -1821,32 +1805,6 @@ dependencies = [ "hex", ] -[[package]] -name = "proptest" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" -dependencies = [ - "bit-set", - "bit-vec", - "bitflags 2.6.0", - "lazy_static", - "num-traits", - "rand", - "rand_chacha", - "rand_xorshift", - "regex-syntax", - "rusty-fork", - "tempfile", - "unarray", -] - -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quick-error" version = "2.0.1" @@ -1907,15 +1865,6 @@ dependencies = [ "rand_core", ] -[[package]] -name = "rand_xorshift" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" -dependencies = [ - "rand_core", -] - [[package]] name = "rayon" version = "1.10.0" @@ -2084,18 +2033,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rusty-fork" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" -dependencies = [ - "fnv", - "quick-error 1.2.3", - "tempfile", - "wait-timeout", -] - [[package]] name = "same-file" version = "1.0.6" @@ -2482,12 +2419,6 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" -[[package]] -name = "unarray" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" - [[package]] name = "unicode-ident" version = "1.0.13" @@ -2574,7 +2505,6 @@ name = "uu_base32" version = "0.0.28" dependencies = [ "clap", - "proptest", "uucore", ] @@ -2686,7 +2616,7 @@ dependencies = [ "filetime", "indicatif", "libc", - "quick-error 2.0.1", + "quick-error", "selinux", "uucore", "walkdir", @@ -3134,7 +3064,7 @@ dependencies = [ "chrono", "clap", "itertools", - "quick-error 2.0.1", + "quick-error", "regex", "uucore", ] @@ -3631,15 +3561,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "wait-timeout" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" -dependencies = [ - "libc", -] - [[package]] name = "walkdir" version = "2.5.0" diff --git a/src/uu/base32/Cargo.toml b/src/uu/base32/Cargo.toml index ffcd4796c..26ab2bc6f 100644 --- a/src/uu/base32/Cargo.toml +++ b/src/uu/base32/Cargo.toml @@ -1,5 +1,3 @@ -# spell-checker:ignore proptest - [package] name = "uu_base32" version = "0.0.28" @@ -22,9 +20,6 @@ path = "src/base32.rs" clap = { workspace = true } uucore = { workspace = true, features = ["encoding"] } -[dev-dependencies] -proptest = "1.5.0" - [[bin]] name = "base32" path = "src/main.rs" diff --git 
a/src/uu/base32/src/base32.rs b/src/uu/base32/src/base32.rs index 46a0361ea..e14e83921 100644 --- a/src/uu/base32/src/base32.rs +++ b/src/uu/base32/src/base32.rs @@ -5,6 +5,7 @@ pub mod base_common; +use base_common::ReadSeek; use clap::Command; use uucore::{encoding::Format, error::UResult, help_about, help_usage}; @@ -17,7 +18,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; - let mut input = base_common::get_input(&config)?; + let mut input: Box = base_common::get_input(&config)?; base_common::handle_input(&mut input, format, config) } diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 130fe8626..84a461963 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -3,15 +3,15 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore hexupper lsbf msbf unpadded +// spell-checker:ignore hexupper lsbf msbf unpadded nopad aGVsbG8sIHdvcmxkIQ use clap::{crate_version, Arg, ArgAction, Command}; use std::fs::File; -use std::io::{self, ErrorKind, Read}; +use std::io::{self, ErrorKind, Read, Seek, SeekFrom}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::encoding::{ - for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}, + for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, BASE64_NOPAD, HEXUPPER}, Format, Z85Wrapper, BASE2LSBF, BASE2MSBF, }; use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode}; @@ -143,25 +143,50 @@ pub fn base_app(about: &'static str, usage: &str) -> Command { ) } -pub fn get_input(config: &Config) -> UResult> { +/// A trait alias for types that implement both `Read` and `Seek`. +pub trait ReadSeek: Read + Seek {} + +/// Automatically implement the `ReadSeek` trait for any type that implements both `Read` and `Seek`. 
+impl ReadSeek for T {} + +pub fn get_input(config: &Config) -> UResult> { match &config.to_read { Some(path_buf) => { // Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode` let file = File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?; - Ok(Box::new(file)) } None => { - let stdin_lock = io::stdin().lock(); - - Ok(Box::new(stdin_lock)) + let mut buffer = Vec::new(); + io::stdin().read_to_end(&mut buffer)?; + Ok(Box::new(io::Cursor::new(buffer))) } } } -pub fn handle_input(input: &mut dyn Read, format: Format, config: Config) -> UResult<()> { - let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); +/// Determines if the input buffer ends with padding ('=') after trimming trailing whitespace. +fn has_padding(input: &mut R) -> UResult { + let mut buf = Vec::new(); + input + .read_to_end(&mut buf) + .map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?; + + // Reverse iterator and skip trailing whitespace without extra collections + let has_padding = buf + .iter() + .rfind(|&&byte| !byte.is_ascii_whitespace()) + .is_some_and(|&byte| byte == b'='); + + input.seek(SeekFrom::Start(0))?; + Ok(has_padding) +} + +pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { + let has_padding = has_padding(input)?; + + let supports_fast_decode_and_encode = + get_supports_fast_decode_and_encode(format, config.decode, has_padding); let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); @@ -184,7 +209,11 @@ pub fn handle_input(input: &mut dyn Read, format: Format, config: Config) -> URe } } -pub fn get_supports_fast_decode_and_encode(format: Format) -> Box { +pub fn get_supports_fast_decode_and_encode( + format: Format, + decode: bool, + has_padding: bool, +) -> Box { const BASE16_VALID_DECODING_MULTIPLE: usize = 2; const BASE2_VALID_DECODING_MULTIPLE: usize = 8; const BASE32_VALID_DECODING_MULTIPLE: usize = 8; @@ -231,13 
+260,24 @@ pub fn get_supports_fast_decode_and_encode(format: Format) -> Box { + let alphabet: &[u8] = if has_padding { + &b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/="[..] + } else { + &b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"[..] + }; + let wrapper = if decode && !has_padding { + BASE64_NOPAD + } else { + BASE64 + }; + Box::from(EncodingWrapper::new( + wrapper, + BASE64_VALID_DECODING_MULTIPLE, + BASE64_UNPADDED_MULTIPLE, + alphabet, + )) + } Format::Base64Url => Box::from(EncodingWrapper::new( BASE64URL, BASE64_VALID_DECODING_MULTIPLE, @@ -316,6 +356,7 @@ pub mod fast_encode { encoded_buffer: &mut VecDeque, output: &mut dyn Write, is_cleanup: bool, + empty_wrap: bool, ) -> io::Result<()> { // TODO // `encoded_buffer` only has to be a VecDeque if line wrapping is enabled @@ -324,7 +365,9 @@ pub mod fast_encode { output.write_all(encoded_buffer.make_contiguous())?; if is_cleanup { - output.write_all(b"\n")?; + if !empty_wrap { + output.write_all(b"\n")?; + } } else { encoded_buffer.clear(); } @@ -381,12 +424,13 @@ pub mod fast_encode { encoded_buffer: &mut VecDeque, output: &mut dyn Write, is_cleanup: bool, + empty_wrap: bool, ) -> io::Result<()> { // Write all data in `encoded_buffer` to `output` if let &mut Some(ref mut li) = line_wrapping { write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?; } else { - write_without_line_breaks(encoded_buffer, output, is_cleanup)?; + write_without_line_breaks(encoded_buffer, output, is_cleanup, empty_wrap)?; } Ok(()) @@ -473,9 +517,14 @@ pub mod fast_encode { )?; assert!(leftover_buffer.len() < encode_in_chunks_of_size); - // Write all data in `encoded_buffer` to `output` - write_to_output(&mut line_wrapping, &mut encoded_buffer, output, false)?; + write_to_output( + &mut line_wrapping, + &mut encoded_buffer, + output, + false, + wrap == Some(0), + )?; } Err(er) => { let kind = er.kind(); @@ -499,7 +548,13 @@ pub mod fast_encode { // Write all data in 
`encoded_buffer` to output // `is_cleanup` triggers special cleanup-only logic - write_to_output(&mut line_wrapping, &mut encoded_buffer, output, true)?; + write_to_output( + &mut line_wrapping, + &mut encoded_buffer, + output, + true, + wrap == Some(0), + )?; } Ok(()) @@ -759,3 +814,33 @@ fn format_read_error(kind: ErrorKind) -> String { format!("read error: {kind_string_capitalized}") } + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + #[test] + fn test_has_padding() { + let test_cases = vec![ + ("aGVsbG8sIHdvcmxkIQ==", true), + ("aGVsbG8sIHdvcmxkIQ== ", true), + ("aGVsbG8sIHdvcmxkIQ==\n", true), + ("aGVsbG8sIHdvcmxkIQ== \n", true), + ("aGVsbG8sIHdvcmxkIQ=", true), + ("aGVsbG8sIHdvcmxkIQ= ", true), + ("aGVsbG8sIHdvcmxkIQ \n", false), + ("aGVsbG8sIHdvcmxkIQ", false), + ]; + + for (input, expected) in test_cases { + let mut cursor = Cursor::new(input.as_bytes()); + assert_eq!( + has_padding(&mut cursor).unwrap(), + expected, + "Failed for input: '{}'", + input + ); + } + } +} diff --git a/src/uu/base32/tests/property_tests.rs b/src/uu/base32/tests/property_tests.rs deleted file mode 100644 index 0f2393c42..000000000 --- a/src/uu/base32/tests/property_tests.rs +++ /dev/null @@ -1,430 +0,0 @@ -// spell-checker:ignore lsbf msbf proptest - -use proptest::{prelude::TestCaseError, prop_assert, prop_assert_eq, test_runner::TestRunner}; -use std::io::Cursor; -use uu_base32::base_common::{fast_decode, fast_encode, get_supports_fast_decode_and_encode}; -use uucore::encoding::{Format, SupportsFastDecodeAndEncode}; - -const CASES: u32 = { - #[cfg(debug_assertions)] - { - 32 - } - - #[cfg(not(debug_assertions))] - { - 128 - } -}; - -const NORMAL_INPUT_SIZE_LIMIT: usize = { - #[cfg(debug_assertions)] - { - // 256 kibibytes - 256 * 1024 - } - - #[cfg(not(debug_assertions))] - { - // 4 mebibytes - 4 * 1024 * 1024 - } -}; - -const LARGE_INPUT_SIZE_LIMIT: usize = 4 * NORMAL_INPUT_SIZE_LIMIT; - -// Note that `TestRunner`s cannot be reused -fn get_test_runner() 
-> TestRunner { - TestRunner::new(proptest::test_runner::Config { - cases: CASES, - failure_persistence: None, - - ..proptest::test_runner::Config::default() - }) -} - -fn generic_round_trip(format: Format) { - let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); - - let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); - - // Make sure empty inputs round trip - { - get_test_runner() - .run( - &( - proptest::bool::ANY, - proptest::bool::ANY, - proptest::option::of(0_usize..512_usize), - ), - |(ignore_garbage, line_wrap_zero, line_wrap)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - // Do not add garbage - Vec::<(usize, u8)>::new(), - // Empty input - Vec::::new(), - ) - }, - ) - .unwrap(); - } - - // Unusually large line wrapping settings - { - get_test_runner() - .run( - &( - proptest::bool::ANY, - proptest::bool::ANY, - proptest::option::of(512_usize..65_535_usize), - proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), - ), - |(ignore_garbage, line_wrap_zero, line_wrap, input)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - // Do not add garbage - Vec::<(usize, u8)>::new(), - input, - ) - }, - ) - .unwrap(); - } - - // Spend more time on sane line wrapping settings - { - get_test_runner() - .run( - &( - proptest::bool::ANY, - proptest::bool::ANY, - proptest::option::of(0_usize..512_usize), - proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), - ), - |(ignore_garbage, line_wrap_zero, line_wrap, input)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - // Do not add garbage - Vec::<(usize, u8)>::new(), - input, - ) - }, - ) - .unwrap(); - } - - // Test with garbage data - { - get_test_runner() - .run( - &( - 
proptest::bool::ANY, - proptest::bool::ANY, - proptest::option::of(0_usize..512_usize), - // Garbage data to insert - proptest::collection::vec( - ( - // Random index - proptest::num::usize::ANY, - // In all of the encodings being tested, non-ASCII bytes are garbage - 128_u8..=u8::MAX, - ), - 0..4_096, - ), - proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), - ), - |(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - garbage_data, - input, - ) - }, - ) - .unwrap(); - } - - // Test small inputs - { - get_test_runner() - .run( - &( - proptest::bool::ANY, - proptest::bool::ANY, - proptest::option::of(0_usize..512_usize), - proptest::collection::vec(proptest::num::u8::ANY, 0..1_024), - ), - |(ignore_garbage, line_wrap_zero, line_wrap, input)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - // Do not add garbage - Vec::<(usize, u8)>::new(), - input, - ) - }, - ) - .unwrap(); - } - - // Test small inputs with garbage data - { - get_test_runner() - .run( - &( - proptest::bool::ANY, - proptest::bool::ANY, - proptest::option::of(0_usize..512_usize), - // Garbage data to insert - proptest::collection::vec( - ( - // Random index - proptest::num::usize::ANY, - // In all of the encodings being tested, non-ASCII bytes are garbage - 128_u8..=u8::MAX, - ), - 0..1_024, - ), - proptest::collection::vec(proptest::num::u8::ANY, 0..1_024), - ), - |(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - garbage_data, - input, - ) - }, - ) - .unwrap(); - } - - // Test large inputs - { - get_test_runner() - .run( - &( - proptest::bool::ANY, - proptest::bool::ANY, - 
proptest::option::of(0_usize..512_usize), - proptest::collection::vec(proptest::num::u8::ANY, 0..LARGE_INPUT_SIZE_LIMIT), - ), - |(ignore_garbage, line_wrap_zero, line_wrap, input)| { - configurable_round_trip( - format, - supports_fast_decode_and_encode_ref, - ignore_garbage, - line_wrap_zero, - line_wrap, - // Do not add garbage - Vec::<(usize, u8)>::new(), - input, - ) - }, - ) - .unwrap(); - } -} - -fn configurable_round_trip( - format: Format, - supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, - ignore_garbage: bool, - line_wrap_zero: bool, - line_wrap: Option, - garbage_data: Vec<(usize, u8)>, - mut input: Vec, -) -> Result<(), TestCaseError> { - // Z85 only accepts inputs with lengths divisible by 4 - if let Format::Z85 = format { - // Reduce length of "input" until it is divisible by 4 - input.truncate((input.len() / 4) * 4); - - assert!((input.len() % 4) == 0); - } - - let line_wrap_to_use = if line_wrap_zero { Some(0) } else { line_wrap }; - - let input_len = input.len(); - - let garbage_data_len = garbage_data.len(); - - let garbage_data_is_empty = garbage_data_len == 0; - - let (input, encoded) = { - let mut output = Vec::with_capacity(input_len * 8); - - let mut cursor = Cursor::new(input); - - fast_encode::fast_encode( - &mut cursor, - &mut output, - supports_fast_decode_and_encode, - line_wrap_to_use, - ) - .unwrap(); - - (cursor.into_inner(), output) - }; - - let encoded_or_encoded_with_garbage = if garbage_data_is_empty { - encoded - } else { - let encoded_len = encoded.len(); - - let encoded_highest_index = match encoded_len.checked_sub(1) { - Some(0) | None => None, - Some(x) => Some(x), - }; - - let mut garbage_data_indexed = vec![Option::::None; encoded_len]; - - let mut encoded_with_garbage = Vec::::with_capacity(encoded_len + garbage_data_len); - - for (index, garbage_byte) in garbage_data { - if let Some(x) = encoded_highest_index { - let index_to_use = index % x; - - garbage_data_indexed[index_to_use] = Some(garbage_byte); 
- } else { - encoded_with_garbage.push(garbage_byte); - } - } - - for (index, encoded_byte) in encoded.into_iter().enumerate() { - encoded_with_garbage.push(encoded_byte); - - if let Some(garbage_byte) = garbage_data_indexed[index] { - encoded_with_garbage.push(garbage_byte); - } - } - - encoded_with_garbage - }; - - match line_wrap_to_use { - Some(0) => { - let line_endings_count = encoded_or_encoded_with_garbage - .iter() - .filter(|byte| **byte == b'\n') - .count(); - - // If line wrapping is disabled, there should only be one '\n' character (at the very end of the output) - prop_assert_eq!(line_endings_count, 1); - } - _ => { - // TODO - // Validate other line wrapping settings - } - } - - let decoded_or_error = { - let mut output = Vec::with_capacity(input_len); - - let mut cursor = Cursor::new(encoded_or_encoded_with_garbage); - - match fast_decode::fast_decode( - &mut cursor, - &mut output, - supports_fast_decode_and_encode, - ignore_garbage, - ) { - Ok(()) => Ok(output), - Err(er) => Err(er), - } - }; - - let made_round_trip = match decoded_or_error { - Ok(ve) => input.as_slice() == ve.as_slice(), - Err(_) => false, - }; - - let result_was_correct = if garbage_data_is_empty || ignore_garbage { - // If there was no garbage data added, or if "ignore_garbage" was enabled, expect the round trip to succeed - made_round_trip - } else { - // If garbage data was added, and "ignore_garbage" was disabled, expect the round trip to fail - - !made_round_trip - }; - - if !result_was_correct { - eprintln!( - "\ -(configurable_round_trip) FAILURE -format: {format:?} -ignore_garbage: {ignore_garbage} -line_wrap_to_use: {line_wrap_to_use:?} -garbage_data_len: {garbage_data_len} -input_len: {input_len} -", - ); - } - - prop_assert!(result_was_correct); - - Ok(()) -} - -#[test] -fn base16_round_trip() { - generic_round_trip(Format::Base16); -} - -#[test] -fn base2lsbf_round_trip() { - generic_round_trip(Format::Base2Lsbf); -} - -#[test] -fn base2msbf_round_trip() { - 
generic_round_trip(Format::Base2Msbf); -} - -#[test] -fn base32_round_trip() { - generic_round_trip(Format::Base32); -} - -#[test] -fn base32hex_round_trip() { - generic_round_trip(Format::Base32Hex); -} - -#[test] -fn base64_round_trip() { - generic_round_trip(Format::Base64); -} - -#[test] -fn base64url_round_trip() { - generic_round_trip(Format::Base64Url); -} - -#[test] -fn z85_round_trip() { - generic_round_trip(Format::Z85); -} diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs index f07da925f..29b9edf02 100644 --- a/tests/by-util/test_base64.rs +++ b/tests/by-util/test_base64.rs @@ -40,6 +40,28 @@ fn test_encode_repeat_flags_later_wrap_15() { .stdout_only("aGVsbG8sIHdvcmx\nkIQ==\n"); // spell-checker:disable-line } +#[test] +fn test_decode_short() { + let input = "aQ"; + new_ucmd!() + .args(&["--decode"]) + .pipe_in(input) + .succeeds() + .stdout_only("i"); +} + +#[test] +fn test_multi_lines() { + let input = ["aQ\n\n\n", "a\nQ==\n\n\n"]; + for i in input { + new_ucmd!() + .args(&["--decode"]) + .pipe_in(i) + .succeeds() + .stdout_only("i"); + } +} + #[test] fn test_base64_encode_file() { new_ucmd!() @@ -105,6 +127,17 @@ fn test_wrap() { // spell-checker:disable-next-line .stdout_only("VGhlIHF1aWNrIGJyb3du\nIGZveCBqdW1wcyBvdmVy\nIHRoZSBsYXp5IGRvZy4=\n"); } + let input = "hello, world"; + new_ucmd!() + .args(&["--wrap", "0"]) + .pipe_in(input) + .succeeds() + .stdout_only("aGVsbG8sIHdvcmxk"); // spell-checker:disable-line + new_ucmd!() + .args(&["--wrap", "30"]) + .pipe_in(input) + .succeeds() + .stdout_only("aGVsbG8sIHdvcmxk\n"); // spell-checker:disable-line } #[test] From 094cab046cc66505347008dd014c7d43ee022714 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 17:29:52 +0000 Subject: [PATCH 093/179] chore(deps): update rust crate self_cell to v1.1.0 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
1a70d4c02..cb09c5fd6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2113,9 +2113,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "self_cell" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d369a96f978623eb3dc28807c4852d6cc617fed53da5d3c400feff1ef34a714a" +checksum = "c2fdfc24bc566f839a2da4c4295b82db7d25a24253867d5c64355abb5799bdbe" [[package]] name = "selinux" From 76dfcd82faed3c548fc262933aac054ef3a8cb40 Mon Sep 17 00:00:00 2001 From: Arthur Pin Date: Fri, 6 Dec 2024 15:58:40 -0300 Subject: [PATCH 094/179] seq: handle scientific notation with uppercase 'E' --- src/uu/seq/src/numberparse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index adbaccc11..79e7068e3 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -333,7 +333,7 @@ impl FromStr for PreciseNumber { // number differently depending on its form. This is important // because the form of the input dictates how the output will be // presented. - match (s.find('.'), s.find('e')) { + match (s.find('.'), s.find(['e', 'E'])) { // For example, "123456" or "inf". (None, None) => parse_no_decimal_no_exponent(s), // For example, "123e456" or "1e-2". 
From 5cbe87620c380ec3d0681246b5819820f83df21c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Thu, 5 Dec 2024 15:57:01 +0100 Subject: [PATCH 095/179] checksum: move regex detection to the line level --- src/uucore/src/lib/features/checksum.rs | 109 ++++++++---------------- 1 file changed, 36 insertions(+), 73 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index f7228830b..34dc0f870 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -8,7 +8,7 @@ use data_encoding::BASE64; use os_display::Quotable; use regex::bytes::{Captures, Regex}; use std::{ - ffi::{OsStr, OsString}, + ffi::OsStr, fmt::Display, fs::File, io::{self, stdin, BufReader, Read, Write}, @@ -130,9 +130,6 @@ enum FileCheckError { ImproperlyFormatted, /// reading of the checksum file failed CantOpenChecksumFile, - /// Algorithm detection was unsuccessful. - /// Either none is provided, or there is a conflict. - AlgoDetectionError, } impl From> for FileCheckError { @@ -441,7 +438,7 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String { } /// Determines the appropriate regular expression to use based on the provided lines. 
-fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> { +fn determine_regex(line: impl AsRef<OsStr>) -> Option<(Regex, bool)> { let regexes = [ (Regex::new(ALGO_BASED_REGEX).unwrap(), true), (Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false), @@ -449,12 +446,10 @@ fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> { (Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true), ]; - for line in lines { - let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed"); - for (regex, is_algo_based) in &regexes { - if regex.is_match(line_bytes) { - return Some((regex.clone(), *is_algo_based)); - } + let line_bytes = os_str_as_bytes(line.as_ref()).expect("UTF-8 decoding failed"); + for (regex, is_algo_based) in &regexes { + if regex.is_match(line_bytes) { + return Some((regex.clone(), *is_algo_based)); } } @@ -599,13 +594,20 @@ fn process_checksum_line( filename_input: &OsStr, line: &OsStr, i: usize, - chosen_regex: &Regex, - is_algo_based_format: bool, cli_algo_name: Option<&str>, cli_algo_length: Option<usize>, opts: ChecksumOptions, ) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; + + // early return on empty or commented lines. 
+ if line.is_empty() || line_bytes.starts_with(b"#") { + return Err(LineCheckError::Skipped); + } + + let (chosen_regex, is_algo_based_format) = + determine_regex(line).ok_or(LineCheckError::ImproperlyFormatted)?; + if let Some(caps) = chosen_regex.captures(line_bytes) { let mut filename_to_check = caps.name("filename").unwrap().as_bytes(); @@ -617,7 +619,7 @@ fn process_checksum_line( filename_to_check = &filename_to_check[1..]; } - let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex) + let expected_checksum = get_expected_digest_as_hex_string(&caps, &chosen_regex) .ok_or(LineCheckError::ImproperlyFormatted)?; // If the algo_name is provided, we use it, otherwise we try to detect it @@ -672,10 +674,6 @@ fn process_checksum_line( Err(LineCheckError::DigestMismatch) } } else { - if line.is_empty() || line_bytes.starts_with(b"#") { - // Don't show any warning for empty or commented lines. - return Err(LineCheckError::Skipped); - } if opts.warn { let algo = if let Some(algo_name_input) = cli_algo_name { algo_name_input.to_uppercase() @@ -723,19 +721,11 @@ fn process_checksum_file( let reader = BufReader::new(file); let lines = read_os_string_lines(reader).collect::>(); - let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else { - log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin)); - set_exit_code(1); - return Err(FileCheckError::AlgoDetectionError); - }; - for (i, line) in lines.iter().enumerate() { let line_result = process_checksum_line( filename_input, line, i, - &chosen_regex, - is_algo_based_format, cli_algo_name, cli_algo_length, opts, @@ -816,8 +806,7 @@ where use FileCheckError::*; match process_checksum_file(filename_input, algo_name_input, length_input, opts) { Err(UError(e)) => return Err(e), - Err(ImproperlyFormatted) => break, - Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue, + Err(CantOpenChecksumFile | ImproperlyFormatted) | Ok(_) => continue, } } @@ 
-926,6 +915,7 @@ pub fn escape_filename(filename: &Path) -> (String, &'static str) { #[cfg(test)] mod tests { use super::*; + use std::ffi::OsString; #[test] fn test_unescape_filename() { @@ -1161,66 +1151,39 @@ mod tests { #[test] fn test_determine_regex() { // Test algo-based regex - let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap(); + let line_algo_based = + OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); + let (regex, algo_based) = determine_regex(&line_algo_based).unwrap(); assert!(algo_based); - assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap())); + assert!(regex.is_match(os_str_as_bytes(&line_algo_based).unwrap())); // Test double-space regex - let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - let (regex, algo_based) = determine_regex(&lines_double_space).unwrap(); + let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); + let (regex, algo_based) = determine_regex(&line_double_space).unwrap(); assert!(!algo_based); - assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap())); + assert!(regex.is_match(os_str_as_bytes(&line_double_space).unwrap())); // Test single-space regex - let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - let (regex, algo_based) = determine_regex(&lines_single_space).unwrap(); + let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); + let (regex, algo_based) = determine_regex(&line_single_space).unwrap(); assert!(!algo_based); - assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap())); - - // Test double-space regex start with invalid - let 
lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e example.txt"] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - let (regex, algo_based) = determine_regex(&lines_double_space).unwrap(); - assert!(!algo_based); - assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap())); - assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap())); + assert!(regex.is_match(os_str_as_bytes(&line_single_space).unwrap())); // Test invalid checksum line - let lines_invalid = ["invalid checksum line"] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - assert!(determine_regex(&lines_invalid).is_none()); + let line_invalid = OsString::from("invalid checksum line"); + assert!(determine_regex(&line_invalid).is_none()); // Test leading space before checksum line - let lines_algo_based_leading_space = - [" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - let res = determine_regex(&lines_algo_based_leading_space); + let line_algo_based_leading_space = + OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); + let res = determine_regex(&line_algo_based_leading_space); assert!(res.is_some()); assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX); // Test trailing space after checksum line (should fail) - let lines_algo_based_leading_space = - ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "] - .iter() - .map(|s| OsString::from(s.to_string())) - .collect::>(); - let res = determine_regex(&lines_algo_based_leading_space); + let line_algo_based_leading_space = + OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "); + let res = determine_regex(&line_algo_based_leading_space); assert!(res.is_none()); } From 88e10478bc8f231c5a551e7efab30b8ba92493d7 Mon Sep 17 00:00:00 2001 From: Arthur Pin Date: Fri, 6 Dec 2024 17:47:22 -0300 Subject: [PATCH 096/179] tests/seq: test scientific notation with 
uppercase 'E' --- src/uu/seq/src/numberparse.rs | 1 + tests/by-util/test_seq.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 79e7068e3..891fa2ce6 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -392,6 +392,7 @@ mod tests { fn test_parse_big_int() { assert_eq!(parse("0"), ExtendedBigDecimal::zero()); assert_eq!(parse("0.1e1"), ExtendedBigDecimal::one()); + assert_eq!(parse("0.1E1"), ExtendedBigDecimal::one()); assert_eq!( parse("1.0e1"), ExtendedBigDecimal::BigDecimal("10".parse::().unwrap()) diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index c14d30629..ab0659f21 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -333,6 +333,11 @@ fn test_width_scientific_notation() { .succeeds() .stdout_is("0999\n1000\n") .no_stderr(); + new_ucmd!() + .args(&["-w", "999", "1E3"]) + .succeeds() + .stdout_is("0999\n1000\n") + .no_stderr(); } #[test] From 4e79a01513a5c75b057c8f4af7fadfe0158a9efd Mon Sep 17 00:00:00 2001 From: Alexander Shirokov Date: Sat, 7 Dec 2024 12:23:06 +0100 Subject: [PATCH 097/179] fix(mv): don't panic if apply_xattrs fails This commit fixes issue #6727 by returning the error status instead of causing a panic. It aligns with the original GNU mv more closely. 
--- src/uu/mv/src/mv.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 9d8452b1e..7debf52c9 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -679,7 +679,7 @@ fn rename_with_fallback( }; #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] - fsxattr::apply_xattrs(to, xattrs).unwrap(); + fsxattr::apply_xattrs(to, xattrs)?; if let Err(err) = result { return match err.kind { From 367cc19d455a91547d14c2e06ea651fa6fb5220f Mon Sep 17 00:00:00 2001 From: aimerlief <152078880+aimerlief@users.noreply.github.com> Date: Sun, 8 Dec 2024 00:42:34 +0900 Subject: [PATCH 098/179] fix(seq): handle 0e... scientific notation without padding (#6934) * fix(seq): handle 0e... scientific notation without padding - Updated the parse_exponent_no_decimal function to treat 0e... as zero. - Added test cases to verify correct behavior for 0e15 and -w 0e15. Fix: #6926 * fix(seq): improved parse for accurate BigDecimal handling * apply missing cargo fmt formatting adjustments --- src/uu/seq/src/numberparse.rs | 22 ++++++++++++++++++++-- tests/by-util/test_seq.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 891fa2ce6..80587f713 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -102,7 +102,16 @@ fn parse_exponent_no_decimal(s: &str, j: usize) -> Result() + .map_err(|_| ParseNumberError::Float)?; + if parsed_decimal == BigDecimal::zero() { + BigDecimal::zero() + } else { + parsed_decimal + } + }; let num_integral_digits = if is_minus_zero_float(s, &x) { if exponent > 0 { @@ -204,7 +213,16 @@ fn parse_decimal_and_exponent( // Because of the match guard, this subtraction will not underflow. 
let num_digits_between_decimal_point_and_e = (j - (i + 1)) as i64; let exponent: i64 = s[j + 1..].parse().map_err(|_| ParseNumberError::Float)?; - let val: BigDecimal = s.parse().map_err(|_| ParseNumberError::Float)?; + let val: BigDecimal = { + let parsed_decimal = s + .parse::() + .map_err(|_| ParseNumberError::Float)?; + if parsed_decimal == BigDecimal::zero() { + BigDecimal::zero() + } else { + parsed_decimal + } + }; let num_integral_digits = { let minimum: usize = { diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index ab0659f21..62a0212b1 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -842,3 +842,31 @@ fn test_invalid_format() { .no_stdout() .stderr_contains("format '%g%g' has too many % directives"); } + +#[test] +fn test_parse_scientific_zero() { + new_ucmd!() + .args(&["0e15", "1"]) + .succeeds() + .stdout_only("0\n1\n"); + new_ucmd!() + .args(&["0.0e15", "1"]) + .succeeds() + .stdout_only("0\n1\n"); + new_ucmd!() + .args(&["0", "1"]) + .succeeds() + .stdout_only("0\n1\n"); + new_ucmd!() + .args(&["-w", "0e15", "1"]) + .succeeds() + .stdout_only("0000000000000000\n0000000000000001\n"); + new_ucmd!() + .args(&["-w", "0.0e15", "1"]) + .succeeds() + .stdout_only("0000000000000000\n0000000000000001\n"); + new_ucmd!() + .args(&["-w", "0", "1"]) + .succeeds() + .stdout_only("0\n1\n"); +} From e654645974f019a683f4a9e815f099cbcf7e59de Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sat, 7 Dec 2024 16:59:13 +0100 Subject: [PATCH 099/179] tests/seq: use stdout_only() to remove no_stderr() --- tests/by-util/test_seq.rs | 162 +++++++++++++------------------------- 1 file changed, 54 insertions(+), 108 deletions(-) diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index 62a0212b1..730a9571d 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -303,18 +303,15 @@ fn test_preserve_negative_zero_start() { new_ucmd!() .args(&["-0", "1"]) .succeeds() - .stdout_is("-0\n1\n") - 
.no_stderr(); + .stdout_only("-0\n1\n"); new_ucmd!() .args(&["-0", "1", "2"]) .succeeds() - .stdout_is("-0\n1\n2\n") - .no_stderr(); + .stdout_only("-0\n1\n2\n"); new_ucmd!() .args(&["-0", "1", "2.0"]) .succeeds() - .stdout_is("-0\n1\n2\n") - .no_stderr(); + .stdout_only("-0\n1\n2\n"); } #[test] @@ -322,8 +319,7 @@ fn test_drop_negative_zero_end() { new_ucmd!() .args(&["1", "-1", "-0"]) .succeeds() - .stdout_is("1\n0\n") - .no_stderr(); + .stdout_only("1\n0\n"); } #[test] @@ -331,13 +327,11 @@ fn test_width_scientific_notation() { new_ucmd!() .args(&["-w", "999", "1e3"]) .succeeds() - .stdout_is("0999\n1000\n") - .no_stderr(); + .stdout_only("0999\n1000\n"); new_ucmd!() .args(&["-w", "999", "1E3"]) .succeeds() - .stdout_is("0999\n1000\n") - .no_stderr(); + .stdout_only("0999\n1000\n"); } #[test] @@ -345,18 +339,15 @@ fn test_width_negative_zero() { new_ucmd!() .args(&["-w", "-0", "1"]) .succeeds() - .stdout_is("-0\n01\n") - .no_stderr(); + .stdout_only("-0\n01\n"); new_ucmd!() .args(&["-w", "-0", "1", "2"]) .succeeds() - .stdout_is("-0\n01\n02\n") - .no_stderr(); + .stdout_only("-0\n01\n02\n"); new_ucmd!() .args(&["-w", "-0", "1", "2.0"]) .succeeds() - .stdout_is("-0\n01\n02\n") - .no_stderr(); + .stdout_only("-0\n01\n02\n"); } #[test] @@ -364,33 +355,27 @@ fn test_width_negative_zero_decimal_notation() { new_ucmd!() .args(&["-w", "-0.0", "1"]) .succeeds() - .stdout_is("-0.0\n01.0\n") - .no_stderr(); + .stdout_only("-0.0\n01.0\n"); new_ucmd!() .args(&["-w", "-0.0", "1.0"]) .succeeds() - .stdout_is("-0.0\n01.0\n") - .no_stderr(); + .stdout_only("-0.0\n01.0\n"); new_ucmd!() .args(&["-w", "-0.0", "1", "2"]) .succeeds() - .stdout_is("-0.0\n01.0\n02.0\n") - .no_stderr(); + .stdout_only("-0.0\n01.0\n02.0\n"); new_ucmd!() .args(&["-w", "-0.0", "1", "2.0"]) .succeeds() - .stdout_is("-0.0\n01.0\n02.0\n") - .no_stderr(); + .stdout_only("-0.0\n01.0\n02.0\n"); new_ucmd!() .args(&["-w", "-0.0", "1.0", "2"]) .succeeds() - .stdout_is("-0.0\n01.0\n02.0\n") - .no_stderr(); + 
.stdout_only("-0.0\n01.0\n02.0\n"); new_ucmd!() .args(&["-w", "-0.0", "1.0", "2.0"]) .succeeds() - .stdout_is("-0.0\n01.0\n02.0\n") - .no_stderr(); + .stdout_only("-0.0\n01.0\n02.0\n"); } #[test] @@ -398,98 +383,80 @@ fn test_width_negative_zero_scientific_notation() { new_ucmd!() .args(&["-w", "-0e0", "1"]) .succeeds() - .stdout_is("-0\n01\n") - .no_stderr(); + .stdout_only("-0\n01\n"); new_ucmd!() .args(&["-w", "-0e0", "1", "2"]) .succeeds() - .stdout_is("-0\n01\n02\n") - .no_stderr(); + .stdout_only("-0\n01\n02\n"); new_ucmd!() .args(&["-w", "-0e0", "1", "2.0"]) .succeeds() - .stdout_is("-0\n01\n02\n") - .no_stderr(); + .stdout_only("-0\n01\n02\n"); new_ucmd!() .args(&["-w", "-0e+1", "1"]) .succeeds() - .stdout_is("-00\n001\n") - .no_stderr(); + .stdout_only("-00\n001\n"); new_ucmd!() .args(&["-w", "-0e+1", "1", "2"]) .succeeds() - .stdout_is("-00\n001\n002\n") - .no_stderr(); + .stdout_only("-00\n001\n002\n"); new_ucmd!() .args(&["-w", "-0e+1", "1", "2.0"]) .succeeds() - .stdout_is("-00\n001\n002\n") - .no_stderr(); + .stdout_only("-00\n001\n002\n"); new_ucmd!() .args(&["-w", "-0.000e0", "1"]) .succeeds() - .stdout_is("-0.000\n01.000\n") - .no_stderr(); + .stdout_only("-0.000\n01.000\n"); new_ucmd!() .args(&["-w", "-0.000e0", "1", "2"]) .succeeds() - .stdout_is("-0.000\n01.000\n02.000\n") - .no_stderr(); + .stdout_only("-0.000\n01.000\n02.000\n"); new_ucmd!() .args(&["-w", "-0.000e0", "1", "2.0"]) .succeeds() - .stdout_is("-0.000\n01.000\n02.000\n") - .no_stderr(); + .stdout_only("-0.000\n01.000\n02.000\n"); new_ucmd!() .args(&["-w", "-0.000e-2", "1"]) .succeeds() - .stdout_is("-0.00000\n01.00000\n") - .no_stderr(); + .stdout_only("-0.00000\n01.00000\n"); new_ucmd!() .args(&["-w", "-0.000e-2", "1", "2"]) .succeeds() - .stdout_is("-0.00000\n01.00000\n02.00000\n") - .no_stderr(); + .stdout_only("-0.00000\n01.00000\n02.00000\n"); new_ucmd!() .args(&["-w", "-0.000e-2", "1", "2.0"]) .succeeds() - .stdout_is("-0.00000\n01.00000\n02.00000\n") - .no_stderr(); + 
.stdout_only("-0.00000\n01.00000\n02.00000\n"); new_ucmd!() .args(&["-w", "-0.000e5", "1"]) .succeeds() - .stdout_is("-000000\n0000001\n") - .no_stderr(); + .stdout_only("-000000\n0000001\n"); new_ucmd!() .args(&["-w", "-0.000e5", "1", "2"]) .succeeds() - .stdout_is("-000000\n0000001\n0000002\n") - .no_stderr(); + .stdout_only("-000000\n0000001\n0000002\n"); new_ucmd!() .args(&["-w", "-0.000e5", "1", "2.0"]) .succeeds() - .stdout_is("-000000\n0000001\n0000002\n") - .no_stderr(); + .stdout_only("-000000\n0000001\n0000002\n"); new_ucmd!() .args(&["-w", "-0.000e5", "1"]) .succeeds() - .stdout_is("-000000\n0000001\n") - .no_stderr(); + .stdout_only("-000000\n0000001\n"); new_ucmd!() .args(&["-w", "-0.000e5", "1", "2"]) .succeeds() - .stdout_is("-000000\n0000001\n0000002\n") - .no_stderr(); + .stdout_only("-000000\n0000001\n0000002\n"); new_ucmd!() .args(&["-w", "-0.000e5", "1", "2.0"]) .succeeds() - .stdout_is("-000000\n0000001\n0000002\n") - .no_stderr(); + .stdout_only("-000000\n0000001\n0000002\n"); } #[test] @@ -497,14 +464,12 @@ fn test_width_decimal_scientific_notation_increment() { new_ucmd!() .args(&["-w", ".1", "1e-2", ".11"]) .succeeds() - .stdout_is("0.10\n0.11\n") - .no_stderr(); + .stdout_only("0.10\n0.11\n"); new_ucmd!() .args(&["-w", ".0", "1.500e-1", ".2"]) .succeeds() - .stdout_is("0.0000\n0.1500\n") - .no_stderr(); + .stdout_only("0.0000\n0.1500\n"); } /// Test that trailing zeros in the start argument contribute to precision. @@ -513,8 +478,7 @@ fn test_width_decimal_scientific_notation_trailing_zeros_start() { new_ucmd!() .args(&["-w", ".1000", "1e-2", ".11"]) .succeeds() - .stdout_is("0.1000\n0.1100\n") - .no_stderr(); + .stdout_only("0.1000\n0.1100\n"); } /// Test that trailing zeros in the increment argument contribute to precision. 
@@ -523,8 +487,7 @@ fn test_width_decimal_scientific_notation_trailing_zeros_increment() { new_ucmd!() .args(&["-w", "1e-1", "0.0100", ".11"]) .succeeds() - .stdout_is("0.1000\n0.1100\n") - .no_stderr(); + .stdout_only("0.1000\n0.1100\n"); } #[test] @@ -532,8 +495,7 @@ fn test_width_negative_decimal_notation() { new_ucmd!() .args(&["-w", "-.1", ".1", ".11"]) .succeeds() - .stdout_is("-0.1\n00.0\n00.1\n") - .no_stderr(); + .stdout_only("-0.1\n00.0\n00.1\n"); } #[test] @@ -541,22 +503,19 @@ fn test_width_negative_scientific_notation() { new_ucmd!() .args(&["-w", "-1e-3", "1"]) .succeeds() - .stdout_is("-0.001\n00.999\n") - .no_stderr(); + .stdout_only("-0.001\n00.999\n"); new_ucmd!() .args(&["-w", "-1.e-3", "1"]) .succeeds() - .stdout_is("-0.001\n00.999\n") - .no_stderr(); + .stdout_only("-0.001\n00.999\n"); new_ucmd!() .args(&["-w", "-1.0e-4", "1"]) .succeeds() - .stdout_is("-0.00010\n00.99990\n") - .no_stderr(); + .stdout_only("-0.00010\n00.99990\n"); new_ucmd!() .args(&["-w", "-.1e2", "10", "100"]) .succeeds() - .stdout_is( + .stdout_only( "-010 0000 0010 @@ -570,12 +529,11 @@ fn test_width_negative_scientific_notation() { 0090 0100 ", - ) - .no_stderr(); + ); new_ucmd!() .args(&["-w", "-0.1e2", "10", "100"]) .succeeds() - .stdout_is( + .stdout_only( "-010 0000 0010 @@ -589,8 +547,7 @@ fn test_width_negative_scientific_notation() { 0090 0100 ", - ) - .no_stderr(); + ); } /// Test that trailing zeros in the end argument do not contribute to width. @@ -599,8 +556,7 @@ fn test_width_decimal_scientific_notation_trailing_zeros_end() { new_ucmd!() .args(&["-w", "1e-1", "1e-2", ".1100"]) .succeeds() - .stdout_is("0.10\n0.11\n") - .no_stderr(); + .stdout_only("0.10\n0.11\n"); } #[test] @@ -608,8 +564,7 @@ fn test_width_floats() { new_ucmd!() .args(&["-w", "9.0", "10.0"]) .succeeds() - .stdout_is("09.0\n10.0\n") - .no_stderr(); + .stdout_only("09.0\n10.0\n"); } // TODO This is duplicated from `test_yes.rs`; refactor them. 
@@ -661,11 +616,7 @@ fn test_neg_inf_width() { #[test] fn test_ignore_leading_whitespace() { - new_ucmd!() - .arg(" 1") - .succeeds() - .stdout_is("1\n") - .no_stderr(); + new_ucmd!().arg(" 1").succeeds().stdout_only("1\n"); } #[test] @@ -684,8 +635,7 @@ fn test_negative_zero_int_start_float_increment() { new_ucmd!() .args(&["-0", "0.1", "0.1"]) .succeeds() - .stdout_is("-0.0\n0.1\n") - .no_stderr(); + .stdout_only("-0.0\n0.1\n"); } #[test] @@ -693,7 +643,7 @@ fn test_float_precision_increment() { new_ucmd!() .args(&["999", "0.1", "1000.1"]) .succeeds() - .stdout_is( + .stdout_only( "999.0 999.1 999.2 @@ -707,8 +657,7 @@ fn test_float_precision_increment() { 1000.0 1000.1 ", - ) - .no_stderr(); + ); } /// Test for floating point precision issues. @@ -717,8 +666,7 @@ fn test_negative_increment_decimal() { new_ucmd!() .args(&["0.1", "-0.1", "-0.2"]) .succeeds() - .stdout_is("0.1\n0.0\n-0.1\n-0.2\n") - .no_stderr(); + .stdout_only("0.1\n0.0\n-0.1\n-0.2\n"); } #[test] @@ -726,8 +674,7 @@ fn test_zero_not_first() { new_ucmd!() .args(&["-w", "-0.1", "0.1", "0.1"]) .succeeds() - .stdout_is("-0.1\n00.0\n00.1\n") - .no_stderr(); + .stdout_only("-0.1\n00.0\n00.1\n"); } #[test] @@ -735,8 +682,7 @@ fn test_rounding_end() { new_ucmd!() .args(&["1", "-1", "0.1"]) .succeeds() - .stdout_is("1\n") - .no_stderr(); + .stdout_only("1\n"); } #[test] From 1f6f7fbe8c02c4be963e18203631f827ab83446e Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sat, 7 Dec 2024 17:04:05 +0100 Subject: [PATCH 100/179] tests/seq: fix ticket references of ignored tests --- tests/by-util/test_seq.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index 730a9571d..8f33c3aa7 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -710,7 +710,7 @@ fn test_format_option() { } #[test] -#[ignore = "Need issue #6233 to be fixed"] +#[ignore = "Need issue #2660 to be fixed"] fn test_auto_precision() { new_ucmd!() 
.args(&["1", "0x1p-1", "2"]) @@ -719,7 +719,7 @@ fn test_auto_precision() { } #[test] -#[ignore = "Need issue #6234 to be fixed"] +#[ignore = "Need issue #3318 to be fixed"] fn test_undefined() { new_ucmd!() .args(&["1e-9223372036854775808"]) From 7708d22eced7c2d5eaf539feff8c500c60522019 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 7 Dec 2024 18:52:15 +0000 Subject: [PATCH 101/179] chore(deps): update rust crate thiserror to v2.0.5 --- Cargo.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb09c5fd6..9dce55ae0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2382,11 +2382,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490" +checksum = "643caef17e3128658ff44d85923ef2d28af81bb71e0d67bbfe1d76f19a73e053" dependencies = [ - "thiserror-impl 2.0.4", + "thiserror-impl 2.0.5", ] [[package]] @@ -2402,9 +2402,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" +checksum = "995d0bbc9995d1f19d28b7215a9352b0fc3cd3a2d2ec95c2cadc485cdedbcdde" dependencies = [ "proc-macro2", "quote", @@ -2610,7 +2610,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror 2.0.4", + "thiserror 2.0.5", "uucore", ] @@ -2622,7 +2622,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror 2.0.4", + "thiserror 2.0.5", "uucore", ] @@ -2699,7 +2699,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", - "thiserror 2.0.4", + "thiserror 2.0.5", "uucore", ] @@ -3215,7 +3215,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror 2.0.4", + "thiserror 2.0.5", "uucore", ] @@ 
-3494,7 +3494,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror 2.0.4", + "thiserror 2.0.5", "utmp-classic", "uucore", ] @@ -3525,7 +3525,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror 2.0.4", + "thiserror 2.0.5", "unicode-width 0.1.13", "uucore", ] @@ -3586,7 +3586,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror 2.0.4", + "thiserror 2.0.5", "time", "uucore_procs", "walkdir", From bd294ddd100080e349ad866b4bc126c33d6902a7 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 8 Dec 2024 21:59:45 +0000 Subject: [PATCH 102/179] chore(deps): update rust crate thiserror to v2.0.6 --- Cargo.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9dce55ae0..4a186d496 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2382,11 +2382,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643caef17e3128658ff44d85923ef2d28af81bb71e0d67bbfe1d76f19a73e053" +checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" dependencies = [ - "thiserror-impl 2.0.5", + "thiserror-impl 2.0.6", ] [[package]] @@ -2402,9 +2402,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "995d0bbc9995d1f19d28b7215a9352b0fc3cd3a2d2ec95c2cadc485cdedbcdde" +checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" dependencies = [ "proc-macro2", "quote", @@ -2610,7 +2610,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror 2.0.5", + "thiserror 2.0.6", "uucore", ] @@ -2622,7 +2622,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror 2.0.5", + "thiserror 2.0.6", "uucore", ] @@ -2699,7 +2699,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", 
- "thiserror 2.0.5", + "thiserror 2.0.6", "uucore", ] @@ -3215,7 +3215,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror 2.0.5", + "thiserror 2.0.6", "uucore", ] @@ -3494,7 +3494,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror 2.0.5", + "thiserror 2.0.6", "utmp-classic", "uucore", ] @@ -3525,7 +3525,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror 2.0.5", + "thiserror 2.0.6", "unicode-width 0.1.13", "uucore", ] @@ -3586,7 +3586,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror 2.0.5", + "thiserror 2.0.6", "time", "uucore_procs", "walkdir", From 85bd072655d4c036b2385092cd4fb48ce3ce5f12 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 02:20:54 +0000 Subject: [PATCH 103/179] chore(deps): update rust crate bigdecimal to v0.4.7 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9dce55ae0..65e6a30be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,9 +136,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bigdecimal" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f850665a0385e070b64c38d2354e6c104c8479c59868d1e48a0c13ee2c7a1c1" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" dependencies = [ "autocfg", "libm", From 46d5d638fe448b06a1b10c275ebf82889f34cd0d Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 2 Dec 2024 15:03:28 +0100 Subject: [PATCH 104/179] Bump unicode-width from 0.1.12 to 0.2.0 --- Cargo.lock | 14 +++++++------- Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed82e9aa7..0a07c7398 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2743,7 +2743,7 @@ version = "0.0.28" dependencies = [ "clap", "tempfile", - "unicode-width 0.1.13", + "unicode-width 0.2.0", 
"uucore", ] @@ -2806,7 +2806,7 @@ name = "uu_expand" version = "0.0.28" dependencies = [ "clap", - "unicode-width 0.1.13", + "unicode-width 0.2.0", "uucore", ] @@ -2848,7 +2848,7 @@ name = "uu_fmt" version = "0.0.28" dependencies = [ "clap", - "unicode-width 0.1.13", + "unicode-width 0.2.0", "uucore", ] @@ -3033,7 +3033,7 @@ dependencies = [ "crossterm", "nix", "unicode-segmentation", - "unicode-width 0.1.13", + "unicode-width 0.2.0", "uucore", ] @@ -3276,7 +3276,7 @@ dependencies = [ "rayon", "self_cell", "tempfile", - "unicode-width 0.1.13", + "unicode-width 0.2.0", "uucore", ] @@ -3468,7 +3468,7 @@ name = "uu_unexpand" version = "0.0.28" dependencies = [ "clap", - "unicode-width 0.1.13", + "unicode-width 0.2.0", "uucore", ] @@ -3526,7 +3526,7 @@ dependencies = [ "libc", "nix", "thiserror 2.0.6", - "unicode-width 0.1.13", + "unicode-width 0.2.0", "uucore", ] diff --git a/Cargo.toml b/Cargo.toml index caa233802..a4f8462e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -335,7 +335,7 @@ textwrap = { version = "0.16.1", features = ["terminal_size"] } thiserror = "2.0.3" time = { version = "0.3.36" } unicode-segmentation = "1.11.0" -unicode-width = "0.1.12" +unicode-width = "0.2.0" utf-8 = "0.7.6" utmp-classic = "0.1.6" walkdir = "2.5" From ec67d22123bf254c138585f52856e06464487d48 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 2 Dec 2024 16:04:24 +0100 Subject: [PATCH 105/179] more: adapt test to change in unicode-width --- src/uu/more/src/more.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/more/src/more.rs b/src/uu/more/src/more.rs index cb74e1176..61d9b2adb 100644 --- a/src/uu/more/src/more.rs +++ b/src/uu/more/src/more.rs @@ -700,15 +700,15 @@ mod tests { test_string.push_str("👩🏻‍🔬"); } - let lines = break_line(&test_string, 80); + let lines = break_line(&test_string, 31); let widths: Vec = lines .iter() .map(|s| UnicodeWidthStr::width(&s[..])) .collect(); - // Each 👩🏻‍🔬 is 6 character width it break line to the closest 
number to 80 => 6 * 13 = 78 - assert_eq!((78, 42), (widths[0], widths[1])); + // Each 👩🏻‍🔬 is 2 character width, break line to the closest even number to 31 + assert_eq!((30, 10), (widths[0], widths[1])); } #[test] From 2a406d8cbb5ba52bc0c3f7459dddeb5b6540cc6f Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 4 Dec 2024 14:56:02 +0100 Subject: [PATCH 106/179] sort: adapt fixtures to change in unicode-width --- tests/fixtures/sort/keys_closed_range.expected.debug | 4 ++-- tests/fixtures/sort/keys_multiple_ranges.expected.debug | 6 +++--- tests/fixtures/sort/keys_no_field_match.expected.debug | 4 ++-- tests/fixtures/sort/keys_open_ended.expected.debug | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/fixtures/sort/keys_closed_range.expected.debug b/tests/fixtures/sort/keys_closed_range.expected.debug index b78db4af1..e317d4079 100644 --- a/tests/fixtures/sort/keys_closed_range.expected.debug +++ b/tests/fixtures/sort/keys_closed_range.expected.debug @@ -11,8 +11,8 @@ ________ _ ________ 👩‍🔬 👩‍🔬 👩‍🔬 - __ -______________ + __ +________ 💣💣 💣💣 💣💣 __ ______________ diff --git a/tests/fixtures/sort/keys_multiple_ranges.expected.debug b/tests/fixtures/sort/keys_multiple_ranges.expected.debug index 830e9afd0..41b7e210d 100644 --- a/tests/fixtures/sort/keys_multiple_ranges.expected.debug +++ b/tests/fixtures/sort/keys_multiple_ranges.expected.debug @@ -15,9 +15,9 @@ ________ ___ ________ 👩‍🔬 👩‍🔬 👩‍🔬 - _____ - _____ -______________ + ___ + ___ +________ 💣💣 💣💣 💣💣 _____ _____ diff --git a/tests/fixtures/sort/keys_no_field_match.expected.debug b/tests/fixtures/sort/keys_no_field_match.expected.debug index 60197b1de..0a3ea8303 100644 --- a/tests/fixtures/sort/keys_no_field_match.expected.debug +++ b/tests/fixtures/sort/keys_no_field_match.expected.debug @@ -11,8 +11,8 @@ ________ ^ no match for key ________ 👩‍🔬 👩‍🔬 👩‍🔬 - ^ no match for key -______________ + ^ no match for key +________ 💣💣 💣💣 💣💣 ^ no match for key ______________ diff --git 
a/tests/fixtures/sort/keys_open_ended.expected.debug b/tests/fixtures/sort/keys_open_ended.expected.debug index d3a56ffd6..c8e4ad9ae 100644 --- a/tests/fixtures/sort/keys_open_ended.expected.debug +++ b/tests/fixtures/sort/keys_open_ended.expected.debug @@ -11,8 +11,8 @@ ________ ____ ________ 👩‍🔬 👩‍🔬 👩‍🔬 - _______ -______________ + _____ +________ 💣💣 💣💣 💣💣 _______ ______________ From cf355591b94746e5f043f3f6d4c62c7aa0c8b050 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:51:39 +0000 Subject: [PATCH 107/179] chore(deps): update rust crate chrono to v0.4.39 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a07c7398..5fe9ac67c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -318,9 +318,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", From 3211e43caa4c3cab6d4e80146020824ad5d95e93 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:51:47 +0000 Subject: [PATCH 108/179] fix(deps): update rust crate libc to v0.2.168 --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a07c7398..2bd4b0f18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.167" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libloading" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index a300d8b65..f2ba3f375 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.167" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libfuzzer-sys" @@ -572,7 +572,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a6229bad892b46b0dcfaaeb18ad0d2e56400f5aaea05b768bde96e73676cf75" dependencies = [ - "unicode-width", + "unicode-width 0.1.12", ] [[package]] @@ -838,6 +838,12 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "utf8parse" version = "0.2.1" @@ -930,7 +936,7 @@ dependencies = [ "rayon", "self_cell", "tempfile", - "unicode-width", + "unicode-width 0.2.0", "uucore", ] @@ -970,7 +976,7 @@ dependencies = [ "libc", "nix 0.29.0", "thiserror", - "unicode-width", + "unicode-width 0.2.0", "uucore", ] From 2ca7c28cd97affa300f8c2a39febb21d44747e1d Mon Sep 17 00:00:00 2001 From: Christian Legnitto Date: Mon, 9 Dec 2024 22:01:36 -0400 Subject: [PATCH 109/179] Make `Spec` public --- 
src/uucore/src/lib/features/format/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 24dd1daaa..25d128ed8 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -38,7 +38,7 @@ pub mod num_parser; mod spec; pub use argument::*; -use spec::Spec; +pub use spec::Spec; use std::{ error::Error, fmt::Display, From c60203ddd3394858ccff0819588925542ba4ff9f Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 10 Dec 2024 08:48:52 +0100 Subject: [PATCH 110/179] stat: improve GNU compatibility (#6933) * stat: fix the quotes when dealing with %N and other formats should fix tests/stat/stat-fmt.sh * stats: use an enum instead of a string * stats: split the functions into smaller functions * stat: handle byte as a format for better display * stat: handle error better. should make tests/stat/stat-printf.pl pass * stat: Some escape sequences are non-standard * Fix tests * Take comments into account --- src/uu/stat/src/stat.rs | 568 +++++++++++++++++++++++-------------- tests/by-util/test_stat.rs | 100 +++++-- util/build-gnu.sh | 3 + 3 files changed, 440 insertions(+), 231 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index ee4178344..5e617e7a3 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -9,7 +9,9 @@ use uucore::error::{UResult, USimpleError}; use clap::builder::ValueParser; use uucore::display::Quotable; use uucore::fs::display_permissions; -use uucore::fsext::{pretty_filetype, pretty_fstype, read_fs_list, statfs, BirthTime, FsMeta}; +use uucore::fsext::{ + pretty_filetype, pretty_fstype, read_fs_list, statfs, BirthTime, FsMeta, StatFs, +}; use uucore::libc::mode_t; use uucore::{ entries, format_usage, help_about, help_section, help_usage, show_error, show_warning, @@ -19,10 +21,12 @@ use chrono::{DateTime, Local}; use clap::{crate_version, Arg, ArgAction, 
ArgMatches, Command}; use std::borrow::Cow; use std::ffi::{OsStr, OsString}; -use std::fs; +use std::fs::{FileType, Metadata}; +use std::io::Write; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::os::unix::prelude::OsStrExt; use std::path::Path; +use std::{env, fs}; const ABOUT: &str = help_about!("stat.md"); const USAGE: &str = help_usage!("stat.md"); @@ -93,9 +97,33 @@ pub enum OutputType { Unknown, } +#[derive(Default)] +enum QuotingStyle { + Locale, + Shell, + #[default] + ShellEscapeAlways, + Quote, +} + +impl std::str::FromStr for QuotingStyle { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "locale" => Ok(QuotingStyle::Locale), + "shell" => Ok(QuotingStyle::Shell), + "shell-escape-always" => Ok(QuotingStyle::ShellEscapeAlways), + // The others aren't exposed to the user + _ => Err(format!("Invalid quoting style: {}", s)), + } + } +} + #[derive(Debug, PartialEq, Eq)] enum Token { Char(char), + Byte(u8), Directive { flag: Flags, width: usize, @@ -293,6 +321,93 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Option) { pad_and_print(s, flags.left, width, Padding::Space); } +fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String { + match quoting_style { + QuotingStyle::Locale | QuotingStyle::Shell => { + let escaped = file_name.replace('\'', r"\'"); + format!("'{}'", escaped) + } + QuotingStyle::ShellEscapeAlways => format!("\"{}\"", file_name), + QuotingStyle::Quote => file_name.to_string(), + } +} + +fn get_quoted_file_name( + display_name: &str, + file: &OsString, + file_type: &FileType, + from_user: bool, +) -> Result { + let quoting_style = env::var("QUOTING_STYLE") + .ok() + .and_then(|style| style.parse().ok()) + .unwrap_or_default(); + + if file_type.is_symlink() { + let quoted_display_name = quote_file_name(display_name, "ing_style); + match fs::read_link(file) { + Ok(dst) => { + let quoted_dst = quote_file_name(&dst.to_string_lossy(), "ing_style); + 
Ok(format!("{quoted_display_name} -> {quoted_dst}")) + } + Err(e) => { + show_error!("{e}"); + Err(1) + } + } + } else { + let style = if from_user { + quoting_style + } else { + QuotingStyle::Quote + }; + Ok(quote_file_name(display_name, &style)) + } +} + +fn process_token_filesystem(t: &Token, meta: StatFs, display_name: &str) { + match *t { + Token::Byte(byte) => write_raw_byte(byte), + Token::Char(c) => print!("{c}"), + Token::Directive { + flag, + width, + precision, + format, + } => { + let output = match format { + // free blocks available to non-superuser + 'a' => OutputType::Unsigned(meta.avail_blocks()), + // total data blocks in file system + 'b' => OutputType::Unsigned(meta.total_blocks()), + // total file nodes in file system + 'c' => OutputType::Unsigned(meta.total_file_nodes()), + // free file nodes in file system + 'd' => OutputType::Unsigned(meta.free_file_nodes()), + // free blocks in file system + 'f' => OutputType::Unsigned(meta.free_blocks()), + // file system ID in hex + 'i' => OutputType::UnsignedHex(meta.fsid()), + // maximum length of filenames + 'l' => OutputType::Unsigned(meta.namelen()), + // file name + 'n' => OutputType::Str(display_name.to_string()), + // block size (for faster transfers) + 's' => OutputType::Unsigned(meta.io_size()), + // fundamental block size (for block counts) + 'S' => OutputType::Integer(meta.block_size()), + // file system type in hex + 't' => OutputType::UnsignedHex(meta.fs_type() as u64), + // file system type in human readable form + 'T' => OutputType::Str(pretty_fstype(meta.fs_type()).into()), + _ => OutputType::Unknown, + }; + + print_it(&output, flag, width, precision); + } + } +} + /// Prints an integer value based on the provided flags, width, and precision. 
/// /// # Arguments @@ -403,7 +518,26 @@ fn print_unsigned_hex( pad_and_print(&s, flags.left, width, padding_char); } +fn write_raw_byte(byte: u8) { + std::io::stdout().write_all(&[byte]).unwrap(); +} + impl Stater { + fn process_flags(chars: &[char], i: &mut usize, bound: usize, flag: &mut Flags) { + while *i < bound { + match chars[*i] { + '#' => flag.alter = true, + '0' => flag.zero = true, + '-' => flag.left = true, + ' ' => flag.space = true, + '+' => flag.sign = true, + '\'' => flag.group = true, + _ => break, + } + *i += 1; + } + } + fn handle_percent_case( chars: &[char], i: &mut usize, @@ -423,20 +557,7 @@ impl Stater { let mut flag = Flags::default(); - while *i < bound { - match chars[*i] { - '#' => flag.alter = true, - '0' => flag.zero = true, - '-' => flag.left = true, - ' ' => flag.space = true, - '+' => flag.sign = true, - '\'' => flag.group = true, - 'I' => unimplemented!(), - _ => break, - } - *i += 1; - } - check_bound(format_str, bound, old, *i)?; + Self::process_flags(chars, i, bound, &mut flag); let mut width = 0; let mut precision = None; @@ -445,6 +566,15 @@ impl Stater { if let Some((field_width, offset)) = format_str[j..].scan_num::() { width = field_width; j += offset; + + // Reject directives like `%` by checking if width has been parsed. 
+ if j >= bound || chars[j] == '%' { + let invalid_directive: String = chars[old..=j.min(bound - 1)].iter().collect(); + return Err(USimpleError::new( + 1, + format!("{}: invalid directive", invalid_directive.quote()), + )); + } } check_bound(format_str, bound, old, j)?; @@ -465,9 +595,27 @@ impl Stater { } *i = j; + + // Check for multi-character specifiers (e.g., `%Hd`, `%Lr`) + if *i + 1 < bound { + if let Some(&next_char) = chars.get(*i + 1) { + if (chars[*i] == 'H' || chars[*i] == 'L') && (next_char == 'd' || next_char == 'r') + { + let specifier = format!("{}{}", chars[*i], next_char); + *i += 1; + return Ok(Token::Directive { + flag, + width, + precision, + format: specifier.chars().next().unwrap(), + }); + } + } + } + Ok(Token::Directive { - width, flag, + width, precision, format: chars[*i], }) @@ -485,33 +633,49 @@ impl Stater { return Token::Char('\\'); } match chars[*i] { - 'x' if *i + 1 < bound => { - if let Some((c, offset)) = format_str[*i + 1..].scan_char(16) { - *i += offset; - Token::Char(c) + 'a' => Token::Byte(0x07), // BEL + 'b' => Token::Byte(0x08), // Backspace + 'f' => Token::Byte(0x0C), // Form feed + 'n' => Token::Byte(0x0A), // Line feed + 'r' => Token::Byte(0x0D), // Carriage return + 't' => Token::Byte(0x09), // Horizontal tab + '\\' => Token::Byte(b'\\'), // Backslash + '\'' => Token::Byte(b'\''), // Single quote + '"' => Token::Byte(b'"'), // Double quote + '0'..='7' => { + // Parse octal escape sequence (up to 3 digits) + let mut value = 0u8; + let mut count = 0; + while *i < bound && count < 3 { + if let Some(digit) = chars[*i].to_digit(8) { + value = value * 8 + digit as u8; + *i += 1; + count += 1; + } else { + break; + } + } + *i -= 1; // Adjust index to account for the outer loop increment + Token::Byte(value) + } + 'x' => { + // Parse hexadecimal escape sequence + if *i + 1 < bound { + if let Some((c, offset)) = format_str[*i + 1..].scan_char(16) { + *i += offset; + Token::Byte(c as u8) + } else { + show_warning!("unrecognized 
escape '\\x'"); + Token::Byte(b'x') + } } else { - show_warning!("unrecognized escape '\\x'"); - Token::Char('x') + show_warning!("incomplete hex escape '\\x'"); + Token::Byte(b'x') } } - '0'..='7' => { - let (c, offset) = format_str[*i..].scan_char(8).unwrap(); - *i += offset - 1; - Token::Char(c) - } - '"' => Token::Char('"'), - '\\' => Token::Char('\\'), - 'a' => Token::Char('\x07'), - 'b' => Token::Char('\x08'), - 'e' => Token::Char('\x1B'), - 'f' => Token::Char('\x0C'), - 'n' => Token::Char('\n'), - 'r' => Token::Char('\r'), - 't' => Token::Char('\t'), - 'v' => Token::Char('\x0B'), - c => { - show_warning!("unrecognized escape '\\{}'", c); - Token::Char(c) + other => { + show_warning!("unrecognized escape '\\{}'", other); + Token::Byte(other as u8) } } } @@ -634,7 +798,128 @@ impl Stater { ret } - #[allow(clippy::cognitive_complexity)] + fn process_token_files( + &self, + t: &Token, + meta: &Metadata, + display_name: &str, + file: &OsString, + file_type: &FileType, + from_user: bool, + ) -> Result<(), i32> { + match *t { + Token::Byte(byte) => write_raw_byte(byte), + Token::Char(c) => print!("{c}"), + + Token::Directive { + flag, + width, + precision, + format, + } => { + let output = match format { + // access rights in octal + 'a' => OutputType::UnsignedOct(0o7777 & meta.mode()), + // access rights in human readable form + 'A' => OutputType::Str(display_permissions(meta, true)), + // number of blocks allocated (see %B) + 'b' => OutputType::Unsigned(meta.blocks()), + + // the size in bytes of each block reported by %b + // FIXME: blocksize differs on various platform + // See coreutils/gnulib/lib/stat-size.h ST_NBLOCKSIZE // spell-checker:disable-line + 'B' => OutputType::Unsigned(512), + + // device number in decimal + 'd' => OutputType::Unsigned(meta.dev()), + // device number in hex + 'D' => OutputType::UnsignedHex(meta.dev()), + // raw mode in hex + 'f' => OutputType::UnsignedHex(meta.mode() as u64), + // file type + 'F' => OutputType::Str( + 
pretty_filetype(meta.mode() as mode_t, meta.len()).to_owned(), + ), + // group ID of owner + 'g' => OutputType::Unsigned(meta.gid() as u64), + // group name of owner + 'G' => { + let group_name = + entries::gid2grp(meta.gid()).unwrap_or_else(|_| "UNKNOWN".to_owned()); + OutputType::Str(group_name) + } + // number of hard links + 'h' => OutputType::Unsigned(meta.nlink()), + // inode number + 'i' => OutputType::Unsigned(meta.ino()), + // mount point + 'm' => OutputType::Str(self.find_mount_point(file).unwrap()), + // file name + 'n' => OutputType::Str(display_name.to_string()), + // quoted file name with dereference if symbolic link + 'N' => { + let file_name = + get_quoted_file_name(display_name, file, file_type, from_user)?; + OutputType::Str(file_name) + } + // optimal I/O transfer size hint + 'o' => OutputType::Unsigned(meta.blksize()), + // total size, in bytes + 's' => OutputType::Integer(meta.len() as i64), + // major device type in hex, for character/block device special + // files + 't' => OutputType::UnsignedHex(meta.rdev() >> 8), + // minor device type in hex, for character/block device special + // files + 'T' => OutputType::UnsignedHex(meta.rdev() & 0xff), + // user ID of owner + 'u' => OutputType::Unsigned(meta.uid() as u64), + // user name of owner + 'U' => { + let user_name = + entries::uid2usr(meta.uid()).unwrap_or_else(|_| "UNKNOWN".to_owned()); + OutputType::Str(user_name) + } + + // time of file birth, human-readable; - if unknown + 'w' => OutputType::Str( + meta.birth() + .map(|(sec, nsec)| pretty_time(sec as i64, nsec as i64)) + .unwrap_or(String::from("-")), + ), + + // time of file birth, seconds since Epoch; 0 if unknown + 'W' => OutputType::Unsigned(meta.birth().unwrap_or_default().0), + + // time of last access, human-readable + 'x' => OutputType::Str(pretty_time(meta.atime(), meta.atime_nsec())), + // time of last access, seconds since Epoch + 'X' => OutputType::Integer(meta.atime()), + // time of last data modification, human-readable + 
'y' => OutputType::Str(pretty_time(meta.mtime(), meta.mtime_nsec())), + // time of last data modification, seconds since Epoch + 'Y' => OutputType::Integer(meta.mtime()), + // time of last status change, human-readable + 'z' => OutputType::Str(pretty_time(meta.ctime(), meta.ctime_nsec())), + // time of last status change, seconds since Epoch + 'Z' => OutputType::Integer(meta.ctime()), + 'R' => { + let major = meta.rdev() >> 8; + let minor = meta.rdev() & 0xff; + OutputType::Str(format!("{},{}", major, minor)) + } + 'r' => OutputType::Unsigned(meta.rdev()), + 'H' => OutputType::Unsigned(meta.rdev() >> 8), // Major in decimal + 'L' => OutputType::Unsigned(meta.rdev() & 0xff), // Minor in decimal + + _ => OutputType::Unknown, + }; + print_it(&output, flag, width, precision); + } + } + Ok(()) + } + fn do_stat(&self, file: &OsStr, stdin_is_fifo: bool) -> i32 { let display_name = file.to_string_lossy(); let file = if cfg!(unix) && display_name == "-" { @@ -659,46 +944,9 @@ impl Stater { Ok(meta) => { let tokens = &self.default_tokens; + // Usage for t in tokens { - match *t { - Token::Char(c) => print!("{c}"), - Token::Directive { - flag, - width, - precision, - format, - } => { - let output = match format { - // free blocks available to non-superuser - 'a' => OutputType::Unsigned(meta.avail_blocks()), - // total data blocks in file system - 'b' => OutputType::Unsigned(meta.total_blocks()), - // total file nodes in file system - 'c' => OutputType::Unsigned(meta.total_file_nodes()), - // free file nodes in file system - 'd' => OutputType::Unsigned(meta.free_file_nodes()), - // free blocks in file system - 'f' => OutputType::Unsigned(meta.free_blocks()), - // file system ID in hex - 'i' => OutputType::UnsignedHex(meta.fsid()), - // maximum length of filenames - 'l' => OutputType::Unsigned(meta.namelen()), - // file name - 'n' => OutputType::Str(display_name.to_string()), - // block size (for faster transfers) - 's' => OutputType::Unsigned(meta.io_size()), - // fundamental 
block size (for block counts) - 'S' => OutputType::Integer(meta.block_size()), - // file system type in hex - 't' => OutputType::UnsignedHex(meta.fs_type() as u64), - // file system type in human readable form - 'T' => OutputType::Str(pretty_fstype(meta.fs_type()).into()), - _ => OutputType::Unknown, - }; - - print_it(&output, flag, width, precision); - } - } + process_token_filesystem(t, meta, &display_name); } } Err(e) => { @@ -728,125 +976,15 @@ impl Stater { }; for t in tokens { - match *t { - Token::Char(c) => print!("{c}"), - Token::Directive { - flag, - width, - precision, - format, - } => { - let output = match format { - // access rights in octal - 'a' => OutputType::UnsignedOct(0o7777 & meta.mode()), - // access rights in human readable form - 'A' => OutputType::Str(display_permissions(&meta, true)), - // number of blocks allocated (see %B) - 'b' => OutputType::Unsigned(meta.blocks()), - - // the size in bytes of each block reported by %b - // FIXME: blocksize differs on various platform - // See coreutils/gnulib/lib/stat-size.h ST_NBLOCKSIZE // spell-checker:disable-line - 'B' => OutputType::Unsigned(512), - - // device number in decimal - 'd' => OutputType::Unsigned(meta.dev()), - // device number in hex - 'D' => OutputType::UnsignedHex(meta.dev()), - // raw mode in hex - 'f' => OutputType::UnsignedHex(meta.mode() as u64), - // file type - 'F' => OutputType::Str( - pretty_filetype(meta.mode() as mode_t, meta.len()) - .to_owned(), - ), - // group ID of owner - 'g' => OutputType::Unsigned(meta.gid() as u64), - // group name of owner - 'G' => { - let group_name = entries::gid2grp(meta.gid()) - .unwrap_or_else(|_| "UNKNOWN".to_owned()); - OutputType::Str(group_name) - } - // number of hard links - 'h' => OutputType::Unsigned(meta.nlink()), - // inode number - 'i' => OutputType::Unsigned(meta.ino()), - // mount point - 'm' => OutputType::Str(self.find_mount_point(&file).unwrap()), - // file name - 'n' => OutputType::Str(display_name.to_string()), - // quoted 
file name with dereference if symbolic link - 'N' => { - let file_name = if file_type.is_symlink() { - let dst = match fs::read_link(&file) { - Ok(path) => path, - Err(e) => { - println!("{e}"); - return 1; - } - }; - format!("{} -> {}", display_name.quote(), dst.quote()) - } else { - display_name.to_string() - }; - OutputType::Str(file_name) - } - // optimal I/O transfer size hint - 'o' => OutputType::Unsigned(meta.blksize()), - // total size, in bytes - 's' => OutputType::Integer(meta.len() as i64), - // major device type in hex, for character/block device special - // files - 't' => OutputType::UnsignedHex(meta.rdev() >> 8), - // minor device type in hex, for character/block device special - // files - 'T' => OutputType::UnsignedHex(meta.rdev() & 0xff), - // user ID of owner - 'u' => OutputType::Unsigned(meta.uid() as u64), - // user name of owner - 'U' => { - let user_name = entries::uid2usr(meta.uid()) - .unwrap_or_else(|_| "UNKNOWN".to_owned()); - OutputType::Str(user_name) - } - - // time of file birth, human-readable; - if unknown - 'w' => OutputType::Str( - meta.birth() - .map(|(sec, nsec)| pretty_time(sec as i64, nsec as i64)) - .unwrap_or(String::from("-")), - ), - - // time of file birth, seconds since Epoch; 0 if unknown - 'W' => OutputType::Unsigned(meta.birth().unwrap_or_default().0), - - // time of last access, human-readable - 'x' => OutputType::Str(pretty_time( - meta.atime(), - meta.atime_nsec(), - )), - // time of last access, seconds since Epoch - 'X' => OutputType::Integer(meta.atime()), - // time of last data modification, human-readable - 'y' => OutputType::Str(pretty_time( - meta.mtime(), - meta.mtime_nsec(), - )), - // time of last data modification, seconds since Epoch - 'Y' => OutputType::Integer(meta.mtime()), - // time of last status change, human-readable - 'z' => OutputType::Str(pretty_time( - meta.ctime(), - meta.ctime_nsec(), - )), - // time of last status change, seconds since Epoch - 'Z' => OutputType::Integer(meta.ctime()), - - 
_ => OutputType::Unknown, - }; - print_it(&output, flag, width, precision); - } + if let Err(code) = self.process_token_files( + t, + &meta, + &display_name, + &file, + &file_type, + self.from_user, + ) { + return code; } } } @@ -1038,7 +1176,7 @@ mod tests { #[test] fn printf_format() { - let s = r#"%-# 15a\t\r\"\\\a\b\e\f\v%+020.-23w\x12\167\132\112\n"#; + let s = r#"%-# 15a\t\r\"\\\a\b\x1B\f\x0B%+020.-23w\x12\167\132\112\n"#; let expected = vec![ Token::Directive { flag: Flags { @@ -1051,15 +1189,15 @@ mod tests { precision: None, format: 'a', }, - Token::Char('\t'), - Token::Char('\r'), - Token::Char('"'), - Token::Char('\\'), - Token::Char('\x07'), - Token::Char('\x08'), - Token::Char('\x1B'), - Token::Char('\x0C'), - Token::Char('\x0B'), + Token::Byte(b'\t'), + Token::Byte(b'\r'), + Token::Byte(b'"'), + Token::Byte(b'\\'), + Token::Byte(b'\x07'), + Token::Byte(b'\x08'), + Token::Byte(b'\x1B'), + Token::Byte(b'\x0C'), + Token::Byte(b'\x0B'), Token::Directive { flag: Flags { sign: true, @@ -1070,11 +1208,11 @@ mod tests { precision: None, format: 'w', }, - Token::Char('\x12'), - Token::Char('w'), - Token::Char('Z'), - Token::Char('J'), - Token::Char('\n'), + Token::Byte(b'\x12'), + Token::Byte(b'w'), + Token::Byte(b'Z'), + Token::Byte(b'J'), + Token::Byte(b'\n'), ]; assert_eq!(&expected, &Stater::generate_tokens(s, true).unwrap()); } diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 8cb4493f0..cbd36832f 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -242,7 +242,7 @@ fn test_multi_files() { #[test] fn test_printf() { let args = [ - "--printf=123%-# 15q\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.23m\\x12\\167\\132\\112\\n", + "--printf=123%-# 15q\\r\\\"\\\\\\a\\b\\x1B\\f\\x0B%+020.23m\\x12\\167\\132\\112\\n", "/", ]; let ts = TestScenario::new(util_name!()); @@ -256,11 +256,10 @@ fn test_pipe_fifo() { let (at, mut ucmd) = at_and_ucmd!(); at.mkfifo("FIFO"); ucmd.arg("FIFO") - .run() + .succeeds() .no_stderr() 
.stdout_contains("fifo") - .stdout_contains("File: FIFO") - .succeeded(); + .stdout_contains("File: FIFO"); } #[test] @@ -275,19 +274,17 @@ fn test_stdin_pipe_fifo1() { new_ucmd!() .arg("-") .set_stdin(std::process::Stdio::piped()) - .run() + .succeeds() .no_stderr() .stdout_contains("fifo") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); new_ucmd!() .args(&["-L", "-"]) .set_stdin(std::process::Stdio::piped()) - .run() + .succeeds() .no_stderr() .stdout_contains("fifo") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); } #[test] @@ -299,11 +296,10 @@ fn test_stdin_pipe_fifo2() { new_ucmd!() .arg("-") .set_stdin(std::process::Stdio::null()) - .run() + .succeeds() .no_stderr() .stdout_contains("character special file") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); } #[test] @@ -339,11 +335,10 @@ fn test_stdin_redirect() { ts.ucmd() .arg("-") .set_stdin(std::fs::File::open(at.plus("f")).unwrap()) - .run() + .succeeds() .no_stderr() .stdout_contains("regular empty file") - .stdout_contains("File: -") - .succeeded(); + .stdout_contains("File: -"); } #[test] @@ -352,3 +347,76 @@ fn test_without_argument() { .fails() .stderr_contains("missing operand\nTry 'stat --help' for more information."); } + +#[test] +fn test_quoting_style_locale() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + at.touch("'"); + ts.ucmd() + .env("QUOTING_STYLE", "locale") + .args(&["-c", "%N", "'"]) + .succeeds() + .stdout_only("'\\''\n"); + + ts.ucmd() + .args(&["-c", "%N", "'"]) + .succeeds() + .stdout_only("\"'\"\n"); +} + +#[test] +fn test_printf_octal_1() { + let ts = TestScenario::new(util_name!()); + let expected_stdout = vec![0x0A, 0xFF]; // Newline + byte 255 + ts.ucmd() + .args(&["--printf=\\012\\377", "."]) + .succeeds() + .stdout_is_bytes(expected_stdout); +} + +#[test] +fn test_printf_octal_2() { + let ts = TestScenario::new(util_name!()); + let expected_stdout = vec![b'.', 
0x0A, b'a', 0xFF, b'b']; + ts.ucmd() + .args(&["--printf=.\\012a\\377b", "."]) + .succeeds() + .stdout_is_bytes(expected_stdout); +} + +#[test] +fn test_printf_incomplete_hex() { + let ts = TestScenario::new(util_name!()); + ts.ucmd() + .args(&["--printf=\\x", "."]) + .succeeds() + .stderr_contains("warning: incomplete hex escape"); +} + +#[test] +fn test_printf_bel_etc() { + let ts = TestScenario::new(util_name!()); + let expected_stdout = vec![0x07, 0x08, 0x0C, 0x0A, 0x0D, 0x09]; // BEL, BS, FF, LF, CR, TAB + ts.ucmd() + .args(&["--printf=\\a\\b\\f\\n\\r\\t", "."]) + .succeeds() + .stdout_is_bytes(expected_stdout); +} + +#[test] +fn test_printf_invalid_directive() { + let ts = TestScenario::new(util_name!()); + + ts.ucmd() + .args(&["--printf=%9", "."]) + .fails() + .code_is(1) + .stderr_contains("'%9': invalid directive"); + + ts.ucmd() + .args(&["--printf=%9%", "."]) + .fails() + .code_is(1) + .stderr_contains("'%9%': invalid directive"); +} diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 684187733..e33e64429 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -204,6 +204,9 @@ sed -i "s|cp: target directory 'symlink': Permission denied|cp: 'symlink' is not # Our message is a bit better sed -i "s|cannot create regular file 'no-such/': Not a directory|'no-such/' is not a directory|" tests/mv/trailing-slash.sh +# Our message is better +sed -i "s|warning: unrecognized escape|warning: incomplete hex escape|" tests/stat/stat-printf.pl + sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh sed -i 's|paste |/usr/bin/paste |' tests/od/od-endian.sh sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail/follow-stdin.sh From ed15ca1d264ce9b6faa5720d7215ed6707b77651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Thu, 5 Dec 2024 17:59:00 +0100 Subject: [PATCH 111/179] checksum: keep a cache of the first used regex for non-algo-based regexes --- src/uucore/src/lib/features/checksum.rs | 216 ++++++++++++++++-------- 1 file changed, 145 
insertions(+), 71 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 34dc0f870..d575a4b38 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -6,8 +6,9 @@ use data_encoding::BASE64; use os_display::Quotable; -use regex::bytes::{Captures, Regex}; +use regex::bytes::{Match, Regex}; use std::{ + borrow::Cow, ffi::OsStr, fmt::Display, fs::File, @@ -427,6 +428,67 @@ const DOUBLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s{2}(?P // In this case, we ignore the * const SINGLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s(?P\*?(?-u:.*))$"; +/// Hold the data extracted from a checksum line. +struct LineInfo { + algo_name: Option, + algo_bit_len: Option, + checksum: String, + filename: Vec, + + regex: Regex, +} + +impl LineInfo { + fn parse(s: impl AsRef, cached_regex: &mut Option) -> Option { + let regexes = [ + (Regex::new(ALGO_BASED_REGEX).unwrap(), true), + (Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false), + (Regex::new(SINGLE_SPACE_REGEX).unwrap(), false), + (Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), false), + ]; + + let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed"); + + for (regex, algo_based) in ®exes { + if !regex.is_match(line_bytes) { + continue; + } + + let mut r = regex.clone(); + if !algo_based && cached_regex.is_some() { + r = cached_regex.clone().unwrap(); + } + + if let Some(caps) = r.captures(line_bytes) { + // These unwraps are safe thanks to the regex + let match_to_string = |m: Match| String::from_utf8(m.as_bytes().into()).unwrap(); + + return Some(Self { + algo_name: caps.name("algo").map(match_to_string), + algo_bit_len: caps + .name("bits") + .map(|m| match_to_string(m).parse::().unwrap()), + checksum: caps.name("checksum").map(match_to_string).unwrap(), + filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(), + regex: r.clone(), + }); + } + } + + None + } + + #[inline] + fn is_algo_based(&self) -> 
bool { + self.algo_name.is_some() + } + + #[inline] + fn regex_str(&self) -> &str { + self.regex.as_str() + } +} + fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String { if input_is_stdin { "standard input" @@ -437,34 +499,18 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String { .to_string() } -/// Determines the appropriate regular expression to use based on the provided lines. -fn determine_regex(line: impl AsRef) -> Option<(Regex, bool)> { - let regexes = [ - (Regex::new(ALGO_BASED_REGEX).unwrap(), true), - (Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false), - (Regex::new(SINGLE_SPACE_REGEX).unwrap(), false), - (Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true), - ]; - - let line_bytes = os_str_as_bytes(line.as_ref()).expect("UTF-8 decoding failed"); - for (regex, is_algo_based) in ®exes { - if regex.is_match(line_bytes) { - return Some((regex.clone(), *is_algo_based)); - } - } - - None -} - /// Extract the expected digest from the checksum string -fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> Option { - // Unwraps are safe, ensured by regex. - let ck = caps.name("checksum").unwrap().as_bytes(); +fn get_expected_digest_as_hex_string(line_info: &LineInfo) -> Option> { + let ck = &line_info.checksum; - if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 { - BASE64.decode(ck).map(hex::encode).ok() + if line_info.regex_str() == ALGO_BASED_REGEX_BASE64 { + BASE64 + .decode(ck.as_bytes()) + .map(hex::encode) + .map(Cow::Owned) + .ok() } else if ck.len() % 2 == 0 { - Some(str::from_utf8(ck).unwrap().to_string()) + Some(Cow::Borrowed(ck)) } else { // If the length of the digest is not a multiple of 2, then it // must be improperly formatted (1 hex digit is 2 characters) @@ -545,15 +591,14 @@ fn get_input_file(filename: &OsStr) -> UResult> { /// Extracts the algorithm name and length from the regex captures if the algo-based format is matched. 
fn identify_algo_name_and_length( - caps: &Captures, + line_info: &LineInfo, algo_name_input: Option<&str>, ) -> Option<(String, Option)> { // When the algo-based format is matched, extract details from regex captures - let algorithm = caps - .name("algo") - .map_or(String::new(), |m| { - String::from_utf8(m.as_bytes().into()).unwrap() - }) + let algorithm = line_info + .algo_name + .clone() + .unwrap_or_default() .to_lowercase(); // check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file @@ -568,13 +613,9 @@ fn identify_algo_name_and_length( return None; } - let bits = caps.name("bits").map_or(Some(None), |m| { - let bits_value = String::from_utf8(m.as_bytes().into()) - .unwrap() - .parse::() - .unwrap(); - if bits_value % 8 == 0 { - Some(Some(bits_value / 8)) + let bits = line_info.algo_bitlen.map_or(Some(None), |bits| { + if bits % 8 == 0 { + Some(Some(bits / 8)) } else { None // Return None to signal a divisibility issue } @@ -597,6 +638,7 @@ fn process_checksum_line( cli_algo_name: Option<&str>, cli_algo_length: Option, opts: ChecksumOptions, + cached_regex: &mut Option, ) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; @@ -605,26 +647,30 @@ fn process_checksum_line( return Err(LineCheckError::Skipped); } - let (chosen_regex, is_algo_based_format) = - determine_regex(line).ok_or(LineCheckError::ImproperlyFormatted)?; + if let Some(line_info) = LineInfo::parse(line, cached_regex) { + // The cached regex ensures that when processing non-algo based regexes, + // its cannot be changed (can't have single and double space regexes + // used in the same file). 
+ if cached_regex.is_none() && !line_info.is_algo_based() { + let _ = cached_regex.insert(line_info.regex.clone()); + } - if let Some(caps) = chosen_regex.captures(line_bytes) { - let mut filename_to_check = caps.name("filename").unwrap().as_bytes(); + let mut filename_to_check = line_info.filename.as_slice(); if filename_to_check.starts_with(b"*") && i == 0 - && chosen_regex.as_str() == SINGLE_SPACE_REGEX + && line_info.regex_str() == SINGLE_SPACE_REGEX { // Remove the leading asterisk if present - only for the first line filename_to_check = &filename_to_check[1..]; } - let expected_checksum = get_expected_digest_as_hex_string(&caps, &chosen_regex) + let expected_checksum = get_expected_digest_as_hex_string(&line_info) .ok_or(LineCheckError::ImproperlyFormatted)?; // If the algo_name is provided, we use it, otherwise we try to detect it - let (algo_name, length) = if is_algo_based_format { - identify_algo_name_and_length(&caps, cli_algo_name) + let (algo_name, length) = if line_info.is_algo_based() { + identify_algo_name_and_length(&line_info, cli_algo_name) .ok_or(LineCheckError::ImproperlyFormatted)? } else if let Some(a) = cli_algo_name { // When a specific algorithm name is input, use it and use the provided bits @@ -721,6 +767,10 @@ fn process_checksum_file( let reader = BufReader::new(file); let lines = read_os_string_lines(reader).collect::>(); + // cached_regex is used to ensure that several non algo-based checksum line + // will use the same regex. 
+ let mut cached_regex = None; + for (i, line) in lines.iter().enumerate() { let line_result = process_checksum_line( filename_input, @@ -729,6 +779,7 @@ fn process_checksum_file( cli_algo_name, cli_algo_length, opts, + &mut cached_regex, ); // Match a first time to elude critical UErrors, and increment the total @@ -1149,52 +1200,75 @@ mod tests { } #[test] - fn test_determine_regex() { + fn test_line_info() { + let mut cached_regex = None; + // Test algo-based regex let line_algo_based = OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); - let (regex, algo_based) = determine_regex(&line_algo_based).unwrap(); - assert!(algo_based); - assert!(regex.is_match(os_str_as_bytes(&line_algo_based).unwrap())); + let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap(); + assert!(line_info.is_algo_based()); + assert_eq!(line_info.algo_name.as_deref(), Some("MD5")); + assert!(line_info.algo_bit_len.is_none()); + assert_eq!(line_info.filename, b"example.txt"); + assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); + assert_eq!(line_info.regex_str(), ALGO_BASED_REGEX); + assert!(cached_regex.is_none()); // Test double-space regex let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); - let (regex, algo_based) = determine_regex(&line_double_space).unwrap(); - assert!(!algo_based); - assert!(regex.is_match(os_str_as_bytes(&line_double_space).unwrap())); + let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap(); + assert!(!line_info.is_algo_based()); + assert!(line_info.algo_name.is_none()); + assert!(line_info.algo_bit_len.is_none()); + assert_eq!(line_info.filename, b"example.txt"); + assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); + assert_eq!(line_info.regex_str(), DOUBLE_SPACE_REGEX); + assert!(cached_regex.is_some()); + + cached_regex = None; // Test single-space regex let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e 
example.txt"); - let (regex, algo_based) = determine_regex(&line_single_space).unwrap(); - assert!(!algo_based); - assert!(regex.is_match(os_str_as_bytes(&line_single_space).unwrap())); + let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap(); + assert!(!line_info.is_algo_based()); + assert!(line_info.algo_name.is_none()); + assert!(line_info.algo_bit_len.is_none()); + assert_eq!(line_info.filename, b"example.txt"); + assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); + assert_eq!(line_info.regex_str(), SINGLE_SPACE_REGEX); + assert!(cached_regex.is_some()); + + cached_regex = None; // Test invalid checksum line let line_invalid = OsString::from("invalid checksum line"); - assert!(determine_regex(&line_invalid).is_none()); + assert!(LineInfo::parse(&line_invalid, &mut cached_regex).is_none()); + assert!(cached_regex.is_none()); // Test leading space before checksum line let line_algo_based_leading_space = OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); - let res = determine_regex(&line_algo_based_leading_space); + let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex); assert!(res.is_some()); - assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX); + assert_eq!(res.unwrap().regex_str(), ALGO_BASED_REGEX); + assert!(cached_regex.is_none()); // Test trailing space after checksum line (should fail) let line_algo_based_leading_space = OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "); - let res = determine_regex(&line_algo_based_leading_space); + let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex); assert!(res.is_none()); + assert!(cached_regex.is_none()); } #[test] fn test_get_expected_digest() { - let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(); - let caps = re - .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") - .unwrap(); + let line = OsString::from("SHA256 (empty) = 
47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="); + let mut cached_regex = None; + let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap(); - let result = get_expected_digest_as_hex_string(&caps, &re); + let result = get_expected_digest_as_hex_string(&line_info); assert_eq!( result.unwrap(), @@ -1204,12 +1278,12 @@ mod tests { #[test] fn test_get_expected_checksum_invalid() { - let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(); - let caps = re - .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU") - .unwrap(); + // The line misses a '=' at the end to be valid base64 + let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU"); + let mut cached_regex = None; + let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap(); - let result = get_expected_digest_as_hex_string(&caps, &re); + let result = get_expected_digest_as_hex_string(&line_info); assert!(result.is_none()); } From 65ddccbeb6a4e9bc45b0fc0184209a08387da411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Thu, 5 Dec 2024 18:19:50 +0100 Subject: [PATCH 112/179] checksum: avoid to recompute Regexps --- Cargo.lock | 1 + src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features/checksum.rs | 40 ++++++++++++++++--------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb09c5fd6..611cd240c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3573,6 +3573,7 @@ dependencies = [ "glob", "hex", "itertools", + "lazy_static", "libc", "md-5", "memchr", diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index b72a8ed71..a4529f3a5 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -25,6 +25,7 @@ dns-lookup = { workspace = true, optional = true } dunce = { version = "1.0.4", optional = true } wild = "2.2.1" glob = { workspace = true } +lazy_static = "1.4.0" # * optional itertools = { workspace = true, optional = true } thiserror = { workspace = true, optional = true } diff 
--git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index d575a4b38..dec5fcf21 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -5,6 +5,7 @@ // spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename use data_encoding::BASE64; +use lazy_static::lazy_static; use os_display::Quotable; use regex::bytes::{Match, Regex}; use std::{ @@ -428,6 +429,13 @@ const DOUBLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s{2}(?P // In this case, we ignore the * const SINGLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s(?P\*?(?-u:.*))$"; +lazy_static! { + static ref R_ALGO_BASED: Regex = Regex::new(ALGO_BASED_REGEX).unwrap(); + static ref R_DOUBLE_SPACE: Regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap(); + static ref R_SINGLE_SPACE: Regex = Regex::new(SINGLE_SPACE_REGEX).unwrap(); + static ref R_ALGO_BASED_BASE_64: Regex = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(); +} + /// Hold the data extracted from a checksum line. 
struct LineInfo { algo_name: Option, @@ -435,28 +443,32 @@ struct LineInfo { checksum: String, filename: Vec, - regex: Regex, + regex: &'static Regex, } impl LineInfo { - fn parse(s: impl AsRef, cached_regex: &mut Option) -> Option { - let regexes = [ - (Regex::new(ALGO_BASED_REGEX).unwrap(), true), - (Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false), - (Regex::new(SINGLE_SPACE_REGEX).unwrap(), false), - (Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), false), + fn parse(s: impl AsRef, cached_regex: &mut Option<&'static Regex>) -> Option { + let regexes: &[(&'static Regex, bool)] = &[ + (&R_ALGO_BASED, true), + (&R_DOUBLE_SPACE, false), + (&R_SINGLE_SPACE, false), + (&R_ALGO_BASED_BASE_64, true), ]; let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed"); - for (regex, algo_based) in ®exes { + for (regex, algo_based) in regexes { if !regex.is_match(line_bytes) { continue; } - let mut r = regex.clone(); - if !algo_based && cached_regex.is_some() { - r = cached_regex.clone().unwrap(); + let mut r = *regex; + if !algo_based { + if cached_regex.is_some() { + r = cached_regex.unwrap(); + } else { + *cached_regex = Some(r); + } } if let Some(caps) = r.captures(line_bytes) { @@ -470,7 +482,7 @@ impl LineInfo { .map(|m| match_to_string(m).parse::().unwrap()), checksum: caps.name("checksum").map(match_to_string).unwrap(), filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(), - regex: r.clone(), + regex: r, }); } } @@ -638,7 +650,7 @@ fn process_checksum_line( cli_algo_name: Option<&str>, cli_algo_length: Option, opts: ChecksumOptions, - cached_regex: &mut Option, + cached_regex: &mut Option<&'static Regex>, ) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; @@ -652,7 +664,7 @@ fn process_checksum_line( // its cannot be changed (can't have single and double space regexes // used in the same file). 
if cached_regex.is_none() && !line_info.is_algo_based() { - let _ = cached_regex.insert(line_info.regex.clone()); + let _ = cached_regex.insert(line_info.regex); } let mut filename_to_check = line_info.filename.as_slice(); From df16c1c65560b42f0a4471876f7e3203a6c05a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Thu, 5 Dec 2024 17:59:35 +0100 Subject: [PATCH 113/179] test(cksum): Add tests --- tests/by-util/test_cksum.rs | 81 ++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index bf74de9cc..6c1718112 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -1443,7 +1443,7 @@ mod check_utf8 { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec()); - at.touch(&filename); + at.touch(filename); // Checksum match at.write_bytes("check", @@ -1544,7 +1544,6 @@ fn test_check_confusing_base64() { /// This test checks that when a file contains several checksum lines /// with different encoding, the decoding still works. -#[ignore = "not yet implemented"] #[test] fn test_check_mix_hex_base64() { let b64 = "BLAKE2b-128 (foo1.dat) = BBNuJPhdRwRlw9tm5Y7VbA=="; @@ -1769,3 +1768,81 @@ mod gnu_cksum_base64 { } } } + +/// The tests in this module check the behavior of cksum when given different +/// checksum formats and algorithms in the same file, while specifying an +/// algorithm on CLI or not. 
+mod format_mix { + use super::*; + + // First line is algo-based, second one is not + const INPUT_ALGO_NON_ALGO: &str = "\ + BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce\n\ + 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce foo"; + + // First line is non algo-based, second one is + const INPUT_NON_ALGO_ALGO: &str = "\ + 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce foo\n\ + BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce"; + + /// Make a simple scene with foo and bar empty files + fn make_scene() -> TestScenario { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("foo"); + at.touch("bar"); + + scene + } + + #[test] + fn test_check_cli_algo_non_algo() { + let scene = make_scene(); + scene + .ucmd() + .arg("--check") + .arg("--algo=blake2b") + .pipe_in(INPUT_ALGO_NON_ALGO) + .succeeds() + .stdout_contains("bar: OK\nfoo: OK") + .no_stderr(); + } + + #[test] + fn test_check_cli_non_algo_algo() { + let scene = make_scene(); + scene + .ucmd() + .arg("--check") + .arg("--algo=blake2b") + .pipe_in(INPUT_NON_ALGO_ALGO) + .succeeds() + .stdout_contains("foo: OK\nbar: OK") + .no_stderr(); + } + + #[test] + fn test_check_algo_non_algo() { + let scene = make_scene(); + scene + .ucmd() + .arg("--check") + .pipe_in(INPUT_ALGO_NON_ALGO) + .succeeds() + .stdout_contains("bar: OK") + .stderr_contains("cksum: WARNING: 1 line is improperly formatted"); + } + + #[test] + fn test_check_non_algo_algo() { + let scene = make_scene(); + scene + .ucmd() + .arg("--check") + .pipe_in(INPUT_NON_ALGO_ALGO) + .succeeds() + .stdout_contains("bar: OK") + .stderr_contains("cksum: WARNING: 1 line is 
improperly formatted"); + } +} From 10a9b0bfbf237ed37b298ffcd932c6b31e17a18a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Thu, 5 Dec 2024 18:22:52 +0100 Subject: [PATCH 114/179] checksum: split treatment of algo-based and non-algo based into separate functions --- src/uucore/src/lib/features/checksum.rs | 172 ++++++++++++++---------- 1 file changed, 102 insertions(+), 70 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index dec5fcf21..0da814a76 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -464,6 +464,9 @@ impl LineInfo { let mut r = *regex; if !algo_based { + // The cached regex ensures that when processing non-algo based regexes, + // its cannot be changed (can't have single and double space regexes + // used in the same file). if cached_regex.is_some() { r = cached_regex.unwrap(); } else { @@ -636,13 +639,101 @@ fn identify_algo_name_and_length( Some((algorithm, bits)) } +/// Given a filename and an algorithm, compute the digest and compare it with +/// the expected one. 
+fn compute_and_check_digest_from_file( + filename: &[u8], + expected_checksum: &str, + mut algo: HashAlgorithm, + opts: ChecksumOptions, +) -> Result<(), LineCheckError> { + let (filename_to_check_unescaped, prefix) = unescape_filename(filename); + let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; + + // Open the input file + let file_to_check = get_file_to_check(&real_filename_to_check, opts)?; + let mut file_reader = BufReader::new(file_to_check); + + // Read the file and calculate the checksum + let create_fn = &mut algo.create_fn; + let mut digest = create_fn(); + let (calculated_checksum, _) = + digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); + + // Do the checksum validation + let checksum_correct = expected_checksum == calculated_checksum; + print_file_report( + std::io::stdout(), + filename, + FileChecksumResult::from_bool(checksum_correct), + prefix, + opts, + ); + + if checksum_correct { + Ok(()) + } else { + Err(LineCheckError::DigestMismatch) + } +} + +/// Check a digest checksum with non-algo based pre-treatment. +fn process_algo_based_line( + line_info: &LineInfo, + cli_algo_name: Option<&str>, + opts: ChecksumOptions, +) -> Result<(), LineCheckError> { + let filename_to_check = line_info.filename.as_slice(); + let expected_checksum = + get_expected_digest_as_hex_string(line_info).ok_or(LineCheckError::ImproperlyFormatted)?; + + let (algo_name, algo_bitlen) = identify_algo_name_and_length(line_info, cli_algo_name) + .ok_or(LineCheckError::ImproperlyFormatted)?; + + let algo = detect_algo(&algo_name, algo_bitlen)?; + + compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) +} + +/// Check a digest checksum with non-algo based pre-treatment. 
+fn process_non_algo_based_line( + i: usize, + line_info: &LineInfo, + cli_algo_name: &str, + cli_algo_length: Option, + opts: ChecksumOptions, +) -> Result<(), LineCheckError> { + let mut filename_to_check = line_info.filename.as_slice(); + if filename_to_check.starts_with(b"*") && i == 0 && line_info.regex_str() == SINGLE_SPACE_REGEX + { + // Remove the leading asterisk if present - only for the first line + filename_to_check = &filename_to_check[1..]; + } + let expected_checksum = + get_expected_digest_as_hex_string(line_info).ok_or(LineCheckError::ImproperlyFormatted)?; + + // When a specific algorithm name is input, use it and use the provided bits + // except when dealing with blake2b, where we will detect the length + let (algo_name, algo_bitlen) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B { + // division by 2 converts the length of the Blake2b checksum from hexadecimal + // characters to bytes, as each byte is represented by two hexadecimal characters. + let length = Some(expected_checksum.len() / 2); + (ALGORITHM_OPTIONS_BLAKE2B.to_string(), length) + } else { + (cli_algo_name.to_lowercase(), cli_algo_length) + }; + + let algo = detect_algo(&algo_name, algo_bitlen)?; + + compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) +} + /// Parses a checksum line, detect the algorithm to use, read the file and produce /// its digest, and compare it to the expected value. /// /// Returns `Ok(bool)` if the comparison happened, bool indicates if the digest /// matched the expected. /// If the comparison didn't happen, return a `LineChecksumError`. -#[allow(clippy::too_many_arguments)] fn process_checksum_line( filename_input: &OsStr, line: &OsStr, @@ -654,82 +745,23 @@ fn process_checksum_line( ) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; - // early return on empty or commented lines. + // Early return on empty or commented lines. 
if line.is_empty() || line_bytes.starts_with(b"#") { return Err(LineCheckError::Skipped); } + // Use `LineInfo` to extract the data of a line. + // Then, depending on its format, apply a different pre-treatment. if let Some(line_info) = LineInfo::parse(line, cached_regex) { - // The cached regex ensures that when processing non-algo based regexes, - // its cannot be changed (can't have single and double space regexes - // used in the same file). - if cached_regex.is_none() && !line_info.is_algo_based() { - let _ = cached_regex.insert(line_info.regex); - } - - let mut filename_to_check = line_info.filename.as_slice(); - - if filename_to_check.starts_with(b"*") - && i == 0 - && line_info.regex_str() == SINGLE_SPACE_REGEX - { - // Remove the leading asterisk if present - only for the first line - filename_to_check = &filename_to_check[1..]; - } - - let expected_checksum = get_expected_digest_as_hex_string(&line_info) - .ok_or(LineCheckError::ImproperlyFormatted)?; - - // If the algo_name is provided, we use it, otherwise we try to detect it - let (algo_name, length) = if line_info.is_algo_based() { - identify_algo_name_and_length(&line_info, cli_algo_name) - .ok_or(LineCheckError::ImproperlyFormatted)? - } else if let Some(a) = cli_algo_name { - // When a specific algorithm name is input, use it and use the provided bits - // except when dealing with blake2b, where we will detect the length - if cli_algo_name == Some(ALGORITHM_OPTIONS_BLAKE2B) { - // division by 2 converts the length of the Blake2b checksum from hexadecimal - // characters to bytes, as each byte is represented by two hexadecimal characters. 
- let length = Some(expected_checksum.len() / 2); - (ALGORITHM_OPTIONS_BLAKE2B.to_string(), length) - } else { - (a.to_lowercase(), cli_algo_length) - } + if line_info.is_algo_based() { + process_algo_based_line(&line_info, cli_algo_name, opts) + } else if let Some(cli_algo) = cli_algo_name { + // If we match a non-algo based regex, we expect a cli argument + // to give us the algorithm to use + process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts) } else { - // Default case if no algorithm is specified and non-algo based format is matched + // We have no clue of what algorithm to use return Err(LineCheckError::ImproperlyFormatted); - }; - - let mut algo = detect_algo(&algo_name, length)?; - - let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check); - - let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; - - // manage the input file - let file_to_check = get_file_to_check(&real_filename_to_check, opts)?; - let mut file_reader = BufReader::new(file_to_check); - - // Read the file and calculate the checksum - let create_fn = &mut algo.create_fn; - let mut digest = create_fn(); - let (calculated_checksum, _) = - digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); - - // Do the checksum validation - let checksum_correct = expected_checksum == calculated_checksum; - print_file_report( - std::io::stdout(), - filename_to_check, - FileChecksumResult::from_bool(checksum_correct), - prefix, - opts, - ); - - if checksum_correct { - Ok(()) - } else { - Err(LineCheckError::DigestMismatch) } } else { if opts.warn { From cd99102c9135837f05b8221a592e3b73d5affa1c Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 05:16:32 +0000 Subject: [PATCH 115/179] chore(deps): update rust crate serde to v1.0.216 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock 
index 75ab4c2ed..781850626 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2088,9 +2088,9 @@ checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] @@ -2106,9 +2106,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", From 3900fa91ba5d987f9c1bcb2f3c5da61d75d3b981 Mon Sep 17 00:00:00 2001 From: Alexander Shirokov Date: Wed, 11 Dec 2024 14:28:46 +0100 Subject: [PATCH 116/179] seq:reduce memory allocation during prefix search This improvement eliminates extra memory allocations during the search for 0x/0X prefixes in number strings. --- src/uu/seq/src/numberparse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/seq/src/numberparse.rs b/src/uu/seq/src/numberparse.rs index 80587f713..c8dec0180 100644 --- a/src/uu/seq/src/numberparse.rs +++ b/src/uu/seq/src/numberparse.rs @@ -341,7 +341,7 @@ impl FromStr for PreciseNumber { // Check if the string seems to be in hexadecimal format. // // May be 0x123 or -0x123, so the index `i` may be either 0 or 1. 
- if let Some(i) = s.to_lowercase().find("0x") { + if let Some(i) = s.find("0x").or_else(|| s.find("0X")) { if i <= 1 { return parse_hexadecimal(s); } From d62e2b500d3e591cf051efd2d68e7e4324d7bf5d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 19:00:55 +0000 Subject: [PATCH 117/179] chore(deps): update rust crate bstr to v1.11.1 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 781850626..04e4db58e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68f1f47cdf0ec8ee4b941b2eee2a80cb796db73118c0dd09ac63fbe405be22" +checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8" dependencies = [ "memchr", "regex-automata", From f4e5dc2e0fa8ae5e2a4ad23fa4dd0f0a44e12253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sat, 7 Dec 2024 02:29:54 +0100 Subject: [PATCH 118/179] checksum: use the blake2b length as an hint to check the correctness of the expected digest --- src/uucore/src/lib/features/checksum.rs | 95 +++++++++++++++++-------- 1 file changed, 64 insertions(+), 31 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 0da814a76..8de983490 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename +// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit use data_encoding::BASE64; use lazy_static::lazy_static; @@ -515,22 +515,43 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String { } /// Extract the expected digest from the checksum string -fn get_expected_digest_as_hex_string(line_info: &LineInfo) -> Option> { +fn get_expected_digest_as_hex_string( + line_info: &LineInfo, + len_hint: Option, +) -> Option> { let ck = &line_info.checksum; - if line_info.regex_str() == ALGO_BASED_REGEX_BASE64 { - BASE64 - .decode(ck.as_bytes()) - .map(hex::encode) - .map(Cow::Owned) - .ok() - } else if ck.len() % 2 == 0 { - Some(Cow::Borrowed(ck)) - } else { + // TODO MSRV 1.82, replace `is_some_and` with `is_none_or` + // to improve readability. This closure returns True if a length hint provided + // and the argument isn't the same as the hint. + let against_hint = |len| len_hint.is_some_and(|l| l != len); + + if ck.len() % 2 != 0 { // If the length of the digest is not a multiple of 2, then it // must be improperly formatted (1 hex digit is 2 characters) - None + return None; } + + // If the digest can be decoded as hexadecimal AND it length match the + // one expected (in case it's given), just go with it. + if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && !against_hint(ck.len()) { + return Some(Cow::Borrowed(ck)); + } + + // If hexadecimal digest fails for any reason, interpret the digest as base 64. + BASE64 + .decode(ck.as_bytes()) // Decode the string as encoded base64 + .map(hex::encode) // Encode it back as hexadecimal + .map(Cow::::Owned) + .ok() + .and_then(|s| { + // Check the digest length + if !against_hint(s.len()) { + Some(s) + } else { + None + } + }) } /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-". 
@@ -604,12 +625,11 @@ fn get_input_file(filename: &OsStr) -> UResult> { } } -/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched. +/// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched. fn identify_algo_name_and_length( line_info: &LineInfo, algo_name_input: Option<&str>, ) -> Option<(String, Option)> { - // When the algo-based format is matched, extract details from regex captures let algorithm = line_info .algo_name .clone() @@ -628,15 +648,20 @@ fn identify_algo_name_and_length( return None; } - let bits = line_info.algo_bitlen.map_or(Some(None), |bits| { - if bits % 8 == 0 { - Some(Some(bits / 8)) - } else { - None // Return None to signal a divisibility issue + let bytes = if let Some(bitlen) = line_info.algo_bit_len { + if bitlen % 8 != 0 { + // The given length is wrong + return None; } - })?; + Some(bitlen / 8) + } else if algorithm == ALGORITHM_OPTIONS_BLAKE2B { + // Default length with BLAKE2b, + Some(64) + } else { + None + }; - Some((algorithm, bits)) + Some((algorithm, bytes)) } /// Given a filename and an algorithm, compute the digest and compare it with @@ -684,13 +709,21 @@ fn process_algo_based_line( opts: ChecksumOptions, ) -> Result<(), LineCheckError> { let filename_to_check = line_info.filename.as_slice(); - let expected_checksum = - get_expected_digest_as_hex_string(line_info).ok_or(LineCheckError::ImproperlyFormatted)?; - let (algo_name, algo_bitlen) = identify_algo_name_and_length(line_info, cli_algo_name) + let (algo_name, algo_byte_len) = identify_algo_name_and_length(line_info, cli_algo_name) .ok_or(LineCheckError::ImproperlyFormatted)?; - let algo = detect_algo(&algo_name, algo_bitlen)?; + // If the digest bitlen is known, we can check the format of the expected + // checksum with it. 
+ let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) { + (ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2), + _ => None, + }; + + let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint) + .ok_or(LineCheckError::ImproperlyFormatted)?; + + let algo = detect_algo(&algo_name, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -709,12 +742,12 @@ fn process_non_algo_based_line( // Remove the leading asterisk if present - only for the first line filename_to_check = &filename_to_check[1..]; } - let expected_checksum = - get_expected_digest_as_hex_string(line_info).ok_or(LineCheckError::ImproperlyFormatted)?; + let expected_checksum = get_expected_digest_as_hex_string(line_info, None) + .ok_or(LineCheckError::ImproperlyFormatted)?; // When a specific algorithm name is input, use it and use the provided bits // except when dealing with blake2b, where we will detect the length - let (algo_name, algo_bitlen) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B { + let (algo_name, algo_byte_len) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B { // division by 2 converts the length of the Blake2b checksum from hexadecimal // characters to bytes, as each byte is represented by two hexadecimal characters. 
let length = Some(expected_checksum.len() / 2); @@ -723,7 +756,7 @@ fn process_non_algo_based_line( (cli_algo_name.to_lowercase(), cli_algo_length) }; - let algo = detect_algo(&algo_name, algo_bitlen)?; + let algo = detect_algo(&algo_name, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -1312,7 +1345,7 @@ mod tests { let mut cached_regex = None; let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap(); - let result = get_expected_digest_as_hex_string(&line_info); + let result = get_expected_digest_as_hex_string(&line_info, None); assert_eq!( result.unwrap(), @@ -1327,7 +1360,7 @@ mod tests { let mut cached_regex = None; let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap(); - let result = get_expected_digest_as_hex_string(&line_info); + let result = get_expected_digest_as_hex_string(&line_info, None); assert!(result.is_none()); } From 567bbc5f3c8a242607881554728a420b68613792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sat, 7 Dec 2024 02:38:00 +0100 Subject: [PATCH 119/179] checksum: remove ALGO_BASED_REGEX (non base64) as its not useful anymore and introduce LineFormat struct --- src/uucore/src/lib/features/checksum.rs | 85 ++++++++++++++----------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 8de983490..e19845f18 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -421,8 +421,7 @@ pub fn detect_algo(algo: &str, length: Option) -> UResult // algo must be uppercase or b (for blake2b) // 2. [* ] // 3. 
[*] (only one space) -const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P(?:[A-Z0-9]+|BLAKE2b))(?:-(?P\d+))?\s?\((?P(?-u:.*))\)\s*=\s*(?P[a-fA-F0-9]+)$"; -const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P(?:[A-Z0-9]+|BLAKE2b))(?:-(?P\d+))?\s?\((?P(?-u:.*))\)\s*=\s*(?P[A-Za-z0-9+/]+={0,2})$"; +const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P(?:[A-Z0-9]+|BLAKE2b))(?:-(?P\d+))?\s?\((?P(?-u:.*))\)\s*=\s*(?P[A-Za-z0-9+/]+={0,2})$"; const DOUBLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s{2}(?P(?-u:.*))$"; @@ -433,7 +432,23 @@ lazy_static! { static ref R_ALGO_BASED: Regex = Regex::new(ALGO_BASED_REGEX).unwrap(); static ref R_DOUBLE_SPACE: Regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap(); static ref R_SINGLE_SPACE: Regex = Regex::new(SINGLE_SPACE_REGEX).unwrap(); - static ref R_ALGO_BASED_BASE_64: Regex = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(); +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +enum LineFormat { + AlgoBased, + SingleSpace, + DoubleSpace, +} + +impl LineFormat { + fn to_regex(self) -> &'static Regex { + match self { + LineFormat::AlgoBased => &R_ALGO_BASED, + LineFormat::SingleSpace => &R_SINGLE_SPACE, + LineFormat::DoubleSpace => &R_DOUBLE_SPACE, + } + } } /// Hold the data extracted from a checksum line. @@ -443,34 +458,41 @@ struct LineInfo { checksum: String, filename: Vec, - regex: &'static Regex, + format: LineFormat, } impl LineInfo { - fn parse(s: impl AsRef, cached_regex: &mut Option<&'static Regex>) -> Option { - let regexes: &[(&'static Regex, bool)] = &[ - (&R_ALGO_BASED, true), - (&R_DOUBLE_SPACE, false), - (&R_SINGLE_SPACE, false), - (&R_ALGO_BASED_BASE_64, true), + /// Returns a `LineInfo` parsed from a checksum line. + /// The function will run 3 regexes against the line and select the first one that matches + /// to populate the fields of the struct. + /// However, there is a catch to handle regarding the handling of `cached_regex`. 
+ /// In case of non-algo-based regex, if `cached_regex` is Some, it must take the priority + /// over the detected regex. Otherwise, we must set it the the detected regex. + /// This specific behavior is emphasized by the test + /// `test_hashsum::test_check_md5sum_only_one_space`. + fn parse(s: impl AsRef, cached_regex: &mut Option) -> Option { + let regexes: &[(&'static Regex, LineFormat)] = &[ + (&R_ALGO_BASED, LineFormat::AlgoBased), + (&R_DOUBLE_SPACE, LineFormat::DoubleSpace), + (&R_SINGLE_SPACE, LineFormat::SingleSpace), ]; let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed"); - for (regex, algo_based) in regexes { + for (regex, format) in regexes { if !regex.is_match(line_bytes) { continue; } let mut r = *regex; - if !algo_based { + if *format != LineFormat::AlgoBased { // The cached regex ensures that when processing non-algo based regexes, - // its cannot be changed (can't have single and double space regexes + // it cannot be changed (can't have single and double space regexes // used in the same file). if cached_regex.is_some() { - r = cached_regex.unwrap(); + r = cached_regex.unwrap().to_regex(); } else { - *cached_regex = Some(r); + *cached_regex = Some(*format); } } @@ -485,23 +507,13 @@ impl LineInfo { .map(|m| match_to_string(m).parse::().unwrap()), checksum: caps.name("checksum").map(match_to_string).unwrap(), filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(), - regex: r, + format: *format, }); } } None } - - #[inline] - fn is_algo_based(&self) -> bool { - self.algo_name.is_some() - } - - #[inline] - fn regex_str(&self) -> &str { - self.regex.as_str() - } } fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String { @@ -730,14 +742,16 @@ fn process_algo_based_line( /// Check a digest checksum with non-algo based pre-treatment. 
fn process_non_algo_based_line( - i: usize, + line_number: usize, line_info: &LineInfo, cli_algo_name: &str, cli_algo_length: Option, opts: ChecksumOptions, ) -> Result<(), LineCheckError> { let mut filename_to_check = line_info.filename.as_slice(); - if filename_to_check.starts_with(b"*") && i == 0 && line_info.regex_str() == SINGLE_SPACE_REGEX + if filename_to_check.starts_with(b"*") + && line_number == 0 + && line_info.format == LineFormat::SingleSpace { // Remove the leading asterisk if present - only for the first line filename_to_check = &filename_to_check[1..]; @@ -774,7 +788,7 @@ fn process_checksum_line( cli_algo_name: Option<&str>, cli_algo_length: Option, opts: ChecksumOptions, - cached_regex: &mut Option<&'static Regex>, + cached_regex: &mut Option, ) -> Result<(), LineCheckError> { let line_bytes = os_str_as_bytes(line)?; @@ -786,7 +800,7 @@ fn process_checksum_line( // Use `LineInfo` to extract the data of a line. // Then, depending on its format, apply a different pre-treatment. 
if let Some(line_info) = LineInfo::parse(line, cached_regex) { - if line_info.is_algo_based() { + if line_info.format == LineFormat::AlgoBased { process_algo_based_line(&line_info, cli_algo_name, opts) } else if let Some(cli_algo) = cli_algo_name { // If we match a non-algo based regex, we expect a cli argument @@ -1284,23 +1298,21 @@ mod tests { let line_algo_based = OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap(); - assert!(line_info.is_algo_based()); assert_eq!(line_info.algo_name.as_deref(), Some("MD5")); assert!(line_info.algo_bit_len.is_none()); assert_eq!(line_info.filename, b"example.txt"); assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); - assert_eq!(line_info.regex_str(), ALGO_BASED_REGEX); + assert_eq!(line_info.format, LineFormat::AlgoBased); assert!(cached_regex.is_none()); // Test double-space regex let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap(); - assert!(!line_info.is_algo_based()); assert!(line_info.algo_name.is_none()); assert!(line_info.algo_bit_len.is_none()); assert_eq!(line_info.filename, b"example.txt"); assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); - assert_eq!(line_info.regex_str(), DOUBLE_SPACE_REGEX); + assert_eq!(line_info.format, LineFormat::DoubleSpace); assert!(cached_regex.is_some()); cached_regex = None; @@ -1308,12 +1320,11 @@ mod tests { // Test single-space regex let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap(); - assert!(!line_info.is_algo_based()); assert!(line_info.algo_name.is_none()); assert!(line_info.algo_bit_len.is_none()); assert_eq!(line_info.filename, b"example.txt"); assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); - 
assert_eq!(line_info.regex_str(), SINGLE_SPACE_REGEX); + assert_eq!(line_info.format, LineFormat::SingleSpace); assert!(cached_regex.is_some()); cached_regex = None; @@ -1328,7 +1339,7 @@ mod tests { OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex); assert!(res.is_some()); - assert_eq!(res.unwrap().regex_str(), ALGO_BASED_REGEX); + assert_eq!(line_info.format, LineFormat::AlgoBased); assert!(cached_regex.is_none()); // Test trailing space after checksum line (should fail) From 958222a07ccc25940a8aa7304bfe22cd41fd904b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Sat, 7 Dec 2024 02:30:30 +0100 Subject: [PATCH 120/179] test(cksum): un-ignore tests that are now implemented --- src/uucore/src/lib/features/checksum.rs | 3 +-- tests/by-util/test_cksum.rs | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index e19845f18..0b3e4e249 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -1337,8 +1337,7 @@ mod tests { // Test leading space before checksum line let line_algo_based_leading_space = OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); - let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex); - assert!(res.is_some()); + let line_info = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex).unwrap(); assert_eq!(line_info.format, LineFormat::AlgoBased); assert!(cached_regex.is_none()); diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index 6c1718112..2efc78b96 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -1480,7 +1480,6 @@ mod check_utf8 { } } -#[ignore = "not yet implemented"] #[test] fn test_check_blake_length_guess() { let correct_lines = [ @@ -1523,7 +1522,6 @@ fn 
test_check_blake_length_guess() { .stderr_contains("foo.sums: no properly formatted checksum lines found"); } -#[ignore = "not yet implemented"] #[test] fn test_check_confusing_base64() { let cksum = "BLAKE2b-48 (foo.dat) = fc1f97C4"; From a5446b7bd02d0c857091b6ac653ffa774f25f2c7 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 09:18:04 +0000 Subject: [PATCH 121/179] fix(deps): update rust crate lazy_static to v1.5.0 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10be1cadb..dda7b9746 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1284,9 +1284,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" From 91ea45956f9ebac6715b1644740fb14bb1af6cef Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:08:04 +0000 Subject: [PATCH 122/179] chore(deps): update mozilla-actions/sccache-action action to v0.0.7 --- .github/workflows/CICD.yml | 18 +++++++++--------- .github/workflows/code-quality.yml | 2 +- .github/workflows/freebsd.yml | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 1875c512d..56418dd6e 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -112,7 +112,7 @@ jobs: components: clippy - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Initialize workflow variables id: vars shell: bash @@ -166,7 +166,7 @@ jobs: - uses: taiki-e/install-action@nextest - uses: 
Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Initialize workflow variables id: vars shell: bash @@ -254,7 +254,7 @@ jobs: - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: "`make build`" shell: bash run: | @@ -308,7 +308,7 @@ jobs: - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Test run: cargo nextest run --hide-progress-bar --profile ci --features ${{ matrix.job.features }} env: @@ -335,7 +335,7 @@ jobs: - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Test run: cargo nextest run --hide-progress-bar --profile ci --features ${{ matrix.job.features }} env: @@ -358,7 +358,7 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Install dependencies shell: bash run: | @@ -493,7 +493,7 @@ jobs: with: key: "${{ matrix.job.os }}_${{ matrix.job.target }}" - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Initialize workflow variables id: vars shell: bash @@ -782,7 +782,7 @@ jobs: - uses: actions/checkout@v4 - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Install/setup prerequisites shell: bash run: | @@ -866,7 +866,7 @@ jobs: components: rustfmt - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: 
mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Build coreutils as multiple binaries shell: bash run: | diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index cd1334c2e..8e7db5fc3 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -81,7 +81,7 @@ jobs: components: clippy - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Initialize workflow variables id: vars shell: bash diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index b31ac3353..1ff0ba047 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -37,7 +37,7 @@ jobs: - uses: actions/checkout@v4 - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Prepare, build and test uses: vmactions/freebsd-vm@v1.1.5 with: @@ -129,7 +129,7 @@ jobs: - uses: actions/checkout@v4 - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.6 + uses: mozilla-actions/sccache-action@v0.0.7 - name: Prepare, build and test uses: vmactions/freebsd-vm@v1.1.5 with: From bbea4ba72adee40ced54e3035f30a340ec293bbe Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 23:14:28 +0000 Subject: [PATCH 123/179] chore(deps): update rust crate thiserror to v2.0.7 --- Cargo.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dda7b9746..e4ca19309 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2319,11 +2319,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" +checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" dependencies = [ - "thiserror-impl 2.0.6", + "thiserror-impl 2.0.7", ] [[package]] @@ -2339,9 +2339,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" +checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" dependencies = [ "proc-macro2", "quote", @@ -2540,7 +2540,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror 2.0.6", + "thiserror 2.0.7", "uucore", ] @@ -2552,7 +2552,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror 2.0.6", + "thiserror 2.0.7", "uucore", ] @@ -2629,7 +2629,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", - "thiserror 2.0.6", + "thiserror 2.0.7", "uucore", ] @@ -3145,7 +3145,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror 2.0.6", + "thiserror 2.0.7", "uucore", ] @@ -3424,7 +3424,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror 2.0.6", + "thiserror 2.0.7", "utmp-classic", "uucore", ] @@ -3455,7 +3455,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror 2.0.6", + "thiserror 2.0.7", "unicode-width 0.2.0", "uucore", ] @@ -3517,7 +3517,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror 2.0.6", + "thiserror 2.0.7", "time", "uucore_procs", "walkdir", From 934cbb38f697eb6fd7c6565934cceee7a326deeb Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 12 Dec 2024 10:22:30 +0100 Subject: [PATCH 124/179] Bump clap from 4.4.2 to 4.5.23 --- Cargo.lock | 41 ++++++++++++++++++++++++----------------- Cargo.toml | 2 +- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4ca19309..44534dc15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,23 +61,24 @@ dependencies = [ 
[[package]] name = "anstream" -version = "0.5.0" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.0" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" @@ -99,12 +100,12 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "2.1.0" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -326,24 +327,24 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.2" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.4.2" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", - "terminal_size 0.2.6", + "terminal_size 0.4.1", ] [[package]] @@ -357,9 +358,9 @@ dependencies = [ 
[[package]] name = "clap_lex" -version = "0.5.0" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "clap_mangen" @@ -1229,6 +1230,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.13.0" @@ -2231,9 +2238,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" @@ -3673,7 +3680,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a4f8462e4..79e6dff40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -276,7 +276,7 @@ chrono = { version = "0.4.38", default-features = false, features = [ "alloc", "clock", ] } -clap = { version = "4.4", features = ["wrap_help", "cargo"] } +clap = { version = "4.5", features = ["wrap_help", "cargo"] } clap_complete = "4.4" clap_mangen = "0.2" compare = "0.1.0" From ee0426e3f36a420334382477ca4f205d4fb0d3c9 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 12 Dec 2024 15:37:36 +0100 Subject: [PATCH 125/179] seq: use allow_hyphen_values instead of allow_negative_numbers because clap removed support for "exotic" negative numbers like -.1 --- src/uu/seq/src/seq.rs | 31 
+++++++++++++++++++++++++++++-- tests/by-util/test_seq.rs | 4 ++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 96ae83ba0..e14ba35a9 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -3,6 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) extendedbigdecimal numberparse +use std::ffi::OsString; use std::io::{stdout, ErrorKind, Write}; use clap::{crate_version, Arg, ArgAction, Command}; @@ -47,9 +48,33 @@ struct SeqOptions<'a> { /// The elements are (first, increment, last). type RangeFloat = (ExtendedBigDecimal, ExtendedBigDecimal, ExtendedBigDecimal); +// Turn short args with attached value, for example "-s,", into two args "-s" and "," to make +// them work with clap. +fn split_short_args_with_value(args: impl uucore::Args) -> impl uucore::Args { + let mut v: Vec = Vec::new(); + + for arg in args { + let bytes = arg.as_encoded_bytes(); + + if bytes.len() > 2 + && (bytes.starts_with(b"-f") || bytes.starts_with(b"-s") || bytes.starts_with(b"-t")) + { + let (short_arg, value) = bytes.split_at(2); + // SAFETY: + // Both `short_arg` and `value` only contain content that originated from `OsStr::as_encoded_bytes` + v.push(unsafe { OsString::from_encoded_bytes_unchecked(short_arg.to_vec()) }); + v.push(unsafe { OsString::from_encoded_bytes_unchecked(value.to_vec()) }); + } else { + v.push(arg); + } + } + + v.into_iter() +} + #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().try_get_matches_from(args)?; + let matches = uu_app().try_get_matches_from(split_short_args_with_value(args))?; let numbers_option = matches.get_many::(ARG_NUMBERS); @@ -138,7 +163,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uu_app() -> Command { Command::new(uucore::util_name()) .trailing_var_arg(true) - .allow_negative_numbers(true) 
.infer_long_args(true) .version(crate_version!()) .about(ABOUT) @@ -169,7 +193,10 @@ pub fn uu_app() -> Command { .help("use printf style floating-point FORMAT"), ) .arg( + // we use allow_hyphen_values instead of allow_negative_numbers because clap removed + // the support for "exotic" negative numbers like -.1 (see https://github.com/clap-rs/clap/discussions/5837) Arg::new(ARG_NUMBERS) + .allow_hyphen_values(true) .action(ArgAction::Append) .num_args(1..=3), ) diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index 8f33c3aa7..96460cf5f 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -48,12 +48,12 @@ fn test_hex_rejects_sign_after_identifier() { .args(&["-0x-123ABC"]) .fails() .no_stdout() - .stderr_contains("unexpected argument '-0' found"); + .usage_error("invalid floating point argument: '-0x-123ABC'"); new_ucmd!() .args(&["-0x+123ABC"]) .fails() .no_stdout() - .stderr_contains("unexpected argument '-0' found"); + .usage_error("invalid floating point argument: '-0x+123ABC'"); } #[test] From def66f335ca9b1fb7c866cb50b18b881a588b379 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 13 Dec 2024 11:07:48 +0100 Subject: [PATCH 126/179] build-gnu.sh: adapt "du" error message --- util/build-gnu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index e33e64429..974e188f4 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -314,7 +314,7 @@ sed -i -e "s|mv: cannot overwrite 'a/t': Directory not empty|mv: cannot move 'b/ # disable these test cases sed -i -E "s|^([^#]*2_31.*)$|#\1|g" tests/printf/printf-cov.pl -sed -i -e "s/du: invalid -t argument/du: invalid --threshold argument/" -e "s/du: option requires an argument/error: a value is required for '--threshold ' but none was supplied/" -e "/Try 'du --help' for more information./d" tests/du/threshold.sh +sed -i -e "s/du: invalid -t argument/du: invalid --threshold argument/" -e "s/du: option requires an 
argument/error: a value is required for '--threshold ' but none was supplied/" -e "s/Try 'du --help' for more information./\nFor more information, try '--help'./" tests/du/threshold.sh # Remove the extra output check sed -i -e "s|Try '\$prog --help' for more information.\\\n||" tests/du/files0-from.pl From a8ad6d92f1cb052d503bf10936fd3a54cbf5e046 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 13 Dec 2024 11:37:52 +0100 Subject: [PATCH 127/179] factor: adapt message in patch to clap change --- util/gnu-patches/tests_factor_factor.pl.patch | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/util/gnu-patches/tests_factor_factor.pl.patch b/util/gnu-patches/tests_factor_factor.pl.patch index fc8b988fe..731abcc91 100644 --- a/util/gnu-patches/tests_factor_factor.pl.patch +++ b/util/gnu-patches/tests_factor_factor.pl.patch @@ -1,8 +1,8 @@ diff --git a/tests/factor/factor.pl b/tests/factor/factor.pl -index 6e612e418..f19c06ca0 100755 +index b1406c266..3d97cd6a5 100755 --- a/tests/factor/factor.pl +++ b/tests/factor/factor.pl -@@ -61,12 +61,13 @@ my @Tests = +@@ -61,12 +61,14 @@ my @Tests = # Map newer glibc diagnostic to expected. # Also map OpenBSD 5.1's "unknown option" to expected "invalid option". {ERR_SUBST => q!s/'1'/1/;s/unknown/invalid/!}, @@ -10,7 +10,8 @@ index 6e612e418..f19c06ca0 100755 - . "Try '$prog --help' for more information.\n"}, + {ERR => "error: unexpected argument '-1' found\n\n" + . " tip: to pass '-1' as a value, use '-- -1'\n\n" -+ . "Usage: factor [OPTION]... [NUMBER]...\n"}, ++ . "Usage: factor [OPTION]... [NUMBER]...\n\n" ++ . 
"For more information, try '--help'.\n"}, {EXIT => 1}], ['cont', 'a 4', {OUT => "4: 2 2\n"}, From 5b45750115b132e5db9193b8234712f58d4d3e1f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 15 Dec 2024 16:06:42 +0000 Subject: [PATCH 128/179] chore(deps): update rust crate clap_complete to v4.5.38 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44534dc15..8397192b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -349,9 +349,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.4.0" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "586a385f7ef2f8b4d86bddaa0c094794e7ccbfe5ffef1f434fe928143fc783a5" +checksum = "d9647a559c112175f17cf724dc72d3645680a883c58481332779192b0d8e7a01" dependencies = [ "clap", ] From 6bdcad32da40e7fe2a1b159c72a455c1452a4baa Mon Sep 17 00:00:00 2001 From: Karl McDowall Date: Wed, 11 Dec 2024 18:27:05 -0700 Subject: [PATCH 129/179] sort: Rework merge batching logic Fix bug #6944 Rework the way batching is done with sort such that it doesn't open more input files than necessary. Previously, the code would always open one extra input file which causes problems in ulimit scenarios. Add additional test case. 
--- src/uu/sort/src/ext_sort.rs | 4 +- src/uu/sort/src/merge.rs | 77 +++++++++++++++++++------------------ src/uu/sort/src/sort.rs | 3 +- tests/by-util/test_sort.rs | 27 ++++++++++++- 4 files changed, 69 insertions(+), 42 deletions(-) diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index 183098812..57e434e99 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -98,12 +98,12 @@ fn reader_writer< )?; match read_result { ReadResult::WroteChunksToFile { tmp_files } => { - let merger = merge::merge_with_file_limit::<_, _, Tmp>( + merge::merge_with_file_limit::<_, _, Tmp>( tmp_files.into_iter().map(|c| c.reopen()), settings, + output, tmp_dir, )?; - merger.write_all(settings, output)?; } ReadResult::SortedSingleChunk(chunk) => { if settings.unique { diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index d6872ec80..300733d1e 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -25,7 +25,6 @@ use std::{ }; use compare::Compare; -use itertools::Itertools; use uucore::error::UResult; use crate::{ @@ -67,58 +66,63 @@ fn replace_output_file_in_input_files( /// /// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used. /// If `settings.compress_prog` is `Some`, intermediate files will be compressed with it. 
-pub fn merge<'a>( +pub fn merge( files: &mut [OsString], - settings: &'a GlobalSettings, - output: Option<&str>, + settings: &GlobalSettings, + output: Output, tmp_dir: &mut TmpDirWrapper, -) -> UResult> { - replace_output_file_in_input_files(files, output, tmp_dir)?; +) -> UResult<()> { + replace_output_file_in_input_files(files, output.as_output_name(), tmp_dir)?; + let files = files + .iter() + .map(|file| open(file).map(|file| PlainMergeInput { inner: file })); if settings.compress_prog.is_none() { - merge_with_file_limit::<_, _, WriteablePlainTmpFile>( - files - .iter() - .map(|file| open(file).map(|file| PlainMergeInput { inner: file })), - settings, - tmp_dir, - ) + merge_with_file_limit::<_, _, WriteablePlainTmpFile>(files, settings, output, tmp_dir) } else { - merge_with_file_limit::<_, _, WriteableCompressedTmpFile>( - files - .iter() - .map(|file| open(file).map(|file| PlainMergeInput { inner: file })), - settings, - tmp_dir, - ) + merge_with_file_limit::<_, _, WriteableCompressedTmpFile>(files, settings, output, tmp_dir) } } // Merge already sorted `MergeInput`s. pub fn merge_with_file_limit< - 'a, M: MergeInput + 'static, F: ExactSizeIterator>, Tmp: WriteableTmpFile + 'static, >( files: F, - settings: &'a GlobalSettings, + settings: &GlobalSettings, + output: Output, tmp_dir: &mut TmpDirWrapper, -) -> UResult> { - if files.len() > settings.merge_batch_size { - let mut remaining_files = files.len(); - let batches = files.chunks(settings.merge_batch_size); - let mut batches = batches.into_iter(); +) -> UResult<()> { + if files.len() <= settings.merge_batch_size { + let merger = merge_without_limit(files, settings); + merger?.write_all(settings, output) + } else { let mut temporary_files = vec![]; - while remaining_files != 0 { - // Work around the fact that `Chunks` is not an `ExactSizeIterator`. 
- remaining_files = remaining_files.saturating_sub(settings.merge_batch_size); - let merger = merge_without_limit(batches.next().unwrap(), settings)?; + let mut batch = vec![]; + for file in files { + batch.push(file); + if batch.len() >= settings.merge_batch_size { + assert_eq!(batch.len(), settings.merge_batch_size); + let merger = merge_without_limit(batch.into_iter(), settings)?; + batch = vec![]; + + let mut tmp_file = + Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?; + merger.write_all_to(settings, tmp_file.as_write())?; + temporary_files.push(tmp_file.finished_writing()?); + } + } + // Merge any remaining files that didn't get merged in a full batch above. + if !batch.is_empty() { + assert!(batch.len() < settings.merge_batch_size); + let merger = merge_without_limit(batch.into_iter(), settings)?; + let mut tmp_file = Tmp::create(tmp_dir.next_file()?, settings.compress_prog.as_deref())?; merger.write_all_to(settings, tmp_file.as_write())?; temporary_files.push(tmp_file.finished_writing()?); } - assert!(batches.next().is_none()); merge_with_file_limit::<_, _, Tmp>( temporary_files .into_iter() @@ -127,10 +131,9 @@ pub fn merge_with_file_limit< dyn FnMut(Tmp::Closed) -> UResult<::Reopened>, >), settings, + output, tmp_dir, ) - } else { - merge_without_limit(files, settings) } } @@ -260,7 +263,7 @@ struct PreviousLine { } /// Merges files together. This is **not** an iterator because of lifetime problems. -pub struct FileMerger<'a> { +struct FileMerger<'a> { heap: binary_heap_plus::BinaryHeap>, request_sender: Sender<(usize, RecycledChunk)>, prev: Option, @@ -269,12 +272,12 @@ pub struct FileMerger<'a> { impl FileMerger<'_> { /// Write the merged contents to the output file. 
- pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> { + fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> { let mut out = output.into_write(); self.write_all_to(settings, &mut out) } - pub fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> { + fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> { while self.write_next(settings, out) {} drop(self.request_sender); self.reader_join_handle.join().unwrap() diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index c2e752bdf..8b6fcbb25 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1567,8 +1567,7 @@ fn exec( tmp_dir: &mut TmpDirWrapper, ) -> UResult<()> { if settings.merge { - let file_merger = merge::merge(files, settings, output.as_output_name(), tmp_dir)?; - file_merger.write_all(settings, output) + merge::merge(files, settings, output, tmp_dir) } else if settings.check { if files.len() > 1 { Err(UUsageError::new(2, "only one file allowed with -c")) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 97bfc6a74..62aa07dae 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (words) ints +// spell-checker:ignore (words) ints (linux) NOFILE #![allow(clippy::cast_possible_wrap)] use std::time::Duration; @@ -1084,6 +1084,31 @@ fn test_merge_batch_size() { .stdout_only_fixture("merge_ints_interleaved.expected"); } +#[test] +#[cfg(any(target_os = "linux", target_os = "android"))] +fn test_merge_batch_size_with_limit() { + use rlimit::Resource; + // Currently need... + // 3 descriptors for stdin, stdout, stderr + // 2 descriptors for CTRL+C handling logic (to be reworked at some point) + // 2 descriptors for the input files (i.e. batch-size of 2). 
+ let limit_fd = 3 + 2 + 2; + TestScenario::new(util_name!()) + .ucmd() + .limit(Resource::NOFILE, limit_fd, limit_fd) + .arg("--batch-size=2") + .arg("-m") + .arg("--unique") + .arg("merge_ints_interleaved_1.txt") + .arg("merge_ints_interleaved_2.txt") + .arg("merge_ints_interleaved_3.txt") + .arg("merge_ints_interleaved_3.txt") + .arg("merge_ints_interleaved_2.txt") + .arg("merge_ints_interleaved_1.txt") + .succeeds() + .stdout_only_fixture("merge_ints_interleaved.expected"); +} + #[test] fn test_sigpipe_panic() { let mut cmd = new_ucmd!(); From a7b9737490e782943ce65ceb6fded6555f2c0567 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Mon, 16 Dec 2024 09:36:45 +0100 Subject: [PATCH 130/179] GNUmakefile: remove "sleep" from UNIX_PROGS because it's already in PROGS --- GNUmakefile | 1 - 1 file changed, 1 deletion(-) diff --git a/GNUmakefile b/GNUmakefile index 0b4f2d04c..af73a10f4 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -147,7 +147,6 @@ UNIX_PROGS := \ nohup \ pathchk \ pinky \ - sleep \ stat \ stdbuf \ timeout \ From 6755956bc4bce6c154e1b821f400d43eeb9800a7 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 16 Dec 2024 15:37:29 +0100 Subject: [PATCH 131/179] cut.pl: adjust to our messages as they are better (#6921) * cut.pl: adjust to our messages as they are better but we still have some differences on this test * cut: add some missing line return when needed * cut: add failing tests covered by cut.pl * Remove dup test * cut: add spell-checker:ignore line to test --------- Co-authored-by: Daniel Hofstetter --- .../cspell.dictionaries/jargon.wordlist.txt | 2 + src/uu/cut/src/cut.rs | 9 ++- tests/by-util/test_cut.rs | 33 ++++++++- util/gnu-patches/tests_cut_error_msg.patch | 72 +++++++++++++++++++ 4 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 util/gnu-patches/tests_cut_error_msg.patch diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index c2e01f508..6dd5483c6 
100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -157,6 +157,8 @@ retval subdir val vals +inval +nofield # * clippy uninlined diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 25bb73330..421b35eac 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -131,8 +131,9 @@ fn cut_fields_explicit_out_delim( if delim_search.peek().is_none() { if !only_delimited { + // Always write the entire line, even if it doesn't end with `newline_char` out.write_all(line)?; - if line[line.len() - 1] != newline_char { + if line.is_empty() || line[line.len() - 1] != newline_char { out.write_all(&[newline_char])?; } } @@ -213,8 +214,12 @@ fn cut_fields_implicit_out_delim( let mut print_delim = false; if delim_search.peek().is_none() { - if !only_delimited && line[line.len() - 1] == newline_char { + if !only_delimited { + // Always write the entire line, even if it doesn't end with `newline_char` out.write_all(line)?; + if line.is_empty() || line[line.len() - 1] != newline_char { + out.write_all(&[newline_char])?; + } } return Ok(true); diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 6b376b0ca..7d6009a30 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -2,6 +2,9 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
+ +// spell-checker:ignore defg + use crate::common::util::TestScenario; static INPUT: &str = "lists.txt"; @@ -288,7 +291,7 @@ fn test_newline_delimited() { .args(&["-f", "1", "-d", "\n"]) .pipe_in("a:1\nb:") .succeeds() - .stdout_only_bytes("a:1\n"); + .stdout_only_bytes("a:1\nb:\n"); } #[test] @@ -329,3 +332,31 @@ fn test_8bit_non_utf8_delimiter() { .succeeds() .stdout_check(|out| out == "b_c\n".as_bytes()); } + +#[test] +fn test_newline_preservation_with_f1_option() { + let (at, mut ucmd) = at_and_ucmd!(); + at.write("1", "a\nb"); + let expected = "a\nb\n"; + ucmd.args(&["-f1-", "1"]).succeeds().stdout_is(expected); +} + +#[ignore = "Not yet implemented"] +#[test] +fn test_output_delimiter_with_character_ranges() { + new_ucmd!() + .args(&["-c2-3,4-", "--output-delim=:"]) + .pipe_in("abcdefg\n") + .succeeds() + .stdout_only("bc:defg\n"); +} + +#[ignore = "Not yet implemented"] +#[test] +fn test_output_delimiter_with_adjacent_ranges() { + new_ucmd!() + .args(&["-b1-2,3-4", "--output-d=:"]) + .pipe_in("abcd\n") + .succeeds() + .stdout_only("ab:cd\n"); +} diff --git a/util/gnu-patches/tests_cut_error_msg.patch b/util/gnu-patches/tests_cut_error_msg.patch new file mode 100644 index 000000000..3f57d2048 --- /dev/null +++ b/util/gnu-patches/tests_cut_error_msg.patch @@ -0,0 +1,72 @@ +diff --git a/tests/cut/cut.pl b/tests/cut/cut.pl +index 1670db02e..ed633792a 100755 +--- a/tests/cut/cut.pl ++++ b/tests/cut/cut.pl +@@ -29,13 +29,15 @@ my $mb_locale = $ENV{LOCALE_FR_UTF8}; + + my $prog = 'cut'; + my $try = "Try '$prog --help' for more information.\n"; +-my $from_field1 = "$prog: fields are numbered from 1\n$try"; +-my $from_pos1 = "$prog: byte/character positions are numbered from 1\n$try"; +-my $inval_fld = "$prog: invalid field range\n$try"; +-my $inval_pos = "$prog: invalid byte or character range\n$try"; +-my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try"; +-my $nofield = "$prog: an input delimiter may be specified only when " . 
+- "operating on fields\n$try"; ++my $from_field1 = "$prog: range '' was invalid: failed to parse range\n"; ++my $from_field_0 = "$prog: range '0' was invalid: fields and positions are numbered from 1\n"; ++my $from_field_0_dash = "$prog: range '0-' was invalid: fields and positions are numbered from 1\n"; ++my $from_field_0_2 = "$prog: range '0-2' was invalid: fields and positions are numbered from 1\n"; ++my $from_pos1 = "$prog: range '' was invalid: failed to parse range\n"; ++my $inval_fld = "$prog: range '--' was invalid: failed to parse range\n"; ++my $inval_pos = "$prog: range '--' was invalid: failed to parse range\n"; ++my $no_endpoint = "$prog: range '-' was invalid: invalid range with no endpoint\n"; ++my $nofield = "$prog: invalid input: The '--delimiter' ('-d') option only usable if printing a sequence of fields\n"; + + my @Tests = + ( +@@ -44,16 +46,16 @@ my @Tests = + + # This failed (as it should) even before coreutils-6.9.90, + # but cut from 6.9.90 produces a more useful diagnostic. +- ['zero-1', '-b0', {ERR=>$from_pos1}, {EXIT => 1} ], ++ ['zero-1', '-b0', {ERR=>$from_field_0}, {EXIT => 1} ], + + # Up to coreutils-6.9, specifying a range of 0-2 was not an error. + # It was treated just like "-2". +- ['zero-2', '-f0-2', {ERR=>$from_field1}, {EXIT => 1} ], ++ ['zero-2', '-f0-2', {ERR=>$from_field_0_2}, {EXIT => 1} ], + + # Up to coreutils-8.20, specifying a range of 0- was not an error. 
+- ['zero-3b', '-b0-', {ERR=>$from_pos1}, {EXIT => 1} ], +- ['zero-3c', '-c0-', {ERR=>$from_pos1}, {EXIT => 1} ], +- ['zero-3f', '-f0-', {ERR=>$from_field1}, {EXIT => 1} ], ++ ['zero-3b', '-b0-', {ERR=>$from_field_0_dash}, {EXIT => 1} ], ++ ['zero-3c', '-c0-', {ERR=>$from_field_0_dash}, {EXIT => 1} ], ++ ['zero-3f', '-f0-', {ERR=>$from_field_0_dash}, {EXIT => 1} ], + + ['1', '-d:', '-f1,3-', {IN=>"a:b:c\n"}, {OUT=>"a:c\n"}], + ['2', '-d:', '-f1,3-', {IN=>"a:b:c\n"}, {OUT=>"a:c\n"}], +@@ -96,11 +98,10 @@ my @Tests = + # Errors + # -s may be used only with -f + ['y', qw(-s -b4), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, +- {ERR=>"$prog: suppressing non-delimited lines makes sense\n" +- . "\tonly when operating on fields\n$try"}], ++ {ERR=>"$prog: invalid input: The '--only-delimited' ('-s') option only usable if printing a sequence of fields\n"}], + # You must specify bytes or fields (or chars) + ['z', '', {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, +- {ERR=>"$prog: you must specify a list of bytes, characters, or fields\n$try"} ++ {ERR=>"$prog: invalid usage: expects one of --fields (-f), --chars (-c) or --bytes (-b)\n"} + ], + # Empty field list + ['empty-fl', qw(-f ''), {IN=>":\n"}, {OUT=>""}, {EXIT=>1}, +@@ -199,7 +200,7 @@ my @Tests = + + # None of the following invalid ranges provoked an error up to coreutils-6.9. 
+ ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, +- {ERR=>"$prog: invalid decreasing range\n$try"}], ++ {ERR=>"$prog: range '2-0' was invalid: fields and positions are numbered from 1\n"}], + ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], + ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], + ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, From d20c7fdef62d19fd071500ba7dc7b43344ee75e7 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:34:19 +0000 Subject: [PATCH 132/179] fix(deps): update rust crate clap_complete to v4.5.39 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8397192b3..da693812e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -349,9 +349,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.38" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9647a559c112175f17cf724dc72d3645680a883c58481332779192b0d8e7a01" +checksum = "fd4db298d517d5fa00b2b84bbe044efd3fde43874a41db0d46f91994646a2da4" dependencies = [ "clap", ] From dd3d0a383f6f9a5b9dcb6affffa99579c4f37942 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 17 Dec 2024 07:40:08 +0100 Subject: [PATCH 133/179] Bump zip from 1.1.4 to 2.2.2 --- Cargo.lock | 94 +++++++++++++++++++++++++++++------------------------- Cargo.toml | 2 +- 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da693812e..c4c28b0b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,10 +3,10 @@ version = 3 [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] 
name = "ahash" @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytecount" @@ -642,9 +642,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] @@ -683,9 +683,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crossterm" @@ -803,9 +803,9 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", @@ -916,9 +916,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1351,10 +1351,16 @@ dependencies = [ ] [[package]] -name = "log" -version = "0.4.20" +name = "lockfree-object-pool" +version = "0.1.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lru" @@ -1408,11 +1414,11 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] @@ -1551,27 +1557,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_enum" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" -dependencies = [ - "num_enum_derive", -] - -[[package]] -name = "num_enum_derive" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "num_threads" version = "0.1.6" @@ -2190,6 +2175,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "siphasher" version = "0.3.10" @@ -3986,9 +3977,9 @@ dependencies = [ [[package]] name = "zip" -version = "1.1.4" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" +checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45" dependencies = [ "arbitrary", "crc32fast", @@ -3996,6 +3987,21 @@ dependencies = [ "displaydoc", "flate2", "indexmap", - "num_enum", - "thiserror 1.0.69", + "memchr", + "thiserror 2.0.7", + "zopfli", +] + +[[package]] +name = "zopfli" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946" +dependencies = [ + "bumpalo", + "crc32fast", + "lockfree-object-pool", + "log", + "once_cell", + "simd-adler32", ] diff --git a/Cargo.toml b/Cargo.toml index 79e6dff40..a6b9958d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -342,7 +342,7 @@ walkdir = "2.5" winapi-util = "0.1.8" windows-sys = { version = "0.59.0", default-features = false } xattr = "1.3.1" -zip = { version = "1.1.4", default-features = false, features = ["deflate"] } +zip = { version = "2.2.2", default-features = false, features = ["deflate"] } hex = "0.4.3" md-5 = "0.10.6" From d414dbc83beef3bfd8c8283440fadd34295ae8f4 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 17 Dec 2024 10:52:25 +0100 Subject: [PATCH 134/179] basenc: ignore case with "--base16 --decode" --- src/uu/base32/src/base_common.rs | 6 +++--- tests/by-util/test_basenc.rs | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 84a461963..878d07a92 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -11,7 +11,7 @@ use std::io::{self, ErrorKind, Read, Seek, SeekFrom}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::encoding::{ - for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, BASE64_NOPAD, HEXUPPER}, + for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, BASE64_NOPAD, HEXUPPER_PERMISSIVE}, Format, Z85Wrapper, 
BASE2LSBF, BASE2MSBF, }; use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode}; @@ -226,11 +226,11 @@ pub fn get_supports_fast_decode_and_encode( match format { Format::Base16 => Box::from(EncodingWrapper::new( - HEXUPPER, + HEXUPPER_PERMISSIVE, BASE16_VALID_DECODING_MULTIPLE, BASE16_UNPADDED_MULTIPLE, // spell-checker:disable-next-line - b"0123456789ABCDEF", + b"0123456789ABCDEFabcdef", )), Format::Base2Lsbf => Box::from(EncodingWrapper::new( BASE2LSBF, diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs index 85c05ad3e..c0f40cd1d 100644 --- a/tests/by-util/test_basenc.rs +++ b/tests/by-util/test_basenc.rs @@ -130,6 +130,24 @@ fn test_base16_decode() { .stdout_only("Hello, World!"); } +#[test] +fn test_base16_decode_lowercase() { + new_ucmd!() + .args(&["--base16", "-d"]) + .pipe_in("48656c6c6f2c20576f726c6421") + .succeeds() + .stdout_only("Hello, World!"); +} + +#[test] +fn test_base16_decode_and_ignore_garbage_lowercase() { + new_ucmd!() + .args(&["--base16", "-d", "-i"]) + .pipe_in("48656c6c6f2c20576f726c6421") + .succeeds() + .stdout_only("Hello, World!"); +} + #[test] fn test_base2msbf() { new_ucmd!() From dd741eac1900b6731068bd72d5266e6d26a3b18b Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 17 Dec 2024 14:59:40 +0100 Subject: [PATCH 135/179] build-gnu.sh: adapt basenc message to clap changes --- util/build-gnu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 974e188f4..16868af4d 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -267,7 +267,7 @@ sed -i "s/\(\(b2[ml]_[69]\|b32h_[56]\|z85_8\|z85_35\).*OUT=>\)[^}]*\(.*\)/\1\"\" sed -i "s/\$prog: invalid input/\$prog: error: invalid input/g" tests/basenc/basenc.pl # basenc: swap out error message for unexpected arg -sed -i "s/ {ERR=>\"\$prog: foobar\\\\n\" \. 
\$try_help }/ {ERR=>\"error: Found argument '--foobar' which wasn't expected, or isn't valid in this context\n\n If you tried to supply '--foobar' as a value rather than a flag, use '-- --foobar'\n\nUsage: basenc [OPTION]... [FILE]\n\nFor more information try '--help'\n\"}]/" tests/basenc/basenc.pl +sed -i "s/ {ERR=>\"\$prog: foobar\\\\n\" \. \$try_help }/ {ERR=>\"error: unexpected argument '--foobar' found\n\n tip: to pass '--foobar' as a value, use '-- --foobar'\n\nUsage: basenc [OPTION]... [FILE]\n\nFor more information, try '--help'.\n\"}]/" tests/basenc/basenc.pl sed -i "s/ {ERR_SUBST=>\"s\/(unrecognized|unknown) option \[-' \]\*foobar\[' \]\*\/foobar\/\"}],//" tests/basenc/basenc.pl # Remove the check whether a util was built. Otherwise tests against utils like "arch" are not run. From 8edefdc287d2cbb64228b1dceebbd65ad174e15a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 18:36:05 +0000 Subject: [PATCH 136/179] fix(deps): update rust crate clap_complete to v4.5.40 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4c28b0b4..773223470 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -349,9 +349,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd4db298d517d5fa00b2b84bbe044efd3fde43874a41db0d46f91994646a2da4" +checksum = "ac2e663e3e3bed2d32d065a8404024dad306e699a04263ec59919529f803aee9" dependencies = [ "clap", ] From beb56b10ab0aaddc8aa6ac58b00773feb6c186e7 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 3 Dec 2024 22:34:14 +0100 Subject: [PATCH 137/179] clippy: fix clippy warnings See: https://rust-lang.github.io/rust-clippy/master/index.html#map_unwrap_or --- src/uu/chcon/src/chcon.rs | 2 +- src/uu/du/src/du.rs | 2 +- src/uu/fold/src/fold.rs | 2 +- src/uu/tail/src/args.rs | 4 ++-- 
src/uu/tail/src/paths.rs | 4 ++-- src/uu/uniq/src/uniq.rs | 2 +- src/uucore/src/lib/features/fs.rs | 2 +- src/uuhelp_parser/src/lib.rs | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/uu/chcon/src/chcon.rs b/src/uu/chcon/src/chcon.rs index c8d1c4017..b5b892f6c 100644 --- a/src/uu/chcon/src/chcon.rs +++ b/src/uu/chcon/src/chcon.rs @@ -727,7 +727,7 @@ fn get_root_dev_ino() -> Result { } fn root_dev_ino_check(root_dev_ino: Option, dir_dev_ino: DeviceAndINode) -> bool { - root_dev_ino.map_or(false, |root_dev_ino| root_dev_ino == dir_dev_ino) + root_dev_ino == Some(dir_dev_ino) } fn root_dev_ino_warn(dir_name: &Path) { diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index d4bec77ef..2392497a9 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -533,7 +533,7 @@ impl StatPrinter { if !self .threshold - .map_or(false, |threshold| threshold.should_exclude(size)) + .is_some_and(|threshold| threshold.should_exclude(size)) && self .max_depth .map_or(true, |max_depth| stat_info.depth <= max_depth) diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index 0223248be..e17ba21c3 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -99,7 +99,7 @@ pub fn uu_app() -> Command { fn handle_obsolete(args: &[String]) -> (Vec, Option) { for (i, arg) in args.iter().enumerate() { let slice = &arg; - if slice.starts_with('-') && slice.chars().nth(1).map_or(false, |c| c.is_ascii_digit()) { + if slice.starts_with('-') && slice.chars().nth(1).is_some_and(|c| c.is_ascii_digit()) { let mut v = args.to_vec(); v.remove(i); return (v, Some(slice[1..].to_owned())); diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 5cadac608..24b064d1b 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -336,11 +336,11 @@ impl Settings { let blocking_stdin = self.pid == 0 && self.follow == Some(FollowMode::Descriptor) && self.num_inputs() == 1 - && Handle::stdin().map_or(false, |handle| { + && 
Handle::stdin().is_ok_and(|handle| { handle .as_file() .metadata() - .map_or(false, |meta| !meta.is_file()) + .is_ok_and(|meta| !meta.is_file()) }); if !blocking_stdin && std::io::stdin().is_terminal() { diff --git a/src/uu/tail/src/paths.rs b/src/uu/tail/src/paths.rs index 117cab8b0..4a680943c 100644 --- a/src/uu/tail/src/paths.rs +++ b/src/uu/tail/src/paths.rs @@ -93,7 +93,7 @@ impl Input { pub fn is_tailable(&self) -> bool { match &self.kind { InputKind::File(path) => path_is_tailable(path), - InputKind::Stdin => self.resolve().map_or(false, |path| path_is_tailable(&path)), + InputKind::Stdin => self.resolve().is_some_and(|path| path_is_tailable(&path)), } } } @@ -233,7 +233,7 @@ impl PathExtTail for Path { } pub fn path_is_tailable(path: &Path) -> bool { - path.is_file() || path.exists() && path.metadata().map_or(false, |meta| meta.is_tailable()) + path.is_file() || path.exists() && path.metadata().is_ok_and(|meta| meta.is_tailable()) } #[inline] diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs index 4084a7b3f..b9090cd50 100644 --- a/src/uu/uniq/src/uniq.rs +++ b/src/uu/uniq/src/uniq.rs @@ -383,7 +383,7 @@ fn should_extract_obs_skip_chars( && posix_version().is_some_and(|v| v <= OBSOLETE) && !preceding_long_opt_req_value && !preceding_short_opt_req_value - && slice.chars().nth(1).map_or(false, |c| c.is_ascii_digit()) + && slice.chars().nth(1).is_some_and(|c| c.is_ascii_digit()) } /// Helper function to [`filter_args`] diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index beb4d77a9..e2958232f 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -710,7 +710,7 @@ pub fn path_ends_with_terminator(path: &Path) -> bool { path.as_os_str() .as_bytes() .last() - .map_or(false, |&byte| byte == b'/' || byte == b'\\') + .is_some_and(|&byte| byte == b'/' || byte == b'\\') } #[cfg(windows)] diff --git a/src/uuhelp_parser/src/lib.rs b/src/uuhelp_parser/src/lib.rs index da50c037b..0e0907f8a 
100644 --- a/src/uuhelp_parser/src/lib.rs +++ b/src/uuhelp_parser/src/lib.rs @@ -73,7 +73,7 @@ pub fn parse_usage(content: &str) -> String { pub fn parse_section(section: &str, content: &str) -> Option { fn is_section_header(line: &str, section: &str) -> bool { line.strip_prefix("##") - .map_or(false, |l| l.trim().to_lowercase() == section) + .is_some_and(|l| l.trim().to_lowercase() == section) } let section = §ion.to_lowercase(); From 7708f6e39a50e29ba59643daed8cc84cc188a937 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 17 Dec 2024 22:47:45 +0100 Subject: [PATCH 138/179] clippy: replace .as_bytes().len() => .len() --- src/uu/split/src/split.rs | 2 +- src/uu/tac/src/tac.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 86fded1d5..279e91dae 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -492,7 +492,7 @@ impl Settings { } match first.as_str() { "\\0" => b'\0', - s if s.as_bytes().len() == 1 => s.as_bytes()[0], + s if s.len() == 1 => s.as_bytes()[0], s => return Err(SettingsError::MultiCharacterSeparator(s.to_string())), } } diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 3865c61ae..d1eca4706 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -184,7 +184,7 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> let mut out = BufWriter::new(out.lock()); // The number of bytes in the line separator. - let slen = separator.as_bytes().len(); + let slen = separator.len(); // The index of the start of the next line in the `data`. 
// From 5cd665363d9b39287f6c7bd4955e0fd7c6e816e1 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 04:30:52 +0000 Subject: [PATCH 139/179] chore(deps): update rust crate thiserror to v2.0.8 --- Cargo.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 773223470..9f906aa68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2317,11 +2317,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.7" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" +checksum = "08f5383f3e0071702bf93ab5ee99b52d26936be9dedd9413067cbdcddcb6141a" dependencies = [ - "thiserror-impl 2.0.7", + "thiserror-impl 2.0.8", ] [[package]] @@ -2337,9 +2337,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.7" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" +checksum = "f2f357fcec90b3caef6623a099691be676d033b40a058ac95d2a6ade6fa0c943" dependencies = [ "proc-macro2", "quote", @@ -2538,7 +2538,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror 2.0.7", + "thiserror 2.0.8", "uucore", ] @@ -2550,7 +2550,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror 2.0.7", + "thiserror 2.0.8", "uucore", ] @@ -2627,7 +2627,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", - "thiserror 2.0.7", + "thiserror 2.0.8", "uucore", ] @@ -3143,7 +3143,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror 2.0.7", + "thiserror 2.0.8", "uucore", ] @@ -3422,7 +3422,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror 2.0.7", + "thiserror 2.0.8", "utmp-classic", "uucore", ] @@ -3453,7 +3453,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror 2.0.7", + "thiserror 
2.0.8", "unicode-width 0.2.0", "uucore", ] @@ -3515,7 +3515,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror 2.0.7", + "thiserror 2.0.8", "time", "uucore_procs", "walkdir", @@ -3988,7 +3988,7 @@ dependencies = [ "flate2", "indexmap", "memchr", - "thiserror 2.0.7", + "thiserror 2.0.8", "zopfli", ] From 2f82853bfa3d16c1497927c26432a4853fadbfd3 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 18 Dec 2024 14:42:18 +0100 Subject: [PATCH 140/179] cut: move two tests to better places within file --- tests/by-util/test_cut.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 7d6009a30..fe20b5de6 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -46,6 +46,13 @@ static COMPLEX_SEQUENCE: &TestedSequence = &TestedSequence { sequence: "9-,6-7,-2,4", }; +#[test] +fn test_no_argument() { + new_ucmd!().fails().stderr_is( + "cut: invalid usage: expects one of --fields (-f), --chars (-c) or --bytes (-b)\n", + ); +} + #[test] fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1); @@ -275,6 +282,15 @@ fn test_equal_as_delimiter3() { .stdout_only_bytes("abZcd\n"); } +#[test] +fn test_newline_delimited() { + new_ucmd!() + .args(&["-f", "1", "-d", "\n"]) + .pipe_in("a:1\nb:") + .succeeds() + .stdout_only_bytes("a:1\nb:\n"); +} + #[test] fn test_multiple() { let result = new_ucmd!() @@ -285,15 +301,6 @@ fn test_multiple() { assert_eq!(result.stderr_str(), ""); } -#[test] -fn test_newline_delimited() { - new_ucmd!() - .args(&["-f", "1", "-d", "\n"]) - .pipe_in("a:1\nb:") - .succeeds() - .stdout_only_bytes("a:1\nb:\n"); -} - #[test] fn test_multiple_mode_args() { for args in [ @@ -312,13 +319,6 @@ fn test_multiple_mode_args() { } } -#[test] -fn test_no_argument() { - new_ucmd!().fails().stderr_is( - "cut: invalid usage: expects one of --fields (-f), --chars (-c) or --bytes (-b)\n", - ); -} - #[test] 
#[cfg(unix)] fn test_8bit_non_utf8_delimiter() { From 9aca24365faef34214d78bd242493b3442a26a41 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 18 Dec 2024 14:44:12 +0100 Subject: [PATCH 141/179] cut: simplify test by removing assert_eq! calls --- tests/by-util/test_cut.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index fe20b5de6..e4c93cd75 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -293,12 +293,11 @@ fn test_newline_delimited() { #[test] fn test_multiple() { - let result = new_ucmd!() + new_ucmd!() .args(&["-f2", "-d:", "-d="]) .pipe_in("a=b\n") - .succeeds(); - assert_eq!(result.stdout_str(), "b\n"); - assert_eq!(result.stderr_str(), ""); + .succeeds() + .stdout_only("b\n"); } #[test] From 5ea4903632859e27e7d9947816a1e3fd30f90de8 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 18 Dec 2024 14:52:04 +0100 Subject: [PATCH 142/179] cut: rename some tests --- tests/by-util/test_cut.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index e4c93cd75..267eedf45 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -47,7 +47,7 @@ static COMPLEX_SEQUENCE: &TestedSequence = &TestedSequence { }; #[test] -fn test_no_argument() { +fn test_no_args() { new_ucmd!().fails().stderr_is( "cut: invalid usage: expects one of --fields (-f), --chars (-c) or --bytes (-b)\n", ); @@ -256,7 +256,7 @@ fn test_no_such_file() { } #[test] -fn test_equal_as_delimiter1() { +fn test_equal_as_delimiter() { new_ucmd!() .args(&["-f", "2", "-d="]) .pipe_in("--dir=./out/lib") @@ -265,7 +265,7 @@ fn test_equal_as_delimiter1() { } #[test] -fn test_equal_as_delimiter2() { +fn test_empty_string_as_delimiter() { new_ucmd!() .args(&["-f2", "--delimiter="]) .pipe_in("a=b\n") @@ -274,7 +274,7 @@ fn test_equal_as_delimiter2() { } #[test] -fn test_equal_as_delimiter3() { 
+fn test_empty_string_as_delimiter_with_output_delimiter() { new_ucmd!() .args(&["-f", "1,2", "-d", "''", "--output-delimiter=Z"]) .pipe_in("ab\0cd\n") @@ -283,7 +283,7 @@ fn test_equal_as_delimiter3() { } #[test] -fn test_newline_delimited() { +fn test_newline_as_delimiter() { new_ucmd!() .args(&["-f", "1", "-d", "\n"]) .pipe_in("a:1\nb:") @@ -292,7 +292,7 @@ fn test_newline_delimited() { } #[test] -fn test_multiple() { +fn test_multiple_delimiters() { new_ucmd!() .args(&["-f2", "-d:", "-d="]) .pipe_in("a=b\n") From 6224c374ae490596e2d7e7429d219b82c66471bb Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 18 Dec 2024 16:15:48 +0100 Subject: [PATCH 143/179] cut: use short and long args in two tests --- tests/by-util/test_cut.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 267eedf45..6c6914a12 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -257,20 +257,24 @@ fn test_no_such_file() { #[test] fn test_equal_as_delimiter() { - new_ucmd!() - .args(&["-f", "2", "-d="]) - .pipe_in("--dir=./out/lib") - .succeeds() - .stdout_only("./out/lib\n"); + for arg in ["-d=", "--delimiter=="] { + new_ucmd!() + .args(&["-f2", arg]) + .pipe_in("--dir=./out/lib") + .succeeds() + .stdout_only("./out/lib\n"); + } } #[test] fn test_empty_string_as_delimiter() { - new_ucmd!() - .args(&["-f2", "--delimiter="]) - .pipe_in("a=b\n") - .succeeds() - .stdout_only("a=b\n"); + for arg in ["-d''", "--delimiter=", "--delimiter=''"] { + new_ucmd!() + .args(&["-f2", arg]) + .pipe_in("a\0b\n") + .succeeds() + .stdout_only("b\n"); + } } #[test] From cb3be5e3aa58860d93d8f4fd9043daefdc9153bc Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Thu, 21 Nov 2024 22:33:08 -0500 Subject: [PATCH 144/179] Bump MSRV to 1.79 --- .clippy.toml | 2 +- .github/workflows/CICD.yml | 2 +- Cargo.toml | 2 +- README.md | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff 
--git a/.clippy.toml b/.clippy.toml index 72e8c35cf..6339ccf21 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -1,4 +1,4 @@ -msrv = "1.77.0" +msrv = "1.79.0" cognitive-complexity-threshold = 24 missing-docs-in-crate-items = true check-private-items = true diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 56418dd6e..f1f9661b3 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -11,7 +11,7 @@ env: PROJECT_NAME: coreutils PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_AUTH: "uutils" - RUST_MIN_SRV: "1.77.0" + RUST_MIN_SRV: "1.79.0" # * style job configuration STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis diff --git a/Cargo.toml b/Cargo.toml index a6b9958d4..1991679d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ repository = "https://github.com/uutils/coreutils" readme = "README.md" keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] categories = ["command-line-utilities"] -rust-version = "1.77.0" +rust-version = "1.79.0" edition = "2021" build = "build.rs" diff --git a/README.md b/README.md index 9f7d1c2ae..37c5a596b 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils) [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils) -![MSRV](https://img.shields.io/badge/MSRV-1.77.0-brightgreen) +![MSRV](https://img.shields.io/badge/MSRV-1.79.0-brightgreen) @@ -70,7 +70,7 @@ the [coreutils docs](https://github.com/uutils/uutils.github.io) repository. ### Rust Version uutils follows Rust's release channels and is tested against stable, beta and -nightly. The current Minimum Supported Rust Version (MSRV) is `1.77.0`. +nightly. The current Minimum Supported Rust Version (MSRV) is `1.79.0`. 
## Building From 355103134b76923053b2fa3c5ccda53c0e1108b9 Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Thu, 21 Nov 2024 22:35:42 -0500 Subject: [PATCH 145/179] quoting_style: add support for non-unicode bytes This new functionality is implemented, but not yet exposed here. --- src/uucore/src/lib/features/quoting_style.rs | 512 +++++++++++++++---- 1 file changed, 406 insertions(+), 106 deletions(-) diff --git a/src/uucore/src/lib/features/quoting_style.rs b/src/uucore/src/lib/features/quoting_style.rs index 1efa6f746..0544633bb 100644 --- a/src/uucore/src/lib/features/quoting_style.rs +++ b/src/uucore/src/lib/features/quoting_style.rs @@ -11,34 +11,38 @@ use std::fmt; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. -const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; +const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; // PR#6559 : Remove `]{}` from special shell chars. const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; /// The quoting style to use when escaping a name. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum QuotingStyle { - /// Escape the name as a literal string. + /// Escape the name as a shell string. + /// Used in, e.g., `ls --quoting-style=shell`. Shell { /// Whether to escape characters in the name. + /// True in, e.g., `ls --quoting-style=shell-escape`. escape: bool, /// Whether to always quote the name. always_quote: bool, - /// Whether to show control characters. + /// Whether to show control and non-unicode characters, or replace them with `?`. show_control: bool, }, /// Escape the name as a C string. + /// Used in, e.g., `ls --quote-name`. C { /// The type of quotes to use. quotes: Quotes, }, - /// Escape the name as a literal string. + /// Do not escape the string. + /// Used in, e.g., `ls --literal`. Literal { - /// Whether to show control characters. 
+ /// Whether to show control and non-unicode characters, or replace them with `?`. show_control: bool, }, } @@ -72,8 +76,9 @@ enum EscapeState { Octal(EscapeOctal), } +/// Byte we need to present as escaped octal, in the form of `\nnn` struct EscapeOctal { - c: char, + c: u8, state: EscapeOctalState, idx: usize, } @@ -95,20 +100,20 @@ impl Iterator for EscapeOctal { Some('\\') } EscapeOctalState::Value => { - let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7; + let octal_digit = ((self.c) >> (self.idx * 3)) & 0o7; if self.idx == 0 { self.state = EscapeOctalState::Done; } else { self.idx -= 1; } - Some(from_digit(octal_digit, 8).unwrap()) + Some(from_digit(octal_digit.into(), 8).unwrap()) } } } } impl EscapeOctal { - fn from(c: char) -> Self { + fn from(c: u8) -> Self { Self { c, idx: 2, @@ -124,6 +129,12 @@ impl EscapedChar { } } + fn new_octal(b: u8) -> Self { + Self { + state: EscapeState::Octal(EscapeOctal::from(b)), + } + } + fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { use EscapeState::*; let init_state = match c { @@ -148,7 +159,7 @@ impl EscapedChar { _ => Char(' '), }, ':' if dirname => Backslash(':'), - _ if c.is_ascii_control() => Octal(EscapeOctal::from(c)), + _ if c.is_ascii_control() => Octal(EscapeOctal::from(c as u8)), _ => Char(c), }; Self { state: init_state } @@ -165,7 +176,7 @@ impl EscapedChar { '\x0B' => Backslash('v'), '\x0C' => Backslash('f'), '\r' => Backslash('r'), - '\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)), + '\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c as u8)), '\'' => match quotes { Quotes::Single => Backslash('\''), _ => Char('\''), @@ -205,102 +216,124 @@ impl Iterator for EscapedChar { } } -fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) { +/// Check whether `bytes` starts with any byte in `pattern`. 
+fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool { + !bytes.is_empty() && pattern.contains(&bytes[0]) +} + +fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec, bool) { let mut must_quote = false; - let mut escaped_str = String::with_capacity(name.len()); + let mut escaped_str = Vec::with_capacity(name.len()); + let mut utf8_buf = vec![0; 4]; - for c in name.chars() { - let escaped = { - let ec = EscapedChar::new_shell(c, false, quotes); - if show_control_chars { - ec - } else { - ec.hide_control() - } - }; + for s in name.utf8_chunks() { + for c in s.valid().chars() { + let escaped = { + let ec = EscapedChar::new_shell(c, false, quotes); + if show_control_chars { + ec + } else { + ec.hide_control() + } + }; - match escaped.state { - EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"), - EscapeState::ForceQuote(x) => { - must_quote = true; - escaped_str.push(x); - } - _ => { - for char in escaped { - escaped_str.push(char); + match escaped.state { + EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"), + EscapeState::ForceQuote(x) => { + must_quote = true; + escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes()); + } + _ => { + for c in escaped { + escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes()); + } } } } + + if show_control_chars { + escaped_str.extend_from_slice(s.invalid()); + } else { + escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?'); + } } - must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); + must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); (escaped_str, must_quote) } -fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { +fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec, bool) { // We need to keep track of whether we are in a dollar expression // because e.g. 
\b\n is escaped as $'\b\n' and not like $'b'$'n' let mut in_dollar = false; let mut must_quote = false; let mut escaped_str = String::with_capacity(name.len()); - for c in name.chars() { - let escaped = EscapedChar::new_shell(c, true, quotes); - match escaped.state { - EscapeState::Char(x) => { - if in_dollar { - escaped_str.push_str("''"); + for s in name.utf8_chunks() { + for c in s.valid().chars() { + let escaped = EscapedChar::new_shell(c, true, quotes); + match escaped.state { + EscapeState::Char(x) => { + if in_dollar { + escaped_str.push_str("''"); + in_dollar = false; + } + escaped_str.push(x); + } + EscapeState::ForceQuote(x) => { + if in_dollar { + escaped_str.push_str("''"); + in_dollar = false; + } + must_quote = true; + escaped_str.push(x); + } + // Single quotes are not put in dollar expressions, but are escaped + // if the string also contains double quotes. In that case, they must + // be handled separately. + EscapeState::Backslash('\'') => { + must_quote = true; in_dollar = false; + escaped_str.push_str("'\\''"); } - escaped_str.push(x); - } - EscapeState::ForceQuote(x) => { - if in_dollar { - escaped_str.push_str("''"); - in_dollar = false; - } - must_quote = true; - escaped_str.push(x); - } - // Single quotes are not put in dollar expressions, but are escaped - // if the string also contains double quotes. In that case, they must - // be handled separately. 
- EscapeState::Backslash('\'') => { - must_quote = true; - in_dollar = false; - escaped_str.push_str("'\\''"); - } - _ => { - if !in_dollar { - escaped_str.push_str("'$'"); - in_dollar = true; - } - must_quote = true; - for char in escaped { - escaped_str.push(char); + _ => { + if !in_dollar { + escaped_str.push_str("'$'"); + in_dollar = true; + } + must_quote = true; + for char in escaped { + escaped_str.push(char); + } } } } + if !s.invalid().is_empty() { + if !in_dollar { + escaped_str.push_str("'$'"); + in_dollar = true; + } + must_quote = true; + let escaped_bytes: String = s + .invalid() + .iter() + .flat_map(|b| EscapedChar::new_octal(*b)) + .collect(); + escaped_str.push_str(&escaped_bytes); + } } - must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); - (escaped_str, must_quote) + must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); + (escaped_str.into(), must_quote) } /// Return a set of characters that implies quoting of the word in /// shell-quoting mode. -fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] { - const ESCAPED_CHARS: &[char] = &[ - // the ':' colon character only induce quoting in the - // context of ls displaying a directory name before listing its content. - // (e.g. with the recursive flag -R) - ':', - // Under this line are the control characters that should be - // quoted in shell mode in all cases. - '"', '`', '$', '\\', '^', '\n', '\t', '\r', '=', - ]; - +fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] { + const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r="; + // the ':' colon character only induce quoting in the + // context of ls displaying a directory name before listing its content. + // (e.g. with the recursive flag -R) let start_index = if is_dirname { 0 } else { 1 }; - &ESCAPED_CHARS[start_index..] 
} @@ -308,41 +341,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] { /// /// This inner function provides an additional flag `dirname` which /// is meant for ls' directory name display. -fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String { +fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec { match style { QuotingStyle::Literal { show_control } => { if *show_control { - name.to_string_lossy().into_owned() + name.to_owned() } else { - name.to_string_lossy() - .chars() - .flat_map(|c| EscapedChar::new_literal(c).hide_control()) - .collect() + name.utf8_chunks() + .map(|s| { + let valid: String = s + .valid() + .chars() + .flat_map(|c| EscapedChar::new_literal(c).hide_control()) + .collect(); + let invalid = "?".repeat(s.invalid().len()); + valid + &invalid + }) + .collect::() + .into() } } QuotingStyle::C { quotes } => { let escaped_str: String = name - .to_string_lossy() - .chars() - .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname)) - .collect(); + .utf8_chunks() + .flat_map(|s| { + let valid = s + .valid() + .chars() + .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname)); + let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b)); + valid.chain(invalid) + }) + .collect::(); match quotes { Quotes::Single => format!("'{escaped_str}'"), Quotes::Double => format!("\"{escaped_str}\""), Quotes::None => escaped_str, } + .into() } QuotingStyle::Shell { escape, always_quote, show_control, } => { - let name = name.to_string_lossy(); - - let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) { + let (quotes, must_quote) = if name + .iter() + .any(|c| shell_escaped_char_set(dirname).contains(c)) + { (Quotes::Single, true) - } else if name.contains('\'') { + } else if name.contains(&b'\'') { (Quotes::Double, true) } else if *always_quote { (Quotes::Single, true) @@ -351,15 +400,24 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) 
-> Strin }; let (escaped_str, contains_quote_chars) = if *escape { - shell_with_escape(&name, quotes) + shell_with_escape(name, quotes) } else { - shell_without_escape(&name, quotes, *show_control) + shell_without_escape(name, quotes, *show_control) }; - match (must_quote | contains_quote_chars, quotes) { - (true, Quotes::Single) => format!("'{escaped_str}'"), - (true, Quotes::Double) => format!("\"{escaped_str}\""), - _ => escaped_str, + if must_quote | contains_quote_chars && quotes != Quotes::None { + let mut quoted_str = Vec::::with_capacity(escaped_str.len() + 2); + let quote = if quotes == Quotes::Single { + b'\'' + } else { + b'"' + }; + quoted_str.push(quote); + quoted_str.extend(escaped_str); + quoted_str.push(quote); + quoted_str + } else { + escaped_str } } } @@ -367,14 +425,16 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin /// Escape a filename with respect to the given style. pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { - escape_name_inner(name, style, false) + let name = name.to_string_lossy(); + String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)).to_string() } /// Escape a directory name with respect to the given style. /// This is mainly meant to be used for ls' directory name printing and is not /// likely to be used elsewhere. 
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String { - escape_name_inner(dir_name, style, true) + let dir_name = dir_name.to_string_lossy(); + String::from_utf8_lossy(&escape_name_inner(dir_name.as_bytes(), style, true)).to_string() } impl fmt::Display for QuotingStyle { @@ -415,7 +475,7 @@ impl fmt::Display for Quotes { #[cfg(test)] mod tests { - use crate::quoting_style::{escape_name, Quotes, QuotingStyle}; + use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle}; // spell-checker:ignore (tests/words) one\'two one'two @@ -465,14 +525,31 @@ mod tests { } } + fn check_names_inner(name: &[u8], map: &[(T, &str)]) -> Vec> { + map.iter() + .map(|(_, style)| escape_name_inner(name, &get_style(style), false)) + .collect() + } + fn check_names(name: &str, map: &[(&str, &str)]) { assert_eq!( map.iter() - .map(|(_, style)| escape_name(name.as_ref(), &get_style(style))) - .collect::>(), + .map(|(correct, _)| *correct) + .collect::>(), + check_names_inner(name.as_bytes(), map) + .iter() + .map(|bytes| std::str::from_utf8(bytes) + .expect("valid str goes in, valid str comes out")) + .collect::>() + ); + } + + fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) { + assert_eq!( map.iter() - .map(|(correct, _)| correct.to_string()) - .collect::>() + .map(|(correct, _)| *correct) + .collect::>(), + check_names_inner(name, map) ); } @@ -732,6 +809,229 @@ mod tests { ); } + #[test] + fn test_non_unicode_bytes() { + let ascii = b'_'; + let continuation = b'\xA7'; + let first2byte = b'\xC2'; + let first3byte = b'\xE0'; + let first4byte = b'\xF0'; + let invalid = b'\xC0'; + + // a single byte value invalid outside of additional context in UTF-8 + check_names_raw( + &[continuation], + &[ + (b"?", "literal"), + (b"\xA7", "literal-show"), + (b"\\247", "escape"), + (b"\"\\247\"", "c"), + (b"?", "shell"), + (b"\xA7", "shell-show"), + (b"'?'", "shell-always"), + (b"'\xA7'", "shell-always-show"), + (b"''$'\\247'", "shell-escape"), + (b"''$'\\247'", 
"shell-escape-always"), + ], + ); + + // ...but the byte becomes valid with appropriate context + // (this is just the § character in UTF-8, written as bytes) + check_names_raw( + &[first2byte, continuation], + &[ + (b"\xC2\xA7", "literal"), + (b"\xC2\xA7", "literal-show"), + (b"\xC2\xA7", "escape"), + (b"\"\xC2\xA7\"", "c"), + (b"\xC2\xA7", "shell"), + (b"\xC2\xA7", "shell-show"), + (b"'\xC2\xA7'", "shell-always"), + (b"'\xC2\xA7'", "shell-always-show"), + (b"\xC2\xA7", "shell-escape"), + (b"'\xC2\xA7'", "shell-escape-always"), + ], + ); + + // mixed with valid characters + check_names_raw( + &[continuation, ascii], + &[ + (b"?_", "literal"), + (b"\xA7_", "literal-show"), + (b"\\247_", "escape"), + (b"\"\\247_\"", "c"), + (b"?_", "shell"), + (b"\xA7_", "shell-show"), + (b"'?_'", "shell-always"), + (b"'\xA7_'", "shell-always-show"), + (b"''$'\\247''_'", "shell-escape"), + (b"''$'\\247''_'", "shell-escape-always"), + ], + ); + check_names_raw( + &[ascii, continuation], + &[ + (b"_?", "literal"), + (b"_\xA7", "literal-show"), + (b"_\\247", "escape"), + (b"\"_\\247\"", "c"), + (b"_?", "shell"), + (b"_\xA7", "shell-show"), + (b"'_?'", "shell-always"), + (b"'_\xA7'", "shell-always-show"), + (b"'_'$'\\247'", "shell-escape"), + (b"'_'$'\\247'", "shell-escape-always"), + ], + ); + check_names_raw( + &[ascii, continuation, ascii], + &[ + (b"_?_", "literal"), + (b"_\xA7_", "literal-show"), + (b"_\\247_", "escape"), + (b"\"_\\247_\"", "c"), + (b"_?_", "shell"), + (b"_\xA7_", "shell-show"), + (b"'_?_'", "shell-always"), + (b"'_\xA7_'", "shell-always-show"), + (b"'_'$'\\247''_'", "shell-escape"), + (b"'_'$'\\247''_'", "shell-escape-always"), + ], + ); + check_names_raw( + &[continuation, ascii, continuation], + &[ + (b"?_?", "literal"), + (b"\xA7_\xA7", "literal-show"), + (b"\\247_\\247", "escape"), + (b"\"\\247_\\247\"", "c"), + (b"?_?", "shell"), + (b"\xA7_\xA7", "shell-show"), + (b"'?_?'", "shell-always"), + (b"'\xA7_\xA7'", "shell-always-show"), + (b"''$'\\247''_'$'\\247'", 
"shell-escape"), + (b"''$'\\247''_'$'\\247'", "shell-escape-always"), + ], + ); + + // contiguous invalid bytes + check_names_raw( + &[ + ascii, + invalid, + ascii, + continuation, + continuation, + ascii, + continuation, + continuation, + continuation, + ascii, + continuation, + continuation, + continuation, + continuation, + ascii, + ], + &[ + (b"_?_??_???_????_", "literal"), + ( + b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_", + "literal-show", + ), + ( + b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_", + "escape", + ), + ( + b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"", + "c", + ), + (b"_?_??_???_????_", "shell"), + ( + b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_", + "shell-show", + ), + (b"'_?_??_???_????_'", "shell-always"), + ( + b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'", + "shell-always-show", + ), + ( + b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'", + "shell-escape", + ), + ( + b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'", + "shell-escape-always", + ), + ], + ); + + // invalid multi-byte sequences that start valid + check_names_raw( + &[first2byte, ascii], + &[ + (b"?_", "literal"), + (b"\xC2_", "literal-show"), + (b"\\302_", "escape"), + (b"\"\\302_\"", "c"), + (b"?_", "shell"), + (b"\xC2_", "shell-show"), + (b"'?_'", "shell-always"), + (b"'\xC2_'", "shell-always-show"), + (b"''$'\\302''_'", "shell-escape"), + (b"''$'\\302''_'", "shell-escape-always"), + ], + ); + check_names_raw( + &[first2byte, first2byte, continuation], + &[ + (b"?\xC2\xA7", "literal"), + (b"\xC2\xC2\xA7", "literal-show"), + (b"\\302\xC2\xA7", "escape"), + (b"\"\\302\xC2\xA7\"", "c"), + (b"?\xC2\xA7", "shell"), + (b"\xC2\xC2\xA7", "shell-show"), + (b"'?\xC2\xA7'", "shell-always"), + (b"'\xC2\xC2\xA7'", "shell-always-show"), + (b"''$'\\302''\xC2\xA7'", "shell-escape"), + (b"''$'\\302''\xC2\xA7'", "shell-escape-always"), + ], + ); + check_names_raw( + &[first3byte, 
continuation, ascii], + &[ + (b"??_", "literal"), + (b"\xE0\xA7_", "literal-show"), + (b"\\340\\247_", "escape"), + (b"\"\\340\\247_\"", "c"), + (b"??_", "shell"), + (b"\xE0\xA7_", "shell-show"), + (b"'??_'", "shell-always"), + (b"'\xE0\xA7_'", "shell-always-show"), + (b"''$'\\340\\247''_'", "shell-escape"), + (b"''$'\\340\\247''_'", "shell-escape-always"), + ], + ); + check_names_raw( + &[first4byte, continuation, continuation, ascii], + &[ + (b"???_", "literal"), + (b"\xF0\xA7\xA7_", "literal-show"), + (b"\\360\\247\\247_", "escape"), + (b"\"\\360\\247\\247_\"", "c"), + (b"???_", "shell"), + (b"\xF0\xA7\xA7_", "shell-show"), + (b"'???_'", "shell-always"), + (b"'\xF0\xA7\xA7_'", "shell-always-show"), + (b"''$'\\360\\247\\247''_'", "shell-escape"), + (b"''$'\\360\\247\\247''_'", "shell-escape-always"), + ], + ); + } + #[test] fn test_question_mark() { // A question mark must force quotes in shell and shell-always, unless From 2331600f4c72444a3e846f29e51cc6841c9aa96f Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Fri, 22 Nov 2024 00:32:23 -0500 Subject: [PATCH 146/179] quoting_style: fix multi-byte control characters --- src/uucore/src/lib/features/quoting_style.rs | 127 +++++++++++++++---- 1 file changed, 99 insertions(+), 28 deletions(-) diff --git a/src/uucore/src/lib/features/quoting_style.rs b/src/uucore/src/lib/features/quoting_style.rs index 0544633bb..8abc6bda7 100644 --- a/src/uucore/src/lib/features/quoting_style.rs +++ b/src/uucore/src/lib/features/quoting_style.rs @@ -76,17 +76,24 @@ enum EscapeState { Octal(EscapeOctal), } -/// Byte we need to present as escaped octal, in the form of `\nnn` +/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte. +/// Only supports characters up to 2 bytes long in UTF-8. 
struct EscapeOctal { - c: u8, + c: [u8; 2], state: EscapeOctalState, - idx: usize, + idx: u8, } enum EscapeOctalState { Done, - Backslash, - Value, + FirstBackslash, + FirstValue, + LastBackslash, + LastValue, +} + +fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 { + (byte >> (idx * 3)) & 0o7 } impl Iterator for EscapeOctal { @@ -95,12 +102,26 @@ impl Iterator for EscapeOctal { fn next(&mut self) -> Option { match self.state { EscapeOctalState::Done => None, - EscapeOctalState::Backslash => { - self.state = EscapeOctalState::Value; + EscapeOctalState::FirstBackslash => { + self.state = EscapeOctalState::FirstValue; Some('\\') } - EscapeOctalState::Value => { - let octal_digit = ((self.c) >> (self.idx * 3)) & 0o7; + EscapeOctalState::LastBackslash => { + self.state = EscapeOctalState::LastValue; + Some('\\') + } + EscapeOctalState::FirstValue => { + let octal_digit = byte_to_octal_digit(self.c[0], self.idx); + if self.idx == 0 { + self.state = EscapeOctalState::LastBackslash; + self.idx = 2; + } else { + self.idx -= 1; + } + Some(from_digit(octal_digit.into(), 8).unwrap()) + } + EscapeOctalState::LastValue => { + let octal_digit = byte_to_octal_digit(self.c[1], self.idx); if self.idx == 0 { self.state = EscapeOctalState::Done; } else { @@ -113,11 +134,25 @@ impl Iterator for EscapeOctal { } impl EscapeOctal { - fn from(c: u8) -> Self { + fn from_char(c: char) -> Self { + if c.len_utf8() == 1 { + return Self::from_byte(c as u8); + } + + let mut buf = [0; 2]; + let _s = c.encode_utf8(&mut buf); Self { - c, + c: buf, idx: 2, - state: EscapeOctalState::Backslash, + state: EscapeOctalState::FirstBackslash, + } + } + + fn from_byte(b: u8) -> Self { + Self { + c: [0, b], + idx: 2, + state: EscapeOctalState::LastBackslash, } } } @@ -131,7 +166,7 @@ impl EscapedChar { fn new_octal(b: u8) -> Self { Self { - state: EscapeState::Octal(EscapeOctal::from(b)), + state: EscapeState::Octal(EscapeOctal::from_byte(b)), } } @@ -159,7 +194,7 @@ impl EscapedChar { _ => Char(' '), }, ':' 
if dirname => Backslash(':'), - _ if c.is_ascii_control() => Octal(EscapeOctal::from(c as u8)), + _ if c.is_control() => Octal(EscapeOctal::from_char(c)), _ => Char(c), }; Self { state: init_state } @@ -176,11 +211,11 @@ impl EscapedChar { '\x0B' => Backslash('v'), '\x0C' => Backslash('f'), '\r' => Backslash('r'), - '\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c as u8)), '\'' => match quotes { Quotes::Single => Backslash('\''), _ => Char('\''), }, + _ if c.is_control() => Octal(EscapeOctal::from_char(c)), _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), _ => Char(c), }; @@ -564,10 +599,10 @@ mod tests { ("\"one_two\"", "c"), ("one_two", "shell"), ("one_two", "shell-show"), - ("\'one_two\'", "shell-always"), - ("\'one_two\'", "shell-always-show"), + ("'one_two'", "shell-always"), + ("'one_two'", "shell-always-show"), ("one_two", "shell-escape"), - ("\'one_two\'", "shell-escape-always"), + ("'one_two'", "shell-escape-always"), ], ); } @@ -581,12 +616,12 @@ mod tests { ("one two", "literal-show"), ("one\\ two", "escape"), ("\"one two\"", "c"), - ("\'one two\'", "shell"), - ("\'one two\'", "shell-show"), - ("\'one two\'", "shell-always"), - ("\'one two\'", "shell-always-show"), - ("\'one two\'", "shell-escape"), - ("\'one two\'", "shell-escape-always"), + ("'one two'", "shell"), + ("'one two'", "shell-show"), + ("'one two'", "shell-always"), + ("'one two'", "shell-always-show"), + ("'one two'", "shell-escape"), + ("'one two'", "shell-escape-always"), ], ); @@ -628,7 +663,7 @@ mod tests { // One single quote check_names( - "one\'two", + "one'two", &[ ("one'two", "literal"), ("one'two", "literal-show"), @@ -714,7 +749,7 @@ mod tests { ], ); - // The first 16 control characters. NUL is also included, even though it is of + // The first 16 ASCII control characters. NUL is also included, even though it is of // no importance for file names. 
check_names( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", @@ -753,7 +788,7 @@ mod tests { ], ); - // The last 16 control characters. + // The last 16 ASCII control characters. check_names( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", &[ @@ -807,6 +842,42 @@ mod tests { ("''$'\\177'", "shell-escape-always"), ], ); + + // The first 16 Unicode control characters. + let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap(); + check_names( + test_str, + &[ + ("????????????????", "literal"), + (test_str, "literal-show"), + ("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"), + ("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"), + ("????????????????", "shell"), + (test_str, "shell-show"), + ("'????????????????'", "shell-always"), + (&format!("'{}'", test_str), "shell-always-show"), + ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"), + ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"), + ], + ); + + // The last 16 Unicode control characters. 
+ let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap(); + check_names( + test_str, + &[ + ("????????????????", "literal"), + (test_str, "literal-show"), + ("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"), + ("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"), + ("????????????????", "shell"), + (test_str, "shell-show"), + ("'????????????????'", "shell-always"), + (&format!("'{}'", test_str), "shell-always-show"), + ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"), + ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"), + ], + ); } #[test] @@ -1065,7 +1136,7 @@ mod tests { ("one\\\\two", "escape"), ("\"one\\\\two\"", "c"), ("'one\\two'", "shell"), - ("\'one\\two\'", "shell-always"), + ("'one\\two'", "shell-always"), ("'one\\two'", "shell-escape"), ("'one\\two'", "shell-escape-always"), ], From 43229ae10465119165fb275374f9d52e149b8f3d Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Fri, 22 Nov 2024 18:13:16 -0500 Subject: [PATCH 147/179] quoting_style: use and return `OsString`s This exposes the non-UTF-8 functionality to callers. Support in `argument`, `spec`, and `wc` are implemented, as their usage is simple. A wrapper only returning valid unicode is used in `ls`, since proper handling of OsStrings there is more involved (outputs that escape non-unicode work now though). 
--- src/uu/ls/src/ls.rs | 18 +++++++--- src/uu/wc/src/wc.rs | 26 +++++++++----- .../src/lib/features/format/argument.rs | 3 +- src/uucore/src/lib/features/format/spec.rs | 28 +++++++-------- src/uucore/src/lib/features/quoting_style.rs | 36 +++++++++++++++---- 5 files changed, 76 insertions(+), 35 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index f4e347147..9a22006e0 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -21,7 +21,7 @@ use std::os::windows::fs::MetadataExt; use std::{ cmp::Reverse, error::Error, - ffi::OsString, + ffi::{OsStr, OsString}, fmt::{Display, Write as FmtWrite}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, io::{stdout, BufWriter, ErrorKind, Stdout, Write}, @@ -55,7 +55,7 @@ use uucore::libc::{dev_t, major, minor}; #[cfg(unix)] use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR}; use uucore::line_ending::LineEnding; -use uucore::quoting_style::{escape_dir_name, escape_name, QuotingStyle}; +use uucore::quoting_style::{self, QuotingStyle}; use uucore::{ display::Quotable, error::{set_exit_code, UError, UResult}, @@ -2048,7 +2048,11 @@ impl PathData { /// file11 /// ``` fn show_dir_name(path_data: &PathData, out: &mut BufWriter, config: &Config) { - let escaped_name = escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style); + // FIXME: replace this with appropriate behavior for literal unprintable bytes + let escaped_name = + quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style) + .to_string_lossy() + .to_string(); let name = if config.hyperlink && !config.dired { create_hyperlink(&escaped_name, path_data) @@ -3002,7 +3006,6 @@ use std::sync::Mutex; #[cfg(unix)] use uucore::entries; use uucore::fs::FileInformation; -use uucore::quoting_style; #[cfg(unix)] fn cached_uid2usr(uid: u32) -> String { @@ -3542,3 +3545,10 @@ fn calculate_padding_collection( padding_collections } + +// FIXME: replace this with appropriate behavior for literal unprintable bytes +fn 
escape_name(name: &OsStr, style: &QuotingStyle) -> String { + quoting_style::escape_name(name, style) + .to_string_lossy() + .to_string() +} diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 33b70ee62..1c2d99628 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -13,7 +13,7 @@ mod word_count; use std::{ borrow::{Borrow, Cow}, cmp::max, - ffi::OsString, + ffi::{OsStr, OsString}, fs::{self, File}, io::{self, Write}, iter, @@ -28,7 +28,7 @@ use utf8::{BufReadDecoder, BufReadDecoderError}; use uucore::{ error::{FromIo, UError, UResult}, format_usage, help_about, help_usage, - quoting_style::{escape_name, QuotingStyle}, + quoting_style::{self, QuotingStyle}, shortcut_value_parser::ShortcutValueParser, show, }; @@ -259,7 +259,7 @@ impl<'a> Input<'a> { match self { Self::Path(path) => Some(match path.to_str() { Some(s) if !s.contains('\n') => Cow::Borrowed(s), - _ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)), + _ => Cow::Owned(escape_name_wrapper(path.as_os_str())), }), Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)), Self::Stdin(StdinKind::Implicit) => None, @@ -269,7 +269,7 @@ impl<'a> Input<'a> { /// Converts input into the form that appears in errors. 
fn path_display(&self) -> String { match self { - Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE), + Self::Path(path) => escape_name_wrapper(path.as_os_str()), Self::Stdin(_) => String::from("standard input"), } } @@ -361,7 +361,7 @@ impl WcError { Some((input, idx)) => { let path = match input { Input::Stdin(_) => STDIN_REPR.into(), - Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(), + Input::Path(path) => escape_name_wrapper(path.as_os_str()).into(), }; Self::ZeroLengthFileNameCtx { path, idx } } @@ -761,7 +761,9 @@ fn files0_iter_file<'a>(path: &Path) -> UResult Err(e.map_err_context(|| { format!( "cannot open {} for reading", - escape_name(path.as_os_str(), QS_QUOTE_ESCAPE) + quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE) + .into_string() + .expect("All escaped names with the escaping option return valid strings.") ) })), } @@ -793,9 +795,9 @@ fn files0_iter<'a>( Ok(Input::Path(PathBuf::from(s).into())) } } - Err(e) => Err(e.map_err_context(|| { - format!("{}: read error", escape_name(&err_path, QS_ESCAPE)) - }) as Box), + Err(e) => Err(e + .map_err_context(|| format!("{}: read error", escape_name_wrapper(&err_path))) + as Box), }), ); // Loop until there is an error; yield that error and then nothing else. 
@@ -808,6 +810,12 @@ fn files0_iter<'a>( }) } +fn escape_name_wrapper(name: &OsStr) -> String { + quoting_style::escape_name(name, QS_ESCAPE) + .into_string() + .expect("All escaped names with the escaping option return valid strings.") +} + fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { let mut total_word_count = WordCount::default(); let mut num_inputs: usize = 0; diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 758510498..5cdd03421 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -112,7 +112,8 @@ fn extract_value(p: Result>, input: &str) -> T Default::default() } ParseError::PartialMatch(v, rest) => { - if input.starts_with('\'') { + let bytes = input.as_encoded_bytes(); + if !bytes.is_empty() && bytes[0] == b'\'' { show_warning!( "{}: character(s) following character constant have been ignored", &rest, diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 581e1fa06..81dbc1ebc 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -353,20 +353,20 @@ impl Spec { writer.write_all(&parsed).map_err(FormatError::IoError) } Self::QuotedString => { - let s = args.get_str(); - writer - .write_all( - escape_name( - s.as_ref(), - &QuotingStyle::Shell { - escape: true, - always_quote: false, - show_control: false, - }, - ) - .as_bytes(), - ) - .map_err(FormatError::IoError) + let s = escape_name( + args.get_str().as_ref(), + &QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, + }, + ); + #[cfg(unix)] + let bytes = std::os::unix::ffi::OsStringExt::into_vec(s); + #[cfg(not(unix))] + let bytes = s.to_string_lossy().as_bytes().to_owned(); + + writer.write_all(&bytes).map_err(FormatError::IoError) } Self::SignedInt { width, diff --git a/src/uucore/src/lib/features/quoting_style.rs 
b/src/uucore/src/lib/features/quoting_style.rs index 8abc6bda7..2e9cd0b7e 100644 --- a/src/uucore/src/lib/features/quoting_style.rs +++ b/src/uucore/src/lib/features/quoting_style.rs @@ -6,8 +6,10 @@ //! Set of functions for escaping names according to different quoting styles. use std::char::from_digit; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::fmt; +#[cfg(unix)] +use std::os::unix::ffi::{OsStrExt, OsStringExt}; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. @@ -459,17 +461,37 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec String { - let name = name.to_string_lossy(); - String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)).to_string() +pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString { + #[cfg(unix)] + { + let name = name.as_bytes(); + OsStringExt::from_vec(escape_name_inner(name, style, false)) + } + #[cfg(not(unix))] + { + let name = name.to_string_lossy(); + String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)) + .to_string() + .into() + } } /// Escape a directory name with respect to the given style. /// This is mainly meant to be used for ls' directory name printing and is not /// likely to be used elsewhere. 
-pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String { - let dir_name = dir_name.to_string_lossy(); - String::from_utf8_lossy(&escape_name_inner(dir_name.as_bytes(), style, true)).to_string() +pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { + #[cfg(unix)] + { + let name = dir_name.as_bytes(); + OsStringExt::from_vec(escape_name_inner(name, style, true)) + } + #[cfg(not(unix))] + { + let name = dir_name.to_string_lossy(); + String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, true)) + .to_string() + .into() + } } impl fmt::Display for QuotingStyle { From 67360ba8e9cae57ece1a19820e75ebb59b030ae5 Mon Sep 17 00:00:00 2001 From: David Campbell Date: Wed, 18 Dec 2024 18:42:06 -0500 Subject: [PATCH 148/179] Put DEFAULT_BRANCH into an env variable. --- .github/workflows/GnuTests.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index b47b43596..5022ac39c 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -23,6 +23,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} +env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + jobs: gnu: permissions: @@ -45,9 +48,9 @@ jobs: path_reference="reference" outputs path_GNU path_GNU_tests path_reference path_UUTILS # - repo_default_branch="${{ github.event.repository.default_branch }}" + repo_default_branch="$DEFAULT_BRANCH" repo_GNU_ref="v9.5" - repo_reference_branch="${{ github.event.repository.default_branch }}" + repo_reference_branch="$DEFAULT_BRANCH" outputs repo_default_branch repo_GNU_ref repo_reference_branch # SUITE_LOG_FILE="${path_GNU_tests}/test-suite.log" From 15bea52b531651a756a089eb824c77b9af4b07b8 Mon Sep 17 00:00:00 2001 From: David Campbell Date: Wed, 18 Dec 2024 20:04:50 -0500 Subject: [PATCH 149/179] GnuComment: zizmor: 
ignore[dangerous-triggers] --- .github/workflows/GnuComment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/GnuComment.yml b/.github/workflows/GnuComment.yml index 36c54490c..987343723 100644 --- a/.github/workflows/GnuComment.yml +++ b/.github/workflows/GnuComment.yml @@ -4,7 +4,7 @@ on: workflow_run: workflows: ["GnuTests"] types: - - completed + - completed # zizmor: ignore[dangerous-triggers] permissions: {} jobs: From 655defd15cf944371a311ac22281984f2f715b49 Mon Sep 17 00:00:00 2001 From: David Campbell Date: Wed, 18 Dec 2024 21:27:34 -0500 Subject: [PATCH 150/179] Set persist-credentials: false --- .github/workflows/CICD.yml | 26 ++++++++++++++++++++++++++ .github/workflows/CheckScripts.yml | 4 ++++ .github/workflows/FixPR.yml | 4 ++++ .github/workflows/GnuTests.yml | 2 ++ .github/workflows/android.yml | 2 ++ .github/workflows/code-quality.yml | 8 ++++++++ .github/workflows/freebsd.yml | 4 ++++ .github/workflows/fuzzing.yml | 4 ++++ 8 files changed, 54 insertions(+) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 56418dd6e..f84fdf0cd 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -37,6 +37,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: EmbarkStudios/cargo-deny-action@v2 style_deps: @@ -54,6 +56,8 @@ jobs: - { os: windows-latest , features: feat_os_windows } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@nightly ## note: requires 'nightly' toolchain b/c `cargo-udeps` uses the `rustc` '-Z save-analysis' option ## * ... 
ref: @@ -106,6 +110,8 @@ jobs: # - { os: windows-latest , features: feat_os_windows } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: stable @@ -159,6 +165,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ env.RUST_MIN_SRV }} @@ -227,6 +235,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: "`cargo update` testing" @@ -250,6 +260,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@stable - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 @@ -304,6 +316,8 @@ jobs: - { os: windows-latest , features: feat_os_windows } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@stable - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 @@ -331,6 +345,8 @@ jobs: - { os: windows-latest , features: feat_os_windows } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@nightly - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 @@ -355,6 +371,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache @@ -485,6 +503,8 @@ jobs: - { os: windows-latest , target: aarch64-pc-windows-msvc , features: feat_os_windows, use-cross: use-cross , skip-tests: true } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ 
env.RUST_MIN_SRV }} @@ -780,6 +800,8 @@ jobs: ## VARs setup echo "TEST_SUMMARY_FILE=busybox-result.json" >> $GITHUB_OUTPUT - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.7 @@ -860,6 +882,8 @@ jobs: TEST_SUMMARY_FILE="toybox-result.json" outputs TEST_SUMMARY_FILE - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ env.RUST_MIN_SRV }} @@ -935,6 +959,8 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: build and test all programs individually diff --git a/.github/workflows/CheckScripts.yml b/.github/workflows/CheckScripts.yml index c18c4733c..4800cd285 100644 --- a/.github/workflows/CheckScripts.yml +++ b/.github/workflows/CheckScripts.yml @@ -30,6 +30,8 @@ jobs: contents: read steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Run ShellCheck uses: ludeeus/action-shellcheck@master env: @@ -46,6 +48,8 @@ jobs: contents: read steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Setup shfmt uses: mfinelli/setup-shfmt@v3 - name: Run shfmt diff --git a/.github/workflows/FixPR.yml b/.github/workflows/FixPR.yml index e837b3546..5cd7fe647 100644 --- a/.github/workflows/FixPR.yml +++ b/.github/workflows/FixPR.yml @@ -27,6 +27,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Initialize job variables id: vars shell: bash @@ -86,6 +88,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Initialize job variables id: vars shell: bash diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml 
index b47b43596..ddca7ab72 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -62,6 +62,7 @@ jobs: uses: actions/checkout@v4 with: path: '${{ steps.vars.outputs.path_UUTILS }}' + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: stable @@ -76,6 +77,7 @@ jobs: path: '${{ steps.vars.outputs.path_GNU }}' ref: ${{ steps.vars.outputs.repo_GNU_ref }} submodules: false + persist-credentials: false - name: Override submodule URL and initialize submodules # Use github instead of upstream git server diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index d920ad801..319f7b11c 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -79,6 +79,8 @@ jobs: sudo udevadm control --reload-rules sudo udevadm trigger --name-match=kvm - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Collect information about runner if: always() continue-on-error: true diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 8e7db5fc3..0e598b502 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -32,6 +32,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: stable @@ -75,6 +77,8 @@ jobs: - { os: windows-latest , features: feat_os_windows } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@master with: toolchain: stable @@ -120,6 +124,8 @@ jobs: - { os: ubuntu-latest , features: feat_os_unix } steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Initialize workflow variables id: vars shell: bash @@ -156,6 +162,8 @@ jobs: steps: - name: Clone repository uses: actions/checkout@v4 + with: + persist-credentials: false - name: Check run: npx --yes @taplo/cli fmt --check diff --git 
a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 1ff0ba047..42255d889 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -35,6 +35,8 @@ jobs: RUSTC_WRAPPER: "sccache" steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.7 @@ -127,6 +129,8 @@ jobs: RUSTC_WRAPPER: "sccache" steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.7 diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index df40b1236..24d0f1c43 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -22,6 +22,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@nightly - name: Install `cargo-fuzz` run: cargo install cargo-fuzz @@ -62,6 +64,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: dtolnay/rust-toolchain@nightly - name: Install `cargo-fuzz` run: cargo install cargo-fuzz From 3295e831a10301b8f84c58f6164d49bdbf7e33a9 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 19 Dec 2024 06:09:57 +0000 Subject: [PATCH 151/179] fix(deps): update rust crate libc to v0.2.169 --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9f906aa68..84c28bcd9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1297,9 +1297,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = 
"b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libloading" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index f2ba3f375..ce24e5827 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -415,10 +415,16 @@ dependencies = [ ] [[package]] -name = "libc" -version = "0.2.168" +name = "lazy_static" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libfuzzer-sys" @@ -988,6 +994,7 @@ dependencies = [ "dunce", "glob", "itertools", + "lazy_static", "libc", "nix 0.29.0", "number_prefix", From 8ba264fa27b6f3f77c8a510640bb799b017d80f3 Mon Sep 17 00:00:00 2001 From: David Campbell Date: Wed, 18 Dec 2024 22:15:07 -0500 Subject: [PATCH 152/179] Use the env variable STYLE_FAIL_ON_FAULT. 
--- .github/workflows/code-quality.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 0e598b502..814da316a 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -46,7 +46,7 @@ jobs: ## VARs setup outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } # failure mode - unset FAIL_ON_FAULT ; case '${{ env.STYLE_FAIL_ON_FAULT }}' in + unset FAIL_ON_FAULT ; case "$STYLE_FAIL_ON_FAULT" in ''|0|f|false|n|no|off) FAULT_TYPE=warning ;; *) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;; esac; @@ -93,7 +93,7 @@ jobs: ## VARs setup outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } # failure mode - unset FAIL_ON_FAULT ; case '${{ env.STYLE_FAIL_ON_FAULT }}' in + unset FAIL_ON_FAULT ; case "$STYLE_FAIL_ON_FAULT" in ''|0|f|false|n|no|off) FAULT_TYPE=warning ;; *) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;; esac; @@ -133,7 +133,7 @@ jobs: ## VARs setup outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } # failure mode - unset FAIL_ON_FAULT ; case '${{ env.STYLE_FAIL_ON_FAULT }}' in + unset FAIL_ON_FAULT ; case "$STYLE_FAIL_ON_FAULT" in ''|0|f|false|n|no|off) FAULT_TYPE=warning ;; *) FAIL_ON_FAULT=true ; FAULT_TYPE=error ;; esac; From 625c49d0fedd60af8814c842d2d037bc256e028f Mon Sep 17 00:00:00 2001 From: Daringcuteseal Date: Thu, 19 Dec 2024 18:32:11 +0700 Subject: [PATCH 153/179] uucore: add common splice-write functionality Splice is a Linux-specific syscall that allows direct data copying from one file descriptor to another without user-space buffer. 
As of now, this is used by `cp`, `cat`, and `install` when compiled for Linux and Android. --- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features.rs | 4 +- src/uucore/src/lib/features/buf_copy.rs | 373 ++++++++++++++++++++++++ src/uucore/src/lib/lib.rs | 4 +- 4 files changed, 380 insertions(+), 2 deletions(-) create mode 100644 src/uucore/src/lib/features/buf_copy.rs diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index a4529f3a5..5e1a065a6 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -87,6 +87,7 @@ lines = [] format = ["itertools", "quoting-style"] mode = ["libc"] perms = ["libc", "walkdir"] +buf-copy = [] pipes = [] process = ["libc"] proc-info = ["tty", "walkdir"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index cf24637f7..dfe5b7733 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -39,11 +39,13 @@ pub mod version_cmp; pub mod mode; // ** unix-only +#[cfg(all(any(target_os = "linux", target_os = "android"), feature = "buf-copy"))] +pub mod buf_copy; #[cfg(all(unix, feature = "entries"))] pub mod entries; #[cfg(all(unix, feature = "perms"))] pub mod perms; -#[cfg(all(unix, feature = "pipes"))] +#[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))] pub mod pipes; #[cfg(all(target_os = "linux", feature = "proc-info"))] pub mod proc_info; diff --git a/src/uucore/src/lib/features/buf_copy.rs b/src/uucore/src/lib/features/buf_copy.rs new file mode 100644 index 000000000..2b46248a5 --- /dev/null +++ b/src/uucore/src/lib/features/buf_copy.rs @@ -0,0 +1,373 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! This module provides several buffer-based copy/write functions that leverage +//! the `splice` system call in Linux systems, thus increasing the I/O +//! performance of copying between two file descriptors. 
This module is mostly +//! used by utilities to work around the limitations of Rust's `fs::copy` which +//! does not handle copying special files (e.g pipes, character/block devices). + +use crate::error::{UError, UResult}; +use nix::unistd; +use std::fs::File; +use std::{ + io::{self, Read, Write}, + os::{ + fd::AsFd, + unix::io::{AsRawFd, RawFd}, + }, +}; + +use nix::{errno::Errno, libc::S_IFIFO, sys::stat::fstat}; + +use super::pipes::{pipe, splice, splice_exact, vmsplice}; + +type Result<T> = std::result::Result<T, Error>; + +/// Error types used by buffer-copying functions from the `buf_copy` module. +#[derive(Debug)] +pub enum Error { + Io(io::Error), + WriteError(String), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::WriteError(msg) => write!(f, "splice() write error: {}", msg), + Error::Io(err) => write!(f, "I/O error: {}", err), + } + } +} + +impl std::error::Error for Error {} + +impl UError for Error { + fn code(&self) -> i32 { + 1 + } + + fn usage(&self) -> bool { + false + } +} + +/// Helper function to determine whether a given handle (such as a file) is a pipe or not. +/// +/// # Arguments +/// * `out` - path of handle +/// +/// # Returns +/// A `bool` indicating whether the given handle is a pipe or not. +#[inline] +#[cfg(unix)] +pub fn is_pipe<P>(path: &P) -> Result<bool> +where + P: AsRawFd, +{ + Ok(fstat(path.as_raw_fd())?.st_mode as nix::libc::mode_t & S_IFIFO != 0) +} + +const SPLICE_SIZE: usize = 1024 * 128; +const BUF_SIZE: usize = 1024 * 16; + +/// Copy data from `Read` implementor `source` into a `Write` implementor +/// `dest`. This works by reading a chunk of data from `source` and writing the +/// data to `dest` in a loop. +/// +/// This function uses the Linux-specific `splice` call when possible which does +/// not use any intermediate user-space buffer. It falls backs to +/// `std::io::copy` under other platforms or when the call fails and is still +/// recoverable. +/// +/// # Arguments +/// * `source` - `Read` implementor to copy data from. +/// * `dest` - `Write` implementor to copy data to. +/// +/// # Returns +/// +/// Result of operation and bytes successfully written (as a `u64`) when +/// operation is successful. +pub fn copy_stream<R, S>(src: &mut R, dest: &mut S) -> UResult<u64> +where + R: Read + AsFd + AsRawFd, + S: Write + AsFd + AsRawFd, +{ + #[cfg(any(target_os = "linux", target_os = "android"))] + { + // If we're on Linux or Android, try to use the splice() system call + // for faster writing. If it works, we're done. + let result = splice_write(src, &dest.as_fd())?; + if !result.1 { + return Ok(result.0); + } + } + // If we're not on Linux or Android, or the splice() call failed, + // fall back on slower writing. + let result = std::io::copy(src, dest)?; + + // If the splice() call failed and there has been some data written to + // stdout via while loop above AND there will be second splice() call + // that will succeed, data pushed through splice will be output before + // the data buffered in stdout.lock. Therefore additional explicit flush + // is required here. + dest.flush()?; + Ok(result) +} + +/// Write from source `handle` into destination `write_fd` using Linux-specific +/// `splice` system call.
+/// +/// # Arguments +/// - `source` - source handle +/// - `dest` - destination handle +#[inline] +#[cfg(any(target_os = "linux", target_os = "android"))] +fn splice_write<R, S>(source: &R, dest: &S) -> UResult<(u64, bool)> +where + R: Read + AsFd + AsRawFd, + S: AsRawFd + AsFd, +{ + let (pipe_rd, pipe_wr) = pipe()?; + let mut bytes: u64 = 0; + + loop { + match splice(&source, &pipe_wr, SPLICE_SIZE) { + Ok(n) => { + if n == 0 { + return Ok((bytes, false)); + } + if splice_exact(&pipe_rd, dest, n).is_err() { + // If the first splice manages to copy to the intermediate + // pipe, but the second splice to stdout fails for some reason + // we can recover by copying the data that we have from the + // intermediate pipe to stdout using normal read/write. Then + // we tell the caller to fall back. + copy_exact(pipe_rd.as_raw_fd(), dest, n)?; + return Ok((bytes, true)); + } + + bytes += n as u64; + } + Err(_) => { + return Ok((bytes, true)); + } + } + } +} + +/// Move exactly `num_bytes` bytes from `read_fd` to `write_fd` using the `read` +/// and `write` calls. +fn copy_exact(read_fd: RawFd, write_fd: &impl AsFd, num_bytes: usize) -> std::io::Result<usize> { + let mut left = num_bytes; + let mut buf = [0; BUF_SIZE]; + let mut written = 0; + while left > 0 { + let read = unistd::read(read_fd, &mut buf)?; + assert_ne!(read, 0, "unexpected end of pipe"); + while written < read { + let n = unistd::write(write_fd, &buf[written..read])?; + written += n; + } + left -= read; + } + Ok(written) +} + +/// Write input `bytes` to a file descriptor. This uses the Linux-specific +/// `vmsplice()` call to write into a file descriptor directly, which only works +/// if the destination is a pipe. +/// +/// # Arguments +/// * `bytes` - data to be written +/// * `dest` - destination handle +/// +/// # Returns +/// When write succeeds, the amount of bytes written is returned as a +/// `u64`. The `bool` indicates if we need to fall back to normal copying or +/// not.
`true` means we need to fall back, `false` means we don't have to. +/// +/// A `UError` error is returned when the operation is not supported or when an +/// I/O error occurs. +#[cfg(any(target_os = "linux", target_os = "android"))] +pub fn splice_data_to_pipe(bytes: &[u8], dest: &T) -> UResult<(u64, bool)> +where + T: AsRawFd + AsFd, +{ + let mut n_bytes: u64 = 0; + let mut bytes = bytes; + while !bytes.is_empty() { + let len = match vmsplice(dest, bytes) { + Ok(n) => n, + // The maybe_unsupported call below may emit an error, when the + // error is considered as unrecoverable error (ones that won't make + // us fall back to other method) + Err(e) => return Ok(maybe_unsupported(e)?), + }; + bytes = &bytes[len..]; + n_bytes += len as u64; + } + Ok((n_bytes, false)) +} + +/// Write input `bytes` to a handle using a temporary pipe. A `vmsplice()` call +/// is issued to write to the temporary pipe, which then gets written to the +/// final destination using `splice()`. +/// +/// # Arguments * `bytes` - data to be written * `dest` - destination handle +/// +/// # Returns When write succeeds, the amount of bytes written is returned as a +/// `u64`. The `bool` indicates if we need to fall back to normal copying or +/// not. `true` means we need to fall back, `false` means we don't have to. +/// +/// A `UError` error is returned when the operation is not supported or when an +/// I/O error occurs. +#[cfg(any(target_os = "linux", target_os = "android"))] +pub fn splice_data_to_fd( + bytes: &[u8], + read_pipe: &File, + write_pipe: &File, + dest: &T, +) -> UResult<(u64, bool)> { + loop { + let mut bytes = bytes; + while !bytes.is_empty() { + let len = match vmsplice(&write_pipe, bytes) { + Ok(n) => n, + Err(e) => return Ok(maybe_unsupported(e)?), + }; + if let Err(e) = splice_exact(&read_pipe, dest, len) { + return Ok(maybe_unsupported(e)?); + } + bytes = &bytes[len..]; + } + } +} + +/// Conversion from a `nix::Error` into our `Error` which implements `UError`. 
+#[cfg(any(target_os = "linux", target_os = "android"))] +impl From<nix::Error> for Error { + fn from(error: nix::Error) -> Self { + Self::Io(io::Error::from_raw_os_error(error as i32)) + } +} + +/// Several error values from `nix::Error` (`EINVAL`, `ENOSYS`, and `EBADF`) get +/// treated as errors indicating that the `splice` call is not available, i.e we +/// can still recover from the error. Thus, return the final result of the call +/// as `Result` and indicate that we have to fall back using other write method. +/// +/// # Arguments +/// * `error` - the `nix::Error` received +/// +/// # Returns +/// Result with tuple containing a `u64` `0` indicating that no data had been +/// written and a `true` indicating we have to fall back, if error is still +/// recoverable. Returns an `Error` implementing `UError` otherwise. +#[cfg(any(target_os = "linux", target_os = "android"))] +fn maybe_unsupported(error: nix::Error) -> Result<(u64, bool)> { + match error { + Errno::EINVAL | Errno::ENOSYS | Errno::EBADF => Ok((0, true)), + _ => Err(error.into()), + } +} + +#[cfg(test)] +mod tests { + use tempfile::tempdir; + + use super::*; + use crate::pipes; + + fn new_temp_file() -> File { + let temp_dir = tempdir().unwrap(); + File::create(temp_dir.path().join("file.txt")).unwrap() + } + + #[test] + fn test_file_is_pipe() { + let temp_file = new_temp_file(); + let (pipe_read, pipe_write) = pipes::pipe().unwrap(); + + assert!(is_pipe(&pipe_read).unwrap()); + assert!(is_pipe(&pipe_write).unwrap()); + assert!(!is_pipe(&temp_file).unwrap()); + } + + #[test] + fn test_valid_splice_errs() { + let err = nix::Error::from(Errno::EINVAL); + assert_eq!(maybe_unsupported(err).unwrap(), (0, true)); + + let err = nix::Error::from(Errno::ENOSYS); + assert_eq!(maybe_unsupported(err).unwrap(), (0, true)); + + let err = nix::Error::from(Errno::EBADF); + assert_eq!(maybe_unsupported(err).unwrap(), (0, true)); + + let err = nix::Error::from(Errno::EPERM); + assert!(maybe_unsupported(err).is_err()); + } + +
#[test] + fn test_splice_data_to_pipe() { + let (pipe_read, pipe_write) = pipes::pipe().unwrap(); + let data = b"Hello, world!"; + let (bytes, _) = splice_data_to_pipe(data, &pipe_write).unwrap(); + let mut buf = [0; 1024]; + let n = unistd::read(pipe_read.as_raw_fd(), &mut buf).unwrap(); + assert_eq!(&buf[..n], data); + assert_eq!(bytes as usize, data.len()); + } + + #[test] + fn test_splice_data_to_file() { + let mut temp_file = new_temp_file(); + let (pipe_read, pipe_write) = pipes::pipe().unwrap(); + let data = b"Hello, world!"; + let (bytes, _) = splice_data_to_fd(data, &pipe_read, &pipe_write, &temp_file).unwrap(); + let mut buf = [0; 1024]; + let n = temp_file.read(&mut buf).unwrap(); + assert_eq!(&buf[..n], data); + assert_eq!(bytes as usize, data.len()); + } + + #[test] + fn test_copy_exact() { + let (mut pipe_read, mut pipe_write) = pipes::pipe().unwrap(); + let data = b"Hello, world!"; + let n = pipe_write.write(data).unwrap(); + assert_eq!(n, data.len()); + let mut buf = [0; 1024]; + let n = copy_exact(pipe_read.as_raw_fd(), &pipe_write, data.len()).unwrap(); + let n2 = pipe_read.read(&mut buf).unwrap(); + assert_eq!(n, n2); + assert_eq!(&buf[..n], data); + } + + #[test] + fn test_copy_stream() { + let (mut pipe_read, mut pipe_write) = pipes::pipe().unwrap(); + let data = b"Hello, world!"; + let n = pipe_write.write(data).unwrap(); + assert_eq!(n, data.len()); + let mut buf = [0; 1024]; + let n = copy_stream(&mut pipe_read, &mut pipe_write).unwrap(); + let n2 = pipe_read.read(&mut buf).unwrap(); + assert_eq!(n as usize, n2); + assert_eq!(&buf[..n as usize], data); + } + + #[test] + fn test_splice_write() { + let (mut pipe_read, pipe_write) = pipes::pipe().unwrap(); + let data = b"Hello, world!"; + let (bytes, _) = splice_write(&pipe_read, &pipe_write).unwrap(); + let mut buf = [0; 1024]; + let n = pipe_read.read(&mut buf).unwrap(); + assert_eq!(&buf[..n], data); + assert_eq!(bytes as usize, data.len()); + } +} diff --git a/src/uucore/src/lib/lib.rs 
b/src/uucore/src/lib/lib.rs index 6142e688d..fc4709aab 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -70,11 +70,13 @@ pub use crate::features::version_cmp; #[cfg(all(not(windows), feature = "mode"))] pub use crate::features::mode; // ** unix-only +#[cfg(all(any(target_os = "linux", target_os = "android"), feature = "buf-copy"))] +pub use crate::features::buf_copy; #[cfg(all(unix, feature = "entries"))] pub use crate::features::entries; #[cfg(all(unix, feature = "perms"))] pub use crate::features::perms; -#[cfg(all(unix, feature = "pipes"))] +#[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))] pub use crate::features::pipes; #[cfg(all(unix, feature = "process"))] pub use crate::features::process; From 2ae914b268fcc4790db444e4765b4e6941063f12 Mon Sep 17 00:00:00 2001 From: Santeri Paavolainen Date: Thu, 19 Dec 2024 14:54:24 +0200 Subject: [PATCH 154/179] uucore: correctly truncate response if getgroups shrinks (#6978) The code above this line handles the case if `res` is larger than `ngroups`, but `res < ngroups` is also a possibility, which this line attempts to address but actually does not. The original code resizes to `ngroups` which is a no-op (given that `groups` is already `ngroups` size). The correct target for re-sizing the `groups` is the result from the last `getgroups`, i.e., `res`. 
--- src/uucore/src/lib/features/entries.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/uucore/src/lib/features/entries.rs b/src/uucore/src/lib/features/entries.rs index d1c9f9c04..f3d1232eb 100644 --- a/src/uucore/src/lib/features/entries.rs +++ b/src/uucore/src/lib/features/entries.rs @@ -83,13 +83,14 @@ pub fn get_groups() -> IOResult> { if res == -1 { let err = IOError::last_os_error(); if err.raw_os_error() == Some(libc::EINVAL) { - // Number of groups changed, retry + // Number of groups has increased, retry continue; } else { return Err(err); } } else { - groups.truncate(ngroups.try_into().unwrap()); + // Number of groups may have decreased + groups.truncate(res.try_into().unwrap()); return Ok(groups); } } From db1ed4c094b0dc63b3567191739647d9b7c1bfef Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Thu, 19 Dec 2024 17:08:05 -0500 Subject: [PATCH 155/179] core: improve OsStr(ing) helpers This adds the `os_str_as_bytes_lossy` function, for when we want infallible conversion across platforms, and improves the doc comments of similar functions to be more accurate and better formatted. --- src/uucore/src/lib/features/quoting_style.rs | 32 +++------------ src/uucore/src/lib/lib.rs | 41 ++++++++++++++------ 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/src/uucore/src/lib/features/quoting_style.rs b/src/uucore/src/lib/features/quoting_style.rs index 2e9cd0b7e..6d0265dc6 100644 --- a/src/uucore/src/lib/features/quoting_style.rs +++ b/src/uucore/src/lib/features/quoting_style.rs @@ -8,8 +8,6 @@ use std::char::from_digit; use std::ffi::{OsStr, OsString}; use std::fmt; -#[cfg(unix)] -use std::os::unix::ffi::{OsStrExt, OsStringExt}; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. 
@@ -462,36 +460,18 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec OsString { - #[cfg(unix)] - { - let name = name.as_bytes(); - OsStringExt::from_vec(escape_name_inner(name, style, false)) - } - #[cfg(not(unix))] - { - let name = name.to_string_lossy(); - String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)) - .to_string() - .into() - } + let name = crate::os_str_as_bytes_lossy(name); + crate::os_string_from_vec(escape_name_inner(&name, style, false)) + .expect("all byte sequences should be valid for platform, or already replaced in name") } /// Escape a directory name with respect to the given style. /// This is mainly meant to be used for ls' directory name printing and is not /// likely to be used elsewhere. pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { - #[cfg(unix)] - { - let name = dir_name.as_bytes(); - OsStringExt::from_vec(escape_name_inner(name, style, true)) - } - #[cfg(not(unix))] - { - let name = dir_name.to_string_lossy(); - String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, true)) - .to_string() - .into() - } + let name = crate::os_str_as_bytes_lossy(dir_name); + crate::os_string_from_vec(escape_name_inner(&name, style, true)) + .expect("all byte sequences should be valid for platform, or already replaced in name") } impl fmt::Display for QuotingStyle { diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 6142e688d..e98a22815 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -253,9 +253,10 @@ pub fn read_yes() -> bool { } } -/// Helper function for processing delimiter values (which could be non UTF-8) -/// It converts OsString to &[u8] for unix targets only -/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 +/// Converts an `OsStr` to a UTF-8 `&[u8]`. 
+/// +/// This always succeeds on unix platforms, +/// and fails on other platforms if the string can't be coerced to UTF-8. pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { #[cfg(unix)] let bytes = os_string.as_bytes(); @@ -271,13 +272,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { Ok(bytes) } -/// Helper function for converting a slice of bytes into an &OsStr -/// or OsString in non-unix targets. +/// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes. /// -/// It converts `&[u8]` to `Cow` for unix targets only. -/// On non-unix (i.e. Windows), the conversion goes through the String type -/// and thus undergo UTF-8 validation, making it fail if the stream contains -/// non-UTF-8 characters. +/// This is always lossless on unix platforms, +/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms. +pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> { + #[cfg(unix)] + let bytes = Cow::from(os_string.as_bytes()); + + #[cfg(not(unix))] + let bytes = match os_string.to_string_lossy() { + Cow::Borrowed(slice) => Cow::from(slice.as_bytes()), + Cow::Owned(owned) => Cow::from(owned.into_bytes()), + }; + + bytes +} + +/// Converts a `&[u8]` to an `&OsStr`, +/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms. +/// +/// This always succeeds on unix platforms, +/// and fails on other platforms if the bytes can't be parsed as UTF-8. pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { #[cfg(unix)] let os_str = Cow::Borrowed(OsStr::from_bytes(bytes)); @@ -289,9 +305,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { Ok(os_str) } -/// Helper function for making an `OsString` from a byte field -/// It converts `Vec` to `OsString` for unix targets only. -/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8 +/// Converts a `Vec` into an `OsString`, parsing as UTF-8 on non-unix platforms. 
+/// +/// This always succeeds on unix platforms, +/// and fails on other platforms if the bytes can't be parsed as UTF-8. pub fn os_string_from_vec(vec: Vec) -> mods::error::UResult { #[cfg(unix)] let s = OsString::from_vec(vec); From 74b613d15555e3743eecde9dc98ee24360359b20 Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Thu, 19 Dec 2024 21:01:56 -0500 Subject: [PATCH 156/179] Android CICD: use posix style test --- .github/workflows/android.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index 319f7b11c..a7dcbdbbd 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -178,7 +178,7 @@ jobs: util/android-commands.sh sync_host util/android-commands.sh build util/android-commands.sh tests - if [[ "${{ steps.rust-cache.outputs.cache-hit }}" != 'true' ]]; then util/android-commands.sh sync_image; fi; exit 0 + if [ "${{ steps.rust-cache.outputs.cache-hit }}" != 'true' ]; then util/android-commands.sh sync_image; fi; exit 0 - name: Collect information about runner ressources if: always() continue-on-error: true From cf8a81c6c25c36757e51790da81481591593c1ca Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 20 Dec 2024 10:02:36 +0100 Subject: [PATCH 157/179] cut: fix overriding of -d= --- src/uu/cut/src/cut.rs | 29 ++++++++++++++++------------- tests/by-util/test_cut.rs | 8 +++++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 421b35eac..3dde5e665 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -350,10 +350,7 @@ fn cut_files(mut filenames: Vec, mode: &Mode) { // Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively // Allow either delimiter to have a value that is neither UTF-8 nor ASCII to align with GNU behavior -fn get_delimiters( - matches: &ArgMatches, - delimiter_is_equal: bool, -) -> UResult<(Delimiter, 
Option<&[u8]>)> { +fn get_delimiters(matches: &ArgMatches) -> UResult<(Delimiter, Option<&[u8]>)> { let whitespace_delimited = matches.get_flag(options::WHITESPACE_DELIMITED); let delim_opt = matches.get_one::(options::DELIMITER); let delim = match delim_opt { @@ -364,12 +361,7 @@ fn get_delimiters( )); } Some(os_string) => { - // GNU's `cut` supports `-d=` to set the delimiter to `=`. - // Clap parsing is limited in this situation, see: - // https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242 - if delimiter_is_equal { - Delimiter::Slice(b"=") - } else if os_string == "''" || os_string.is_empty() { + if os_string == "''" || os_string.is_empty() { // treat `''` as empty delimiter Delimiter::Slice(b"\0") } else { @@ -423,15 +415,26 @@ mod options { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let args = args.collect::>(); + // GNU's `cut` supports `-d=` to set the delimiter to `=`. + // Clap parsing is limited in this situation, see: + // https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242 + let args: Vec = args + .into_iter() + .map(|x| { + if x == "-d=" { + "--delimiter==".into() + } else { + x + } + }) + .collect(); - let delimiter_is_equal = args.contains(&OsString::from("-d=")); // special case let matches = uu_app().try_get_matches_from(args)?; let complement = matches.get_flag(options::COMPLEMENT); let only_delimited = matches.get_flag(options::ONLY_DELIMITED); - let (delimiter, out_delimiter) = get_delimiters(&matches, delimiter_is_equal)?; + let (delimiter, out_delimiter) = get_delimiters(&matches)?; let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED)); // Only one, and only one of cutting mode arguments, i.e. 
`-b`, `-c`, `-f`, diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 6c6914a12..c9a932a67 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -299,9 +299,15 @@ fn test_newline_as_delimiter() { fn test_multiple_delimiters() { new_ucmd!() .args(&["-f2", "-d:", "-d="]) - .pipe_in("a=b\n") + .pipe_in("a:=b\n") .succeeds() .stdout_only("b\n"); + + new_ucmd!() + .args(&["-f2", "-d=", "-d:"]) + .pipe_in("a:=b\n") + .succeeds() + .stdout_only("=b\n"); } #[test] From 31ffc3a0ebd444edeb72db8fb7edf36a24ad180b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 18 Dec 2024 20:42:28 +0100 Subject: [PATCH 158/179] mkfifo: better handle the mode + umask Should make tests/misc/mknod.sh pass --- src/uu/mkfifo/Cargo.toml | 2 +- src/uu/mkfifo/src/mkfifo.rs | 19 ++++++++++++--- tests/by-util/test_mkfifo.rs | 47 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/src/uu/mkfifo/Cargo.toml b/src/uu/mkfifo/Cargo.toml index 960ed601d..68f16a1f6 100644 --- a/src/uu/mkfifo/Cargo.toml +++ b/src/uu/mkfifo/Cargo.toml @@ -19,7 +19,7 @@ path = "src/mkfifo.rs" [dependencies] clap = { workspace = true } libc = { workspace = true } -uucore = { workspace = true } +uucore = { workspace = true, features = ["fs"] } [[bin]] name = "mkfifo" diff --git a/src/uu/mkfifo/src/mkfifo.rs b/src/uu/mkfifo/src/mkfifo.rs index 9320f76ed..01fc5dc1e 100644 --- a/src/uu/mkfifo/src/mkfifo.rs +++ b/src/uu/mkfifo/src/mkfifo.rs @@ -6,6 +6,8 @@ use clap::{crate_version, Arg, ArgAction, Command}; use libc::mkfifo; use std::ffi::CString; +use std::fs; +use std::os::unix::fs::PermissionsExt; use uucore::display::Quotable; use uucore::error::{UResult, USimpleError}; use uucore::{format_usage, help_about, help_usage, show}; @@ -32,11 +34,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } let mode = match matches.get_one::(options::MODE) { + // if mode is passed, ignore umask Some(m) => match usize::from_str_radix(m, 8) 
{ Ok(m) => m, Err(e) => return Err(USimpleError::new(1, format!("invalid mode: {e}"))), }, - None => 0o666, + // Default value + umask if present + None => 0o666 & !(uucore::mode::get_umask() as usize), }; let fifos: Vec = match matches.get_many::(options::FIFO) { @@ -47,12 +51,20 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { for f in fifos { let err = unsafe { let name = CString::new(f.as_bytes()).unwrap(); - mkfifo(name.as_ptr(), mode as libc::mode_t) + mkfifo(name.as_ptr(), 0o666) }; if err == -1 { show!(USimpleError::new( 1, - format!("cannot create fifo {}: File exists", f.quote()) + format!("cannot create fifo {}: File exists", f.quote()), + )); + } + + // Explicitly set the permissions to ignore umask + if let Err(e) = fs::set_permissions(&f, fs::Permissions::from_mode(mode as u32)) { + return Err(USimpleError::new( + 1, + format!("cannot set permissions on {}: {}", f.quote(), e), )); } } @@ -71,7 +83,6 @@ pub fn uu_app() -> Command { .short('m') .long(options::MODE) .help("file permissions for the fifo") - .default_value("0666") .value_name("MODE"), ) .arg( diff --git a/tests/by-util/test_mkfifo.rs b/tests/by-util/test_mkfifo.rs index 731b6c1d5..e25bbfc44 100644 --- a/tests/by-util/test_mkfifo.rs +++ b/tests/by-util/test_mkfifo.rs @@ -52,3 +52,50 @@ fn test_create_one_fifo_already_exists() { .fails() .stderr_is("mkfifo: cannot create fifo 'abcdef': File exists\n"); } + +#[test] +fn test_create_fifo_with_mode_and_umask() { + use uucore::fs::display_permissions; + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let test_fifo_creation = |mode: &str, umask: u16, expected: &str| { + scene + .ucmd() + .arg("-m") + .arg(mode) + .arg(format!("fifo_test_{mode}")) + .umask(libc::mode_t::from(umask)) + .succeeds(); + + let metadata = std::fs::metadata(at.subdir.join(format!("fifo_test_{mode}"))).unwrap(); + let permissions = display_permissions(&metadata, true); + assert_eq!(permissions, expected.to_string()); + }; + + 
test_fifo_creation("734", 0o077, "prwx-wxr--"); // spell-checker:disable-line + test_fifo_creation("706", 0o777, "prwx---rw-"); // spell-checker:disable-line +} + +#[test] +fn test_create_fifo_with_umask() { + use uucore::fs::display_permissions; + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let test_fifo_creation = |umask: u16, expected: &str| { + scene + .ucmd() + .arg("fifo_test") + .umask(libc::mode_t::from(umask)) + .succeeds(); + + let metadata = std::fs::metadata(at.subdir.join("fifo_test")).unwrap(); + let permissions = display_permissions(&metadata, true); + assert_eq!(permissions, expected.to_string()); + at.remove("fifo_test"); + }; + + test_fifo_creation(0o022, "prw-r--r--"); // spell-checker:disable-line + test_fifo_creation(0o777, "p---------"); // spell-checker:disable-line +} From d762a1633f33576332cd2f860ac1298df674a065 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 21 Dec 2024 19:31:59 +0000 Subject: [PATCH 159/179] chore(deps): update rust crate thiserror to v2.0.9 --- Cargo.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 84c28bcd9..972a4efcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2317,11 +2317,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.8" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f5383f3e0071702bf93ab5ee99b52d26936be9dedd9413067cbdcddcb6141a" +checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "thiserror-impl 2.0.8", + "thiserror-impl 2.0.9", ] [[package]] @@ -2337,9 +2337,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.8" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f357fcec90b3caef6623a099691be676d033b40a058ac95d2a6ade6fa0c943" +checksum = 
"7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" dependencies = [ "proc-macro2", "quote", @@ -2538,7 +2538,7 @@ version = "0.0.28" dependencies = [ "clap", "nix", - "thiserror 2.0.8", + "thiserror 2.0.9", "uucore", ] @@ -2550,7 +2550,7 @@ dependencies = [ "fts-sys", "libc", "selinux", - "thiserror 2.0.8", + "thiserror 2.0.9", "uucore", ] @@ -2627,7 +2627,7 @@ version = "0.0.28" dependencies = [ "clap", "regex", - "thiserror 2.0.8", + "thiserror 2.0.9", "uucore", ] @@ -3143,7 +3143,7 @@ dependencies = [ "clap", "libc", "selinux", - "thiserror 2.0.8", + "thiserror 2.0.9", "uucore", ] @@ -3422,7 +3422,7 @@ version = "0.0.28" dependencies = [ "chrono", "clap", - "thiserror 2.0.8", + "thiserror 2.0.9", "utmp-classic", "uucore", ] @@ -3453,7 +3453,7 @@ dependencies = [ "clap", "libc", "nix", - "thiserror 2.0.8", + "thiserror 2.0.9", "unicode-width 0.2.0", "uucore", ] @@ -3515,7 +3515,7 @@ dependencies = [ "sha3", "sm3", "tempfile", - "thiserror 2.0.8", + "thiserror 2.0.9", "time", "uucore_procs", "walkdir", @@ -3988,7 +3988,7 @@ dependencies = [ "flate2", "indexmap", "memchr", - "thiserror 2.0.8", + "thiserror 2.0.9", "zopfli", ] From 1337c6f174d6364e9d642c956c93963954eb6427 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 22 Dec 2024 12:52:05 +0100 Subject: [PATCH 160/179] Try to report no longer SKIP --- .github/workflows/GnuTests.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index 04b487f5c..bba5de96c 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -249,11 +249,15 @@ jobs: CURRENT_RUN_ERROR=$(sed -n "s/^ERROR: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort) REF_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort) CURRENT_RUN_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort) - echo "Detailled information:" + REF_SKIP=$(sed -n "s/^SKIP: 
\([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort) + CURRENT_RUN_SKIP=$(sed -n "s/^SKIP: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort) + + echo "Detailed information:" echo "REF_ERROR = ${REF_ERROR}" echo "CURRENT_RUN_ERROR = ${CURRENT_RUN_ERROR}" echo "REF_FAILING = ${REF_FAILING}" echo "CURRENT_RUN_FAILING = ${CURRENT_RUN_FAILING}" + echo "REF_SKIP_PASS = ${REF_SKIP}" # Compare failing and error tests for LINE in ${CURRENT_RUN_FAILING} @@ -313,6 +317,17 @@ jobs: echo $MSG >> ${COMMENT_LOG} fi done + + for LINE in ${REF_SKIP} + do + if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_SKIP}" + then + MSG="Congrats! The gnu test ${LINE} is no longer SKIP! (might be PASS, ERROR or FAIL)" + echo "::warning ::$MSG" + echo $MSG >> ${COMMENT_LOG} + fi + done + else echo "::warning ::Skipping ${test_type} test failure comparison; no prior reference test logs are available." fi From b1d4e1b8128167ba5dc78ce749609456ed38463a Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 22 Dec 2024 14:12:57 +0100 Subject: [PATCH 161/179] gnu comment: explain what might be the state --- .github/workflows/GnuTests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index bba5de96c..ab973defc 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -312,7 +312,7 @@ jobs: do if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_ERROR}" then - MSG="Congrats! The gnu test ${LINE} is no longer ERROR!" + MSG="Congrats! The gnu test ${LINE} is no longer ERROR! 
(might be PASS or FAIL)" echo "::warning ::$MSG" echo $MSG >> ${COMMENT_LOG} fi From efe3cda6ffe900efab8b2e7d5259218f17cabcb1 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 22 Dec 2024 14:26:35 +0100 Subject: [PATCH 162/179] echo: remove code made obsolete by MSRV 1.79 --- src/uu/echo/src/echo.rs | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 746cdd7c5..2d2884b1d 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -208,13 +208,6 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result= 1.79.0 - // https://github.com/rust-lang/rust/pull/121346 - // TODO: when we have a MSRV >= 1.79.0, delete these "hold" bindings - let hold_one_byte_outside_of_match: [u8; 1_usize]; - let hold_two_bytes_outside_of_match: [u8; 2_usize]; - let unescaped: &[u8] = match *next { b'\\' => br"\", b'a' => b"\x07", @@ -230,12 +223,7 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result= 1.79.0 - hold_one_byte_outside_of_match = [parsed_hexadecimal_number]; - - // TODO: when we have a MSRV >= 1.79.0, return reference to a temporary array: - // &[parsed_hexadecimal_number] - &hold_one_byte_outside_of_match + &[parsed_hexadecimal_number] } else { // "\x" with any non-hexadecimal digit after means "\x" is treated literally br"\x" @@ -246,12 +234,7 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result= 1.79.0 - hold_one_byte_outside_of_match = [parsed_octal_number]; - - // TODO: when we have a MSRV >= 1.79.0, return reference to a temporary array: - // &[parsed_octal_number] - &hold_one_byte_outside_of_match + &[parsed_octal_number] } else { // "\0" with any non-octal digit after it means "\0" is treated as ASCII '\0' (NUL), 0x00 b"\0" @@ -259,9 +242,7 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result { // Backslash and the following byte are treated literally - 
hold_two_bytes_outside_of_match = [b'\\', other_byte]; - - &hold_two_bytes_outside_of_match + &[b'\\', other_byte] } }; From 392c48002cc966e80ab018d49b3969bcf5c5756b Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 22 Dec 2024 14:53:37 +0100 Subject: [PATCH 163/179] cut: don't merge adjacent ranges --- src/uucore/src/lib/features/ranges.rs | 11 ++++------- tests/by-util/test_cut.rs | 2 -- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/uucore/src/lib/features/ranges.rs b/src/uucore/src/lib/features/ranges.rs index 222be7ca3..88851b9aa 100644 --- a/src/uucore/src/lib/features/ranges.rs +++ b/src/uucore/src/lib/features/ranges.rs @@ -91,7 +91,7 @@ impl Range { Ok(Self::merge(ranges)) } - /// Merge any overlapping ranges + /// Merge any overlapping ranges. Adjacent ranges are *NOT* merged. /// /// Is guaranteed to return only disjoint ranges in a sorted order. fn merge(mut ranges: Vec) -> Vec { @@ -101,10 +101,7 @@ impl Range { for i in 0..ranges.len() { let j = i + 1; - // The +1 is a small optimization, because we can merge adjacent Ranges. - // For example (1,3) and (4,6), because in the integers, there are no - // possible values between 3 and 4, this is equivalent to (1,6). 
- while j < ranges.len() && ranges[j].low <= ranges[i].high + 1 { + while j < ranges.len() && ranges[j].low <= ranges[i].high { let j_high = ranges.remove(j).high; ranges[i].high = max(ranges[i].high, j_high); } @@ -216,8 +213,8 @@ mod test { &[r(10, 40), r(50, 60)], ); - // Merge adjacent ranges - m(vec![r(1, 3), r(4, 6)], &[r(1, 6)]); + // Don't merge adjacent ranges + m(vec![r(1, 3), r(4, 6)], &[r(1, 3), r(4, 6)]); } #[test] diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index c9a932a67..1aa3c126a 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -350,7 +350,6 @@ fn test_newline_preservation_with_f1_option() { ucmd.args(&["-f1-", "1"]).succeeds().stdout_is(expected); } -#[ignore = "Not yet implemented"] #[test] fn test_output_delimiter_with_character_ranges() { new_ucmd!() @@ -360,7 +359,6 @@ fn test_output_delimiter_with_character_ranges() { .stdout_only("bc:defg\n"); } -#[ignore = "Not yet implemented"] #[test] fn test_output_delimiter_with_adjacent_ranges() { new_ucmd!() From d25d2df7f9dc9579eae59ae77e7882145622136e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 22 Dec 2024 14:23:29 +0000 Subject: [PATCH 164/179] chore(deps): update rust crate platform-info to v2.0.5 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 972a4efcf..435d1a39d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1711,9 +1711,9 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "platform-info" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91077ffd05d058d70d79eefcd7d7f6aac34980860a7519960f7913b6563a8c3a" +checksum = "7539aeb3fdd8cb4f6a331307cf71a1039cee75e94e8a71725b9484f4a0d9451a" dependencies = [ "libc", "winapi", From 9a97c18877691f0f17b0fc1b3c0d9b21d2354b14 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru 
Date: Sat, 21 Dec 2024 21:18:38 +0100 Subject: [PATCH 165/179] ls: when a file has capabilities (setcap), change the color Should fix tests/ls/capability.sh --- src/uu/ls/src/colors.rs | 16 ++++++++++++++ tests/by-util/test_ls.rs | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/uu/ls/src/colors.rs b/src/uu/ls/src/colors.rs index 6c580d18a..0e314e5a4 100644 --- a/src/uu/ls/src/colors.rs +++ b/src/uu/ls/src/colors.rs @@ -156,6 +156,22 @@ pub(crate) fn color_name( target_symlink: Option<&PathData>, wrap: bool, ) -> String { + #[cfg(any(not(unix), target_os = "android", target_os = "macos"))] + let has_capabilities = false; + #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] + // Check if the file has capabilities + let has_capabilities = uucore::fsxattr::has_acl(path.p_buf.as_path()); + + // If the file has capabilities, use a specific style for `ca` (capabilities) + if has_capabilities { + if let Some(style) = style_manager + .colors + .style_for_indicator(Indicator::Capabilities) + { + return style_manager.apply_style(Some(style), name, wrap); + } + } + if !path.must_dereference { // If we need to dereference (follow) a symlink, we will need to get the metadata if let Some(de) = &path.de { diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 3b2d46b39..f65078a0d 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -3,6 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore (words) READMECAREFULLY birthtime doesntexist oneline somebackup lrwx somefile somegroup somehiddenbackup somehiddenfile tabsize aaaaaaaa bbbb cccc dddddddd ncccc neee naaaaa nbcdef nfffff dired subdired tmpfs mdir COLORTERM mexe bcdef mfoo +// spell-checker:ignore (words) fakeroot setcap #![allow( clippy::similar_names, clippy::too_many_lines, @@ -5516,3 +5517,49 @@ fn test_suffix_case_sensitivity() { /* cSpell:enable */ ); } + +#[cfg(all(unix, target_os = "linux"))] +#[test] +fn test_ls_capabilities() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + // Test must be run as root (or with `sudo -E`) + // fakeroot setcap cap_net_bind_service=ep /tmp/file_name + // doesn't trigger an error and fails silently + if scene.cmd("whoami").run().stdout_str() != "root\n" { + return; + } + at.mkdir("test"); + at.mkdir("test/dir"); + at.touch("test/cap_pos"); + at.touch("test/dir/cap_neg"); + at.touch("test/dir/cap_pos"); + + let files = ["test/cap_pos", "test/dir/cap_pos"]; + for file in &files { + scene + .cmd("sudo") + .args(&[ + "-E", + "--non-interactive", + "setcap", + "cap_net_bind_service=ep", + at.plus(file).to_str().unwrap(), + ]) + .succeeds(); + } + + let ls_colors = "di=:ca=30;41"; + + scene + .ucmd() + .env("LS_COLORS", ls_colors) + .arg("--color=always") + .arg("test/cap_pos") + .arg("test/dir") + .succeeds() + .stdout_contains("\x1b[30;41mtest/cap_pos") // spell-checker:disable-line + .stdout_contains("\x1b[30;41mcap_pos") // spell-checker:disable-line + .stdout_does_not_contain("0;41mtest/dir/cap_neg"); // spell-checker:disable-line +} From ffc6eb094a646554fdd2c456a7cfd572e6e4c700 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 21 Dec 2024 22:51:29 +0100 Subject: [PATCH 166/179] ls: Don't call the capabilites features of the system when passed an empty ca= in LS_COLORS In parallel, in the GNU test, adjust the GNU tests as we don't use libcap but xattr instead. 
--- .../cspell.dictionaries/jargon.wordlist.txt | 3 +++ src/uu/ls/src/colors.rs | 26 +++++++++++-------- util/gnu-patches/tests_ls_no_cap.patch | 22 ++++++++++++++++ 3 files changed, 40 insertions(+), 11 deletions(-) create mode 100644 util/gnu-patches/tests_ls_no_cap.patch diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 6dd5483c6..4109630e5 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -10,6 +10,7 @@ bytewise canonicalization canonicalize canonicalizing +capget codepoint codepoints codegen @@ -65,6 +66,7 @@ kibi kibibytes libacl lcase +llistxattr lossily lstat mebi @@ -108,6 +110,7 @@ seedable semver semiprime semiprimes +setcap setfacl shortcode shortcodes diff --git a/src/uu/ls/src/colors.rs b/src/uu/ls/src/colors.rs index 0e314e5a4..4f97e42e2 100644 --- a/src/uu/ls/src/colors.rs +++ b/src/uu/ls/src/colors.rs @@ -156,19 +156,23 @@ pub(crate) fn color_name( target_symlink: Option<&PathData>, wrap: bool, ) -> String { - #[cfg(any(not(unix), target_os = "android", target_os = "macos"))] - let has_capabilities = false; - #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] // Check if the file has capabilities - let has_capabilities = uucore::fsxattr::has_acl(path.p_buf.as_path()); - - // If the file has capabilities, use a specific style for `ca` (capabilities) - if has_capabilities { - if let Some(style) = style_manager + #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] + { + // Skip checking capabilities if LS_COLORS=ca=: + let capabilities = style_manager .colors - .style_for_indicator(Indicator::Capabilities) - { - return style_manager.apply_style(Some(style), name, wrap); + .style_for_indicator(Indicator::Capabilities); + + let has_capabilities = if capabilities.is_none() { + false + } else { + uucore::fsxattr::has_acl(path.p_buf.as_path()) + }; + + // If the file has 
capabilities, use a specific style for `ca` (capabilities) + if has_capabilities { + return style_manager.apply_style(capabilities, name, wrap); } } diff --git a/util/gnu-patches/tests_ls_no_cap.patch b/util/gnu-patches/tests_ls_no_cap.patch new file mode 100644 index 000000000..5944e3f56 --- /dev/null +++ b/util/gnu-patches/tests_ls_no_cap.patch @@ -0,0 +1,22 @@ +diff --git a/tests/ls/no-cap.sh b/tests/ls/no-cap.sh +index 3d84c74ff..d1f60e70a 100755 +--- a/tests/ls/no-cap.sh ++++ b/tests/ls/no-cap.sh +@@ -21,13 +21,13 @@ print_ver_ ls + require_strace_ capget + + LS_COLORS=ca=1; export LS_COLORS +-strace -e capget ls --color=always > /dev/null 2> out || fail=1 +-$EGREP 'capget\(' out || skip_ "your ls doesn't call capget" ++strace -e llistxattr ls --color=always > /dev/null 2> out || fail=1 ++$EGREP 'llistxattr\(' out || skip_ "your ls doesn't call llistxattr" + + rm -f out + + LS_COLORS=ca=:; export LS_COLORS +-strace -e capget ls --color=always > /dev/null 2> out || fail=1 +-$EGREP 'capget\(' out && fail=1 ++strace -e llistxattr ls --color=always > /dev/null 2> out || fail=1 ++$EGREP 'llistxattr\(' out && fail=1 + + Exit $fail From cec9d9f00a767f086c4d63467eadbe957184075b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 22 Dec 2024 16:31:56 +0100 Subject: [PATCH 167/179] GnuTests: also display CURRENT_RUN_SKIP for debug purposes --- .github/workflows/GnuTests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index ab973defc..0b9d8ce7f 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -258,6 +258,7 @@ jobs: echo "REF_FAILING = ${REF_FAILING}" echo "CURRENT_RUN_FAILING = ${CURRENT_RUN_FAILING}" echo "REF_SKIP_PASS = ${REF_SKIP}" + echo "CURRENT_RUN_SKIP = ${CURRENT_RUN_SKIP}" # Compare failing and error tests for LINE in ${CURRENT_RUN_FAILING} From e4d03122654ee9946bc5b710807e8ac5cf8a9c58 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" 
<29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 19:40:37 +0000 Subject: [PATCH 168/179] chore(deps): update vmactions/freebsd-vm action to v1.1.6 --- .github/workflows/freebsd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 42255d889..27ff2afe4 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -41,7 +41,7 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.7 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v1.1.5 + uses: vmactions/freebsd-vm@v1.1.6 with: usesh: true sync: rsync @@ -135,7 +135,7 @@ jobs: - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.7 - name: Prepare, build and test - uses: vmactions/freebsd-vm@v1.1.5 + uses: vmactions/freebsd-vm@v1.1.6 with: usesh: true sync: rsync From 2c2f5f14a40d762374e02f3f96985ee872c3d912 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 24 Dec 2024 16:03:00 +0100 Subject: [PATCH 169/179] echo: use succeeds() to simplify some tests --- tests/by-util/test_echo.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/by-util/test_echo.rs b/tests/by-util/test_echo.rs index 136500b48..8cb60877c 100644 --- a/tests/by-util/test_echo.rs +++ b/tests/by-util/test_echo.rs @@ -219,8 +219,7 @@ fn test_hyphen_values_at_start() { .arg("-test") .arg("araba") .arg("-merci") - .run() - .success() + .succeeds() .stdout_does_not_contain("-E") .stdout_is("-test araba -merci\n"); } @@ -231,8 +230,7 @@ fn test_hyphen_values_between() { .arg("test") .arg("-E") .arg("araba") - .run() - .success() + .succeeds() .stdout_is("test -E araba\n"); new_ucmd!() @@ -240,8 +238,7 @@ fn test_hyphen_values_between() { .arg("dum dum dum") .arg("-e") .arg("dum") - .run() - .success() + .succeeds() .stdout_is("dumdum dum dum dum -e dum\n"); } From 90465357e2d313cf51ad5f470cc6bfd5c339c39c Mon Sep 17 00:00:00 2001 From: 
Daniel Hofstetter Date: Tue, 24 Dec 2024 17:01:45 +0100 Subject: [PATCH 170/179] echo: handle double hyphens --- src/uu/echo/src/echo.rs | 19 ++++++++++++++++++- tests/by-util/test_echo.rs | 10 ++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 2d2884b1d..097e4f2e9 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -255,9 +255,26 @@ fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result impl uucore::Args { + let mut result = Vec::new(); + let mut is_first_double_hyphen = true; + + for arg in args { + if arg == "--" && is_first_double_hyphen { + result.push(OsString::from("--")); + is_first_double_hyphen = false; + } + result.push(arg); + } + + result.into_iter() +} + #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().get_matches_from(args); + let matches = uu_app().get_matches_from(handle_double_hyphens(args)); // TODO // "If the POSIXLY_CORRECT environment variable is set, then when echo’s first argument is not -n it outputs option-like arguments instead of treating them as options." diff --git a/tests/by-util/test_echo.rs b/tests/by-util/test_echo.rs index 8cb60877c..dd6b412a4 100644 --- a/tests/by-util/test_echo.rs +++ b/tests/by-util/test_echo.rs @@ -242,6 +242,16 @@ fn test_hyphen_values_between() { .stdout_is("dumdum dum dum dum -e dum\n"); } +#[test] +fn test_double_hyphens() { + new_ucmd!().arg("--").succeeds().stdout_only("--\n"); + new_ucmd!() + .arg("--") + .arg("--") + .succeeds() + .stdout_only("-- --\n"); +} + #[test] fn wrapping_octal() { // Some odd behavior of GNU. 
Values of \0400 and greater do not fit in the From e221d2a624ebb1df1b623b4dc55c5bb7cfd57774 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Wed, 25 Dec 2024 09:39:03 +0100 Subject: [PATCH 171/179] comm: adapt GNU error messages --- util/gnu-patches/tests_comm.pl.patch | 44 ++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 util/gnu-patches/tests_comm.pl.patch diff --git a/util/gnu-patches/tests_comm.pl.patch b/util/gnu-patches/tests_comm.pl.patch new file mode 100644 index 000000000..d3d5595a2 --- /dev/null +++ b/util/gnu-patches/tests_comm.pl.patch @@ -0,0 +1,44 @@ +diff --git a/tests/misc/comm.pl b/tests/misc/comm.pl +index 5bd5f56d7..8322d92ba 100755 +--- a/tests/misc/comm.pl ++++ b/tests/misc/comm.pl +@@ -73,18 +73,24 @@ my @Tests = + + # invalid missing command line argument (1) + ['missing-arg1', $inputs[0], {EXIT=>1}, +- {ERR => "$prog: missing operand after 'a'\n" +- . "Try '$prog --help' for more information.\n"}], ++ {ERR => "error: the following required arguments were not provided:\n" ++ . " \n\n" ++ . "Usage: $prog [OPTION]... FILE1 FILE2\n\n" ++ . "For more information, try '--help'.\n"}], + + # invalid missing command line argument (both) + ['missing-arg2', {EXIT=>1}, +- {ERR => "$prog: missing operand\n" +- . "Try '$prog --help' for more information.\n"}], ++ {ERR => "error: the following required arguments were not provided:\n" ++ . " \n" ++ . " \n\n" ++ . "Usage: $prog [OPTION]... FILE1 FILE2\n\n" ++ . "For more information, try '--help'.\n"}], + + # invalid extra command line argument + ['extra-arg', @inputs, 'no-such', {EXIT=>1}, +- {ERR => "$prog: extra operand 'no-such'\n" +- . "Try '$prog --help' for more information.\n"}], ++ {ERR => "error: unexpected argument 'no-such' found\n\n" ++ . "Usage: $prog [OPTION]... FILE1 FILE2\n\n" ++ . 
"For more information, try '--help'.\n"}], + + # out-of-order input + ['ooo', {IN=>{a=>"1\n3"}}, {IN=>{b=>"3\n2"}}, {EXIT=>1}, +@@ -163,7 +169,7 @@ my @Tests = + + # invalid dual delimiter + ['delim-dual', '--output-delimiter=,', '--output-delimiter=+', @inputs, +- {EXIT=>1}, {ERR => "$prog: multiple output delimiters specified\n"}], ++ {EXIT=>1}, {ERR => "$prog: multiple conflicting output delimiters specified\n"}], + + # valid dual delimiter specification + ['delim-dual2', '--output-delimiter=,', '--output-delimiter=,', @inputs, From 2bfa45652e0a651fda1b3e00a0e9d4f3674ccd1c Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 07:24:21 +0000 Subject: [PATCH 172/179] fix(deps): update rust crate quote to v1.0.38 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 435d1a39d..2717a64e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1805,9 +1805,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] From db37c316af1534bae2646a65eaab3dceabbad418 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 26 Dec 2024 09:40:55 +0100 Subject: [PATCH 173/179] csplit: add support for -q --- src/uu/csplit/src/csplit.rs | 3 ++- tests/by-util/test_csplit.rs | 27 ++++++++++++++++----------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/uu/csplit/src/csplit.rs b/src/uu/csplit/src/csplit.rs index 2054e6cff..0602f0dee 100644 --- a/src/uu/csplit/src/csplit.rs +++ b/src/uu/csplit/src/csplit.rs @@ -621,8 +621,9 @@ pub fn uu_app() -> Command { ) .arg( Arg::new(options::QUIET) - .short('s') + 
.short('q') .long(options::QUIET) + .visible_short_alias('s') .visible_alias("silent") .help("do not print counts of output file sizes") .action(ArgAction::SetTrue), diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs index 03b8c92fc..10ead0b45 100644 --- a/tests/by-util/test_csplit.rs +++ b/tests/by-util/test_csplit.rs @@ -387,18 +387,23 @@ fn test_option_keep() { #[test] fn test_option_quiet() { - let (at, mut ucmd) = at_and_ucmd!(); - ucmd.args(&["--quiet", "numbers50.txt", "13", "%25%", "/0$/"]) - .succeeds() - .no_stdout(); + for arg in ["-q", "--quiet", "-s", "--silent"] { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&[arg, "numbers50.txt", "13", "%25%", "/0$/"]) + .succeeds() + .no_stdout(); - let count = glob(&at.plus_as_string("xx*")) - .expect("there should be splits created") - .count(); - assert_eq!(count, 3); - assert_eq!(at.read("xx00"), generate(1, 13)); - assert_eq!(at.read("xx01"), generate(25, 30)); - assert_eq!(at.read("xx02"), generate(30, 51)); + let count = glob(&at.plus_as_string("xx*")) + .expect("there should be splits created") + .count(); + assert_eq!(count, 3); + assert_eq!(at.read("xx00"), generate(1, 13)); + assert_eq!(at.read("xx01"), generate(25, 30)); + assert_eq!(at.read("xx02"), generate(30, 51)); + at.remove("xx00"); + at.remove("xx01"); + at.remove("xx02"); + } } #[test] From 1180905b5e8746cc480638be4929aa524fcf725b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 26 Dec 2024 16:43:53 +0100 Subject: [PATCH 174/179] cp: use the function from uucore --- src/uu/cp/Cargo.toml | 1 + src/uu/cp/src/cp.rs | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/uu/cp/Cargo.toml b/src/uu/cp/Cargo.toml index 6801e6a09..3912f3308 100644 --- a/src/uu/cp/Cargo.toml +++ b/src/uu/cp/Cargo.toml @@ -30,6 +30,7 @@ uucore = { workspace = true, features = [ "backup-control", "entries", "fs", + "fsxattr", "perms", "mode", "update-control", diff --git a/src/uu/cp/src/cp.rs 
b/src/uu/cp/src/cp.rs index 32168b090..2f1f1ef1e 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -17,6 +17,8 @@ use std::os::unix::ffi::OsStrExt; #[cfg(unix)] use std::os::unix::fs::{FileTypeExt, PermissionsExt}; use std::path::{Path, PathBuf, StripPrefixError}; +#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] +use uucore::fsxattr::copy_xattrs; use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command}; use filetime::FileTime; @@ -1603,16 +1605,11 @@ pub(crate) fn copy_attributes( })?; handle_preserve(&attributes.xattr, || -> CopyResult<()> { - #[cfg(all(unix, not(target_os = "android")))] + #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] { - let xattrs = xattr::list(source)?; - for attr in xattrs { - if let Some(attr_value) = xattr::get(source, attr.clone())? { - xattr::set(dest, attr, &attr_value[..])?; - } - } + copy_xattrs(source, dest)?; } - #[cfg(not(all(unix, not(target_os = "android"))))] + #[cfg(not(all(unix, not(any(target_os = "android", target_os = "macos")))))] { // The documentation for GNU cp states: // From 2deeb7882c58f6ed457bd40bd93f93ab70341b11 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 26 Dec 2024 17:10:01 +0100 Subject: [PATCH 175/179] xattr feature: enable it on mac too --- src/uu/cp/src/cp.rs | 6 +++--- src/uucore/src/lib/features.rs | 2 +- src/uucore/src/lib/lib.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 2f1f1ef1e..b74694047 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -17,7 +17,7 @@ use std::os::unix::ffi::OsStrExt; #[cfg(unix)] use std::os::unix::fs::{FileTypeExt, PermissionsExt}; use std::path::{Path, PathBuf, StripPrefixError}; -#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] +#[cfg(all(unix, not(target_os = "android")))] use uucore::fsxattr::copy_xattrs; use clap::{builder::ValueParser, crate_version, Arg, ArgAction, 
ArgMatches, Command}; @@ -1605,11 +1605,11 @@ pub(crate) fn copy_attributes( })?; handle_preserve(&attributes.xattr, || -> CopyResult<()> { - #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] + #[cfg(all(unix, not(target_os = "android")))] { copy_xattrs(source, dest)?; } - #[cfg(not(all(unix, not(any(target_os = "android", target_os = "macos")))))] + #[cfg(not(all(unix, not(target_os = "android"))))] { // The documentation for GNU cp states: // diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index dfe5b7733..cde1cf264 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -54,7 +54,7 @@ pub mod process; #[cfg(all(target_os = "linux", feature = "tty"))] pub mod tty; -#[cfg(all(unix, not(target_os = "macos"), feature = "fsxattr"))] +#[cfg(all(unix, feature = "fsxattr"))] pub mod fsxattr; #[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))] pub mod signals; diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 3200145bd..3a6a537ad 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -99,7 +99,7 @@ pub use crate::features::wide; #[cfg(feature = "fsext")] pub use crate::features::fsext; -#[cfg(all(unix, not(target_os = "macos"), feature = "fsxattr"))] +#[cfg(all(unix, feature = "fsxattr"))] pub use crate::features::fsxattr; //## core functions From 98c9be5ec4d5a61f52ea4582aab9918599d0a30a Mon Sep 17 00:00:00 2001 From: Solomon Date: Thu, 26 Dec 2024 12:48:29 -0700 Subject: [PATCH 176/179] mv: improve move-to-self error handling (#6995) - improve move-to-self detection, so this errors without data loss: ```diff mkdir mydir mv mydir mydir/subdir -mv: No such file or directory (os error 2) +mv: cannot move 'mydir' to a subdirectory of itself, 'mydir/subdir' ``` - align "cannot move source to a subdirectory of itself" and "same file" errors more closely with coreutils: ```diff mkdir mydir mv mydir/ mydir/.. 
-mv: cannot move 'mydir/' to a subdirectory of itself, 'mydir/../mydir/' +mv: 'mydir/' and 'mydir/../mydir' are the same file ``` Causing: https://github.com/nushell/nushell/issues/13082 --- src/uu/mv/src/error.rs | 12 +--- src/uu/mv/src/mv.rs | 137 +++++++++++++++++++++++++-------------- tests/by-util/test_mv.rs | 102 ++++++++++++++--------------- 3 files changed, 140 insertions(+), 111 deletions(-) diff --git a/src/uu/mv/src/error.rs b/src/uu/mv/src/error.rs index f989d4e13..6daa8188e 100644 --- a/src/uu/mv/src/error.rs +++ b/src/uu/mv/src/error.rs @@ -12,7 +12,6 @@ pub enum MvError { NoSuchFile(String), CannotStatNotADirectory(String), SameFile(String, String), - SelfSubdirectory(String), SelfTargetSubdirectory(String, String), DirectoryToNonDirectory(String), NonDirectoryToDirectory(String, String), @@ -29,14 +28,9 @@ impl Display for MvError { Self::NoSuchFile(s) => write!(f, "cannot stat {s}: No such file or directory"), Self::CannotStatNotADirectory(s) => write!(f, "cannot stat {s}: Not a directory"), Self::SameFile(s, t) => write!(f, "{s} and {t} are the same file"), - Self::SelfSubdirectory(s) => write!( - f, - "cannot move '{s}' to a subdirectory of itself, '{s}/{s}'" - ), - Self::SelfTargetSubdirectory(s, t) => write!( - f, - "cannot move '{s}' to a subdirectory of itself, '{t}/{s}'" - ), + Self::SelfTargetSubdirectory(s, t) => { + write!(f, "cannot move {s} to a subdirectory of itself, {t}") + } Self::DirectoryToNonDirectory(t) => { write!(f, "cannot overwrite directory {t} with non-directory") } diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 7debf52c9..675982bac 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -19,13 +19,13 @@ use std::io; use std::os::unix; #[cfg(windows)] use std::os::windows; -use std::path::{Path, PathBuf}; +use std::path::{absolute, Path, PathBuf}; use uucore::backup_control::{self, source_is_target_backup}; use uucore::display::Quotable; use uucore::error::{set_exit_code, FromIo, UResult, USimpleError, 
UUsageError}; use uucore::fs::{ - are_hardlinks_or_one_way_symlink_to_same_file, are_hardlinks_to_same_file, - path_ends_with_terminator, + are_hardlinks_or_one_way_symlink_to_same_file, are_hardlinks_to_same_file, canonicalize, + path_ends_with_terminator, MissingHandling, ResolveMode, }; #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] use uucore::fsxattr; @@ -322,20 +322,6 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> }); } - if (source.eq(target) - || are_hardlinks_to_same_file(source, target) - || are_hardlinks_or_one_way_symlink_to_same_file(source, target)) - && opts.backup == BackupMode::NoBackup - { - if source.eq(Path::new(".")) || source.ends_with("/.") || source.is_file() { - return Err( - MvError::SameFile(source.quote().to_string(), target.quote().to_string()).into(), - ); - } else { - return Err(MvError::SelfSubdirectory(source.display().to_string()).into()); - } - } - let target_is_dir = target.is_dir(); let source_is_dir = source.is_dir(); @@ -347,6 +333,8 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> return Err(MvError::FailedToAccessNotADirectory(target.quote().to_string()).into()); } + assert_not_same_file(source, target, target_is_dir, opts)?; + if target_is_dir { if opts.no_target_dir { if source.is_dir() { @@ -356,14 +344,6 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> } else { Err(MvError::DirectoryToNonDirectory(target.quote().to_string()).into()) } - // Check that source & target do not contain same subdir/dir when both exist - // mkdir dir1/dir2; mv dir1 dir1/dir2 - } else if target.starts_with(source) { - Err(MvError::SelfTargetSubdirectory( - source.display().to_string(), - target.display().to_string(), - ) - .into()) } else { move_files_into_dir(&[source.to_path_buf()], target, opts) } @@ -387,6 +367,88 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> } } +fn 
assert_not_same_file( + source: &Path, + target: &Path, + target_is_dir: bool, + opts: &Options, +) -> UResult<()> { + // we'll compare canonicalized_source and canonicalized_target for same file detection + let canonicalized_source = match canonicalize( + absolute(source)?, + MissingHandling::Normal, + ResolveMode::Logical, + ) { + Ok(source) if source.exists() => source, + _ => absolute(source)?, // file or symlink target doesn't exist but its absolute path is still used for comparison + }; + + // special case if the target exists, is a directory, and the `-T` flag wasn't used + let target_is_dir = target_is_dir && !opts.no_target_dir; + let canonicalized_target = if target_is_dir { + // `mv source_file target_dir` => target_dir/source_file + // canonicalize the path that exists (target directory) and join the source file name + canonicalize( + absolute(target)?, + MissingHandling::Normal, + ResolveMode::Logical, + )? + .join(source.file_name().unwrap_or_default()) + } else { + // `mv source target_dir/target` => target_dir/target + // we canonicalize target_dir and join /target + match absolute(target)?.parent() { + Some(parent) if parent.to_str() != Some("") => { + canonicalize(parent, MissingHandling::Normal, ResolveMode::Logical)? 
+ .join(target.file_name().unwrap_or_default()) + } + // path.parent() returns Some("") or None if there's no parent + _ => absolute(target)?, // absolute paths should always have a parent, but we'll fall back just in case + } + }; + + let same_file = (canonicalized_source.eq(&canonicalized_target) + || are_hardlinks_to_same_file(source, target) + || are_hardlinks_or_one_way_symlink_to_same_file(source, target)) + && opts.backup == BackupMode::NoBackup; + + // get the expected target path to show in errors + // this is based on the argument and not canonicalized + let target_display = match source.file_name() { + Some(file_name) if target_is_dir => { + // join target_dir/source_file in a platform-independent manner + let mut path = target + .display() + .to_string() + .trim_end_matches("/") + .to_owned(); + + path.push('/'); + path.push_str(&file_name.to_string_lossy()); + + path.quote().to_string() + } + _ => target.quote().to_string(), + }; + + if same_file + && (canonicalized_source.eq(&canonicalized_target) + || source.eq(Path::new(".")) + || source.ends_with("/.") + || source.is_file()) + { + return Err(MvError::SameFile(source.quote().to_string(), target_display).into()); + } else if (same_file || canonicalized_target.starts_with(canonicalized_source)) + // don't error if we're moving a symlink of a directory into itself + && !source.is_symlink() + { + return Err( + MvError::SelfTargetSubdirectory(source.quote().to_string(), target_display).into(), + ); + } + Ok(()) +} + fn handle_multiple_paths(paths: &[PathBuf], opts: &Options) -> UResult<()> { if opts.no_target_dir { return Err(UUsageError::new( @@ -425,10 +487,6 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options) return Err(MvError::NotADirectory(target_dir.quote().to_string()).into()); } - let canonicalized_target_dir = target_dir - .canonicalize() - .unwrap_or_else(|_| target_dir.to_path_buf()); - let multi_progress = options.progress_bar.then(MultiProgress::new); let 
count_progress = if let Some(ref multi_progress) = multi_progress { @@ -479,24 +537,9 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options) // Check if we have mv dir1 dir2 dir2 // And generate an error if this is the case - if let Ok(canonicalized_source) = sourcepath.canonicalize() { - if canonicalized_source == canonicalized_target_dir { - // User tried to move directory to itself, warning is shown - // and process of moving files is continued. - show!(USimpleError::new( - 1, - format!( - "cannot move '{}' to a subdirectory of itself, '{}/{}'", - sourcepath.display(), - uucore::fs::normalize_path(target_dir).display(), - canonicalized_target_dir.components().last().map_or_else( - || target_dir.display().to_string(), - |dir| { PathBuf::from(dir.as_os_str()).display().to_string() } - ) - ) - )); - continue; - } + if let Err(e) = assert_not_same_file(sourcepath, target_dir, true, options) { + show!(e); + continue; } match rename(sourcepath, &targetpath, options, multi_progress.as_ref()) { diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index ac64fae7e..1419be4e9 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -6,6 +6,7 @@ // spell-checker:ignore mydir use crate::common::util::TestScenario; use filetime::FileTime; +use rstest::rstest; use std::io::Write; #[test] @@ -467,7 +468,31 @@ fn test_mv_same_symlink() { .arg(file_c) .arg(file_a) .fails() - .stderr_is(format!("mv: '{file_c}' and '{file_a}' are the same file\n",)); + .stderr_is(format!("mv: '{file_c}' and '{file_a}' are the same file\n")); +} + +#[test] +#[cfg(all(unix, not(target_os = "android")))] +fn test_mv_same_broken_symlink() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.symlink_file("missing-target", "broken"); + + ucmd.arg("broken") + .arg("broken") + .fails() + .stderr_is("mv: 'broken' and 'broken' are the same file\n"); +} + +#[test] +#[cfg(all(unix, not(target_os = "android")))] +fn test_mv_symlink_into_target() { + let (at, mut 
ucmd) = at_and_ucmd!(); + + at.mkdir("dir"); + at.symlink_file("dir", "dir-link"); + + ucmd.arg("dir-link").arg("dir").succeeds(); } #[test] @@ -1389,24 +1414,6 @@ fn test_mv_interactive_error() { .is_empty()); } -#[test] -fn test_mv_into_self() { - let scene = TestScenario::new(util_name!()); - let at = &scene.fixtures; - let dir1 = "dir1"; - let dir2 = "dir2"; - at.mkdir(dir1); - at.mkdir(dir2); - - scene - .ucmd() - .arg(dir1) - .arg(dir2) - .arg(dir2) - .fails() - .stderr_contains("mv: cannot move 'dir2' to a subdirectory of itself, 'dir2/dir2'"); -} - #[test] fn test_mv_arg_interactive_skipped() { let (at, mut ucmd) = at_and_ucmd!(); @@ -1456,27 +1463,32 @@ fn test_mv_into_self_data() { assert!(!at.file_exists(file1)); } -#[test] -fn test_mv_directory_into_subdirectory_of_itself_fails() { +#[rstest] +#[case(vec!["mydir"], vec!["mydir", "mydir"], "mv: cannot move 'mydir' to a subdirectory of itself, 'mydir/mydir'")] +#[case(vec!["mydir"], vec!["mydir/", "mydir/"], "mv: cannot move 'mydir/' to a subdirectory of itself, 'mydir/mydir'")] +#[case(vec!["mydir"], vec!["./mydir", "mydir", "mydir/"], "mv: cannot move './mydir' to a subdirectory of itself, 'mydir/mydir'")] +#[case(vec!["mydir"], vec!["mydir/", "mydir/mydir_2/"], "mv: cannot move 'mydir/' to a subdirectory of itself, 'mydir/mydir_2/'")] +#[case(vec!["mydir/mydir_2"], vec!["mydir", "mydir/mydir_2"], "mv: cannot move 'mydir' to a subdirectory of itself, 'mydir/mydir_2/mydir'\n")] +#[case(vec!["mydir/mydir_2"], vec!["mydir/", "mydir/mydir_2/"], "mv: cannot move 'mydir/' to a subdirectory of itself, 'mydir/mydir_2/mydir'\n")] +#[case(vec!["mydir", "mydir_2"], vec!["mydir/", "mydir_2/", "mydir_2/"], "mv: cannot move 'mydir_2/' to a subdirectory of itself, 'mydir_2/mydir_2'")] +#[case(vec!["mydir"], vec!["mydir/", "mydir"], "mv: cannot move 'mydir/' to a subdirectory of itself, 'mydir/mydir'")] +#[case(vec!["mydir"], vec!["-T", "mydir", "mydir"], "mv: 'mydir' and 'mydir' are the same file")] 
+#[case(vec!["mydir"], vec!["mydir/", "mydir/../"], "mv: 'mydir/' and 'mydir/../mydir' are the same file")] +fn test_mv_directory_self( + #[case] dirs: Vec<&str>, + #[case] args: Vec<&str>, + #[case] expected_error: &str, +) { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; - let dir1 = "mydir"; - let dir2 = "mydir/mydir_2"; - at.mkdir(dir1); - at.mkdir(dir2); - scene.ucmd().arg(dir1).arg(dir2).fails().stderr_contains( - "mv: cannot move 'mydir' to a subdirectory of itself, 'mydir/mydir_2/mydir'", - ); - - // check that it also errors out with / + for dir in dirs { + at.mkdir_all(dir); + } scene .ucmd() - .arg(format!("{dir1}/")) - .arg(dir2) + .args(&args) .fails() - .stderr_contains( - "mv: cannot move 'mydir/' to a subdirectory of itself, 'mydir/mydir_2/mydir/'", - ); + .stderr_contains(expected_error); } #[test] @@ -1755,23 +1767,3 @@ fn test_mv_error_msg_with_multiple_sources_that_does_not_exist() { .stderr_contains("mv: cannot stat 'a': No such file or directory") .stderr_contains("mv: cannot stat 'b/': No such file or directory"); } - -#[test] -fn test_mv_error_cant_move_itself() { - let scene = TestScenario::new(util_name!()); - let at = &scene.fixtures; - at.mkdir("b"); - scene - .ucmd() - .arg("b") - .arg("b/") - .fails() - .stderr_contains("mv: cannot move 'b' to a subdirectory of itself, 'b/b'"); - scene - .ucmd() - .arg("./b") - .arg("b") - .arg("b/") - .fails() - .stderr_contains("mv: cannot move 'b' to a subdirectory of itself, 'b/b'"); -} From 20dfb270577eb77ddd7fac6b9f1342c207d99458 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 27 Dec 2024 09:12:47 +0100 Subject: [PATCH 177/179] cut: fix handling of newline as delimiter --- src/uu/cut/src/cut.rs | 38 +++++++++++++++++++++++++++++++++++++- tests/by-util/test_cut.rs | 17 ++++++++++++++--- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index 3dde5e665..5e128425b 100644 --- a/src/uu/cut/src/cut.rs +++ 
b/src/uu/cut/src/cut.rs @@ -9,7 +9,7 @@ use bstr::io::BufReadExt; use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command}; use std::ffi::OsString; use std::fs::File; -use std::io::{stdin, stdout, BufReader, BufWriter, IsTerminal, Read, Write}; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, IsTerminal, Read, Write}; use std::path::Path; use uucore::display::Quotable; use uucore::error::{set_exit_code, FromIo, UResult, USimpleError}; @@ -267,10 +267,46 @@ fn cut_fields_implicit_out_delim( Ok(()) } +// The input delimiter is identical to `newline_char` +fn cut_fields_newline_char_delim( + reader: R, + ranges: &[Range], + newline_char: u8, + out_delim: &[u8], +) -> UResult<()> { + let buf_in = BufReader::new(reader); + let mut out = stdout_writer(); + + let segments: Vec<_> = buf_in.split(newline_char).filter_map(|x| x.ok()).collect(); + let mut print_delim = false; + + for &Range { low, high } in ranges { + for i in low..=high { + // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0 + if let Some(segment) = segments.get(i - 1) { + if print_delim { + out.write_all(out_delim)?; + } else { + print_delim = true; + } + out.write_all(segment.as_slice())?; + } else { + break; + } + } + } + out.write_all(&[newline_char])?; + Ok(()) +} + fn cut_fields(reader: R, ranges: &[Range], opts: &Options) -> UResult<()> { let newline_char = opts.line_ending.into(); let field_opts = opts.field_opts.as_ref().unwrap(); // it is safe to unwrap() here - field_opts will always be Some() for cut_fields() call match field_opts.delimiter { + Delimiter::Slice(delim) if delim == [newline_char] => { + let out_delim = opts.out_delimiter.unwrap_or(delim); + cut_fields_newline_char_delim(reader, ranges, newline_char, out_delim) + } Delimiter::Slice(delim) => { let matcher = ExactMatcher::new(delim); match opts.out_delimiter { diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 1aa3c126a..dbd26abb2 100644 --- 
a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -288,11 +288,22 @@ fn test_empty_string_as_delimiter_with_output_delimiter() { #[test] fn test_newline_as_delimiter() { + for (field, expected_output) in [("1", "a:1\n"), ("2", "b:\n")] { + new_ucmd!() + .args(&["-f", field, "-d", "\n"]) + .pipe_in("a:1\nb:") + .succeeds() + .stdout_only_bytes(expected_output); + } +} + +#[test] +fn test_newline_as_delimiter_with_output_delimiter() { new_ucmd!() - .args(&["-f", "1", "-d", "\n"]) - .pipe_in("a:1\nb:") + .args(&["-f1-", "-d", "\n", "--output-delimiter=:"]) + .pipe_in("a\nb\n") .succeeds() - .stdout_only_bytes("a:1\nb:\n"); + .stdout_only_bytes("a:b\n"); } #[test] From f62b8d79759650ff00bdc691249ca8eb7304e65e Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 27 Dec 2024 16:08:48 +0100 Subject: [PATCH 178/179] csplit: allow offset without sign in pattern --- src/uu/csplit/src/patterns.rs | 34 +++++++++++++++++++------- tests/by-util/test_csplit.rs | 45 ++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/src/uu/csplit/src/patterns.rs b/src/uu/csplit/src/patterns.rs index bd6c4fbfa..edd632d08 100644 --- a/src/uu/csplit/src/patterns.rs +++ b/src/uu/csplit/src/patterns.rs @@ -106,7 +106,7 @@ pub fn get_patterns(args: &[String]) -> Result, CsplitError> { fn extract_patterns(args: &[String]) -> Result, CsplitError> { let mut patterns = Vec::with_capacity(args.len()); let to_match_reg = - Regex::new(r"^(/(?P.+)/|%(?P.+)%)(?P[\+-]\d+)?$").unwrap(); + Regex::new(r"^(/(?P.+)/|%(?P.+)%)(?P[\+-]?\d+)?$").unwrap(); let execute_ntimes_reg = Regex::new(r"^\{(?P\d+)|\*\}$").unwrap(); let mut iter = args.iter().peekable(); @@ -219,14 +219,15 @@ mod tests { "{*}", "/test3.*end$/", "{4}", - "/test4.*end$/+3", - "/test5.*end$/-3", + "/test4.*end$/3", + "/test5.*end$/+3", + "/test6.*end$/-3", ] .into_iter() .map(|v| v.to_string()) .collect(); let patterns = get_patterns(input.as_slice()).unwrap(); - 
assert_eq!(patterns.len(), 5); + assert_eq!(patterns.len(), 6); match patterns.first() { Some(Pattern::UpToMatch(reg, 0, ExecutePattern::Times(1))) => { let parsed_reg = format!("{reg}"); @@ -256,12 +257,19 @@ mod tests { _ => panic!("expected UpToMatch pattern"), }; match patterns.get(4) { - Some(Pattern::UpToMatch(reg, -3, ExecutePattern::Times(1))) => { + Some(Pattern::UpToMatch(reg, 3, ExecutePattern::Times(1))) => { let parsed_reg = format!("{reg}"); assert_eq!(parsed_reg, "test5.*end$"); } _ => panic!("expected UpToMatch pattern"), }; + match patterns.get(5) { + Some(Pattern::UpToMatch(reg, -3, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{reg}"); + assert_eq!(parsed_reg, "test6.*end$"); + } + _ => panic!("expected UpToMatch pattern"), + }; } #[test] @@ -273,14 +281,15 @@ mod tests { "{*}", "%test3.*end$%", "{4}", - "%test4.*end$%+3", - "%test5.*end$%-3", + "%test4.*end$%3", + "%test5.*end$%+3", + "%test6.*end$%-3", ] .into_iter() .map(|v| v.to_string()) .collect(); let patterns = get_patterns(input.as_slice()).unwrap(); - assert_eq!(patterns.len(), 5); + assert_eq!(patterns.len(), 6); match patterns.first() { Some(Pattern::SkipToMatch(reg, 0, ExecutePattern::Times(1))) => { let parsed_reg = format!("{reg}"); @@ -310,12 +319,19 @@ mod tests { _ => panic!("expected SkipToMatch pattern"), }; match patterns.get(4) { - Some(Pattern::SkipToMatch(reg, -3, ExecutePattern::Times(1))) => { + Some(Pattern::SkipToMatch(reg, 3, ExecutePattern::Times(1))) => { let parsed_reg = format!("{reg}"); assert_eq!(parsed_reg, "test5.*end$"); } _ => panic!("expected SkipToMatch pattern"), }; + match patterns.get(5) { + Some(Pattern::SkipToMatch(reg, -3, ExecutePattern::Times(1))) => { + let parsed_reg = format!("{reg}"); + assert_eq!(parsed_reg, "test6.*end$"); + } + _ => panic!("expected SkipToMatch pattern"), + }; } #[test] diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs index 10ead0b45..231571522 100644 --- a/tests/by-util/test_csplit.rs 
+++ b/tests/by-util/test_csplit.rs @@ -130,17 +130,21 @@ fn test_up_to_match_sequence() { #[test] fn test_up_to_match_offset() { - let (at, mut ucmd) = at_and_ucmd!(); - ucmd.args(&["numbers50.txt", "/9$/+3"]) - .succeeds() - .stdout_only("24\n117\n"); + for offset in ["3", "+3"] { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["numbers50.txt", &format!("/9$/{offset}")]) + .succeeds() + .stdout_only("24\n117\n"); - let count = glob(&at.plus_as_string("xx*")) - .expect("there should be splits created") - .count(); - assert_eq!(count, 2); - assert_eq!(at.read("xx00"), generate(1, 12)); - assert_eq!(at.read("xx01"), generate(12, 51)); + let count = glob(&at.plus_as_string("xx*")) + .expect("there should be splits created") + .count(); + assert_eq!(count, 2); + assert_eq!(at.read("xx00"), generate(1, 12)); + assert_eq!(at.read("xx01"), generate(12, 51)); + at.remove("xx00"); + at.remove("xx01"); + } } #[test] @@ -316,16 +320,19 @@ fn test_skip_to_match_sequence4() { #[test] fn test_skip_to_match_offset() { - let (at, mut ucmd) = at_and_ucmd!(); - ucmd.args(&["numbers50.txt", "%23%+3"]) - .succeeds() - .stdout_only("75\n"); + for offset in ["3", "+3"] { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["numbers50.txt", &format!("%23%{offset}")]) + .succeeds() + .stdout_only("75\n"); - let count = glob(&at.plus_as_string("xx*")) - .expect("there should be splits created") - .count(); - assert_eq!(count, 1); - assert_eq!(at.read("xx00"), generate(26, 51)); + let count = glob(&at.plus_as_string("xx*")) + .expect("there should be splits created") + .count(); + assert_eq!(count, 1); + assert_eq!(at.read("xx00"), generate(26, 51)); + at.remove("xx00"); + } } #[test] From 02f1f50ccbed13c691bc741a2ea8df04b36951e0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 21:07:48 +0000 Subject: [PATCH 179/179] chore(deps): update rust crate serde to v1.0.217 --- Cargo.lock | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2717a64e2..642b3fdda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2080,9 +2080,9 @@ checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" [[package]] name = "serde" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] @@ -2098,9 +2098,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote",