From beb7395c84aafd95ba60d94b54e1b92a7eac9238 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 18 Apr 2024 12:28:33 +0200 Subject: [PATCH 1/4] hashsum: move handle_captures & gnu_re_template move away from the hashsum function --- src/uu/hashsum/src/hashsum.rs | 84 +++++++++++++++++------------------ 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 7742358a5..8c574f066 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -597,6 +597,47 @@ impl std::fmt::Display for HashsumError { } } +/// Creates a Regex for parsing lines based on the given format. +/// The default value of `gnu_re` created with this function has to be recreated +/// after the initial line has been parsed, as this line dictates the format +/// for the rest of them, and mixing of formats is disallowed. +fn gnu_re_template(bytes_marker: &str, format_marker: &str) -> Result { + Regex::new(&format!( + r"^(?P[a-fA-F0-9]{bytes_marker}) {format_marker}(?P.*)" + )) + .map_err(|_| HashsumError::InvalidRegex) +} + +fn handle_captures( + caps: &Captures, + bytes_marker: &str, + bsd_reversed: &mut Option, + gnu_re: &mut Regex, +) -> Result<(String, String, bool), HashsumError> { + if bsd_reversed.is_none() { + let is_bsd_reversed = caps.name("binary").is_none(); + let format_marker = if is_bsd_reversed { + "" + } else { + r"(?P[ \*])" + } + .to_string(); + + *bsd_reversed = Some(is_bsd_reversed); + *gnu_re = gnu_re_template(bytes_marker, &format_marker)?; + } + + Ok(( + caps.name("fileName").unwrap().as_str().to_string(), + caps.name("digest").unwrap().as_str().to_ascii_lowercase(), + if *bsd_reversed == Some(false) { + caps.name("binary").unwrap().as_str() == "*" + } else { + false + }, + )) +} + #[allow(clippy::cognitive_complexity)] fn hashsum<'a, I>(mut options: Options, files: I) -> UResult<()> where @@ -636,19 +677,6 @@ where // BSD reversed mode format is similar to the default mode, but doesn’t use a character to distinguish binary and text modes. let mut bsd_reversed = None; - /// Creates a Regex for parsing lines based on the given format. - /// The default value of `gnu_re` created with this function has to be recreated - /// after the initial line has been parsed, as this line dictates the format - /// for the rest of them, and mixing of formats is disallowed. - fn gnu_re_template( - bytes_marker: &str, - format_marker: &str, - ) -> Result { - Regex::new(&format!( - r"^(?P[a-fA-F0-9]{bytes_marker}) {format_marker}(?P.*)" - )) - .map_err(|_| HashsumError::InvalidRegex) - } let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P[ \*])?")?; let bsd_re = Regex::new(&format!( // it can start with \ @@ -658,36 +686,6 @@ where )) .map_err(|_| HashsumError::InvalidRegex)?; - fn handle_captures( - caps: &Captures, - bytes_marker: &str, - bsd_reversed: &mut Option, - gnu_re: &mut Regex, - ) -> Result<(String, String, bool), HashsumError> { - if bsd_reversed.is_none() { - let is_bsd_reversed = caps.name("binary").is_none(); - let format_marker = if is_bsd_reversed { - "" - } else { - r"(?P[ \*])" - } - .to_string(); - - *bsd_reversed = Some(is_bsd_reversed); - *gnu_re = gnu_re_template(bytes_marker, &format_marker)?; - } - - Ok(( - caps.name("fileName").unwrap().as_str().to_string(), - caps.name("digest").unwrap().as_str().to_ascii_lowercase(), - if *bsd_reversed == Some(false) { - caps.name("binary").unwrap().as_str() == "*" - } else { - false - }, - )) - } - let buffer = file; for (i, maybe_line) in buffer.lines().enumerate() { let line = match maybe_line { From 94f5e82dbd316bfe38c08af0a9a8e498142abdc8 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 19 Apr 2024 00:00:26 +0200 Subject: [PATCH 2/4] hashsum: ignore empty lines in --check --- src/uu/hashsum/src/hashsum.rs | 4 ++++ tests/by-util/test_hashsum.rs | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 8c574f066..597bca565 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -692,6 +692,10 @@ where Ok(l) => l, Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())), }; + if line.is_empty() { + // empty line, skip it + continue; + } let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) { Some(caps) => { handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)? diff --git a/tests/by-util/test_hashsum.rs b/tests/by-util/test_hashsum.rs index e5e3e1e98..535a4d0ba 100644 --- a/tests/by-util/test_hashsum.rs +++ b/tests/by-util/test_hashsum.rs @@ -457,6 +457,24 @@ fn test_with_escape_filename_zero_text() { assert!(stdout.contains("a\nb")); } +#[test] +fn test_check_empty_line() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write( + "in.md5", + "d41d8cd98f00b204e9800998ecf8427e f\n\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n\n", + ); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("WARNING: 1 line is improperly formatted"); +} + #[test] #[cfg(not(windows))] fn test_check_with_escape_filename() { From 128c0bc6b5c9870c086d3d9a8c7cc892431afc31 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 19 Apr 2024 00:44:45 +0200 Subject: [PATCH 3/4] hashsum: gnu compat no need to replicate this output with hashsum --- util/build-gnu.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 6a4b09f89..e8095fe6c 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -346,3 +346,6 @@ sed -i -e "s|WARNING: 1 line is improperly formatted|warning: 1 line is improper echo "n_stat1 = \$n_stat1"\n\ echo "n_stat2 = \$n_stat2"\n\ test \$n_stat1 -ge \$n_stat2 \\' tests/ls/stat-free-color.sh + +# no need to replicate this output with hashsum +sed -i -e "s|Try 'md5sum --help' for more information.\\\n||" tests/cksum/md5sum.pl From 6ef08d7f1c48af230c6c8d86e3b8c7ba935bae36 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 19 Apr 2024 01:20:06 +0200 Subject: [PATCH 4/4] hashsum: improve the error management to match GNU Should make tests/cksum/md5sum.pl and tests/cksum/sha1sum.pl pass --- src/uu/hashsum/src/hashsum.rs | 80 ++++++++++++++++++----- src/uucore/src/lib/macros.rs | 8 +++ tests/by-util/test_hashsum.rs | 115 +++++++++++++++++++++++++++++++++- util/build-gnu.sh | 3 - 4 files changed, 184 insertions(+), 22 deletions(-) diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 597bca565..d4c5aacbb 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -26,7 +26,8 @@ use uucore::sum::{ Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Sha512, Shake128, Shake256, }; -use uucore::{display::Quotable, show_warning}; +use uucore::util_name; +use uucore::{display::Quotable, show_warning_caps}; use uucore::{format_usage, help_about, help_usage}; const NAME: &str = "hashsum"; @@ -577,7 +578,6 @@ fn uu_app(binary_name: &str) -> Command { #[derive(Debug)] enum HashsumError { InvalidRegex, - InvalidFormat, IgnoreNotCheck, } @@ -588,7 +588,6 @@ impl std::fmt::Display for HashsumError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::InvalidRegex => write!(f, "invalid regular expression"), - Self::InvalidFormat => Ok(()), Self::IgnoreNotCheck => write!( f, "the --ignore-missing option is meaningful only when verifying checksums" @@ -644,8 +643,10 @@ where I: Iterator, { let mut bad_format = 0; + let mut correct_format = 0; let mut failed_cksum = 0; let mut failed_open_file = 0; + let mut skip_summary = false; let binary_marker = if options.binary { "*" } else { " " }; for filename in files { let filename = Path::new(filename); @@ -680,13 +681,14 @@ where let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P[ \*])?")?; let bsd_re = Regex::new(&format!( // it can start with \ - r"^(|\\){algorithm} \((?P.*)\) = (?P[a-fA-F0-9]{digest_size})", + r"^(\\)?{algorithm}\s*\((?P.*)\)\s*=\s*(?P[a-fA-F0-9]{digest_size})$", algorithm = options.algoname, digest_size = bytes_marker, )) .map_err(|_| HashsumError::InvalidRegex)?; let buffer = file; + // iterate on the lines of the file for (i, maybe_line) in buffer.lines().enumerate() { let line = match maybe_line { Ok(l) => l, @@ -701,6 +703,7 @@ where handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)? } None => match bsd_re.captures(&line) { + // if the GNU style parsing failed, try the BSD style Some(caps) => ( caps.name("fileName").unwrap().as_str().to_string(), caps.name("digest").unwrap().as_str().to_ascii_lowercase(), @@ -709,11 +712,14 @@ where None => { bad_format += 1; if options.strict { - return Err(HashsumError::InvalidFormat.into()); + // if we use strict, the warning "lines are improperly formatted" + // will trigger an exit code of 1 + set_exit_code(1); } if options.warn { - show_warning!( - "{}: {}: improperly formatted {} checksum line", + eprintln!( + "{}: {}: {}: improperly formatted {} checksum line", + util_name(), filename.maybe_quote(), i + 1, options.algoname @@ -727,7 +733,8 @@ where let f = match File::open(ck_filename_unescaped) { Err(_) => { if options.ignore_missing { - // No need to show or return an error. + // No need to show or return an error + // except when the file doesn't have any successful checks continue; } @@ -762,6 +769,7 @@ where // and display it using uucore::display::print_verbatim(). This is // easier (and more important) on Unix than on Windows. if sum == real_sum { + correct_format += 1; if !options.quiet { println!("{prefix}{ck_filename}: OK"); } @@ -770,6 +778,7 @@ where println!("{prefix}{ck_filename}: FAILED"); } failed_cksum += 1; + set_exit_code(1); } } } else { @@ -795,20 +804,57 @@ where println!("{}{} {}{}", prefix, sum, binary_marker, escaped_filename); } } + if bad_format > 0 && failed_cksum == 0 && correct_format == 0 && !options.status { + // we have only bad format. we didn't have anything correct. + // GNU has a different error message for this (with the filename) + set_exit_code(1); + eprintln!( + "{}: {}: no properly formatted checksum lines found", + util_name(), + filename.maybe_quote(), + ); + skip_summary = true; + } + if options.ignore_missing && correct_format == 0 { + // we have only bad format + // and we had ignore-missing + eprintln!( + "{}: {}: no file was verified", + util_name(), + filename.maybe_quote(), + ); + skip_summary = true; + set_exit_code(1); + } } - if !options.status { + + if !options.status && !skip_summary { match bad_format.cmp(&1) { - Ordering::Equal => show_warning!("{} line is improperly formatted", bad_format), - Ordering::Greater => show_warning!("{} lines are improperly formatted", bad_format), + Ordering::Equal => { + show_warning_caps!("{} line is improperly formatted", bad_format) + } + Ordering::Greater => { + show_warning_caps!("{} lines are improperly formatted", bad_format) + } Ordering::Less => {} }; - if failed_cksum > 0 { - show_warning!("{} computed checksum did NOT match", failed_cksum); - } - match failed_open_file.cmp(&1) { - Ordering::Equal => show_warning!("{} listed file could not be read", failed_open_file), + + match failed_cksum.cmp(&1) { + Ordering::Equal => { + show_warning_caps!("{} computed checksum did NOT match", failed_cksum) + } Ordering::Greater => { - show_warning!("{} listed files could not be read", failed_open_file); + show_warning_caps!("{} computed checksums did NOT match", failed_cksum) + } + Ordering::Less => {} + }; + + match failed_open_file.cmp(&1) { + Ordering::Equal => { + show_warning_caps!("{} listed file could not be read", failed_open_file) + } + Ordering::Greater => { + show_warning_caps!("{} listed files could not be read", failed_open_file); } Ordering::Less => {} } diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index d1a09c281..359c9d00b 100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -182,6 +182,14 @@ macro_rules! show_warning( }) ); +#[macro_export] +macro_rules! show_warning_caps( + ($($args:tt)+) => ({ + eprint!("{}: WARNING: ", $crate::util_name()); + eprintln!($($args)+); + }) +); + /// Display an error and [`std::process::exit`] /// /// Displays the provided error message using [`show_error!`], then invokes diff --git a/tests/by-util/test_hashsum.rs b/tests/by-util/test_hashsum.rs index 535a4d0ba..432eb3ddf 100644 --- a/tests/by-util/test_hashsum.rs +++ b/tests/by-util/test_hashsum.rs @@ -244,7 +244,7 @@ fn test_check_file_not_found_warning() { .arg(at.subdir.join("testf.sha1")) .fails() .stdout_is("sha1sum: testf: No such file or directory\ntestf: FAILED open or read\n") - .stderr_is("sha1sum: warning: 1 listed file could not be read\n"); + .stderr_is("sha1sum: WARNING: 1 listed file could not be read\n"); } // Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace. @@ -471,7 +471,7 @@ fn test_check_empty_line() { .ccmd("md5sum") .arg("--check") .arg(at.subdir.join("in.md5")) - .fails() + .succeeds() .stderr_contains("WARNING: 1 line is improperly formatted"); } @@ -498,3 +498,114 @@ fn test_check_with_escape_filename() { .succeeds(); result.stdout_is("\\a\\nb: OK\n"); } + +#[test] +fn test_check_strict_error() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", ""); + at.write( + "in.md5", + "ERR\nERR\nd41d8cd98f00b204e9800998ecf8427e f\nERR\n", + ); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--strict") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("WARNING: 3 lines are improperly formatted"); +} + +#[test] +fn test_check_warn() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", ""); + at.write( + "in.md5", + "d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n", + ); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--warn") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stderr_contains("in.md5: 3: improperly formatted MD5 checksum line") + .stderr_contains("WARNING: 1 line is improperly formatted"); + + // with strict, we should fail the execution + scene + .ccmd("md5sum") + .arg("--check") + .arg("--strict") + .arg(at.subdir.join("in.md5")) + .fails(); +} + +#[test] +fn test_check_status() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", ""); + at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427f\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--status") + .arg(at.subdir.join("in.md5")) + .fails() + .no_output(); +} + +#[test] +fn test_check_status_code() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", ""); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f f\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--status") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_is("") + .stdout_is(""); +} + +#[test] +fn test_check_no_backslash_no_space() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", ""); + at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427e\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stdout_is("f: OK\n"); +} + +#[test] +fn test_check_check_ignore_no_file() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", ""); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f missing\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--ignore-missing") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("in.md5: no file was verified"); +} diff --git a/util/build-gnu.sh b/util/build-gnu.sh index e8095fe6c..debb7bdd9 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -337,9 +337,6 @@ ls: invalid --time-style argument 'XX'\nPossible values are: [\"full-iso\", \"lo # "hostid BEFORE --help AFTER " same for this sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/help/help-version-getopt.sh -# The case doesn't really matter here -sed -i -e "s|WARNING: 1 line is improperly formatted|warning: 1 line is improperly formatted|" tests/cksum/md5sum-bsd.sh - # Add debug info + we have less syscall then GNU's. Adjust our check. # Use GNU sed for /c command "${SED}" -i -e '/test \$n_stat1 = \$n_stat2 \\/c\