1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

Merge pull request #6252 from sylvestre/hash-error

hashsum: improve the error management to match GNU
This commit is contained in:
Sylvestre Ledru 2024-04-21 09:55:16 +02:00 committed by GitHub
commit 64027e5a57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 249 additions and 64 deletions

View file

@ -26,7 +26,8 @@ use uucore::sum::{
Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256, Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256,
Sha3_384, Sha3_512, Sha512, Shake128, Shake256, Sha3_384, Sha3_512, Sha512, Shake128, Shake256,
}; };
use uucore::{display::Quotable, show_warning}; use uucore::util_name;
use uucore::{display::Quotable, show_warning_caps};
use uucore::{format_usage, help_about, help_usage}; use uucore::{format_usage, help_about, help_usage};
const NAME: &str = "hashsum"; const NAME: &str = "hashsum";
@ -577,7 +578,6 @@ fn uu_app(binary_name: &str) -> Command {
#[derive(Debug)] #[derive(Debug)]
enum HashsumError { enum HashsumError {
InvalidRegex, InvalidRegex,
InvalidFormat,
IgnoreNotCheck, IgnoreNotCheck,
} }
@ -588,7 +588,6 @@ impl std::fmt::Display for HashsumError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self { match self {
Self::InvalidRegex => write!(f, "invalid regular expression"), Self::InvalidRegex => write!(f, "invalid regular expression"),
Self::InvalidFormat => Ok(()),
Self::IgnoreNotCheck => write!( Self::IgnoreNotCheck => write!(
f, f,
"the --ignore-missing option is meaningful only when verifying checksums" "the --ignore-missing option is meaningful only when verifying checksums"
@ -597,66 +596,16 @@ impl std::fmt::Display for HashsumError {
} }
} }
#[allow(clippy::cognitive_complexity)]
fn hashsum<'a, I>(mut options: Options, files: I) -> UResult<()>
where
I: Iterator<Item = &'a OsStr>,
{
let mut bad_format = 0;
let mut failed_cksum = 0;
let mut failed_open_file = 0;
let binary_marker = if options.binary { "*" } else { " " };
for filename in files {
let filename = Path::new(filename);
let stdin_buf;
let file_buf;
let mut file = BufReader::new(if filename == OsStr::new("-") {
stdin_buf = stdin();
Box::new(stdin_buf) as Box<dyn Read>
} else {
file_buf =
File::open(filename).map_err_context(|| "failed to open file".to_string())?;
Box::new(file_buf) as Box<dyn Read>
});
if options.check {
// Set up Regexes for line validation and parsing
//
// First, we compute the number of bytes we expect to be in
// the digest string. If the algorithm has a variable number
// of output bits, then we use the `+` modifier in the
// regular expression, otherwise we use the `{n}` modifier,
// where `n` is the number of bytes.
let bytes = options.digest.output_bits() / 4;
let bytes_marker = if bytes > 0 {
format!("{{{bytes}}}")
} else {
"+".to_string()
};
// BSD reversed mode format is similar to the default mode, but doesnt use a character to distinguish binary and text modes.
let mut bsd_reversed = None;
/// Creates a Regex for parsing lines based on the given format. /// Creates a Regex for parsing lines based on the given format.
/// The default value of `gnu_re` created with this function has to be recreated /// The default value of `gnu_re` created with this function has to be recreated
/// after the initial line has been parsed, as this line dictates the format /// after the initial line has been parsed, as this line dictates the format
/// for the rest of them, and mixing of formats is disallowed. /// for the rest of them, and mixing of formats is disallowed.
fn gnu_re_template( fn gnu_re_template(bytes_marker: &str, format_marker: &str) -> Result<Regex, HashsumError> {
bytes_marker: &str,
format_marker: &str,
) -> Result<Regex, HashsumError> {
Regex::new(&format!( Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)" r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)"
)) ))
.map_err(|_| HashsumError::InvalidRegex) .map_err(|_| HashsumError::InvalidRegex)
} }
let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P<binary>[ \*])?")?;
let bsd_re = Regex::new(&format!(
// it can start with \
r"^(|\\){algorithm} \((?P<fileName>.*)\) = (?P<digest>[a-fA-F0-9]{digest_size})",
algorithm = options.algoname,
digest_size = bytes_marker,
))
.map_err(|_| HashsumError::InvalidRegex)?;
fn handle_captures( fn handle_captures(
caps: &Captures, caps: &Captures,
@ -688,17 +637,73 @@ where
)) ))
} }
#[allow(clippy::cognitive_complexity)]
fn hashsum<'a, I>(mut options: Options, files: I) -> UResult<()>
where
I: Iterator<Item = &'a OsStr>,
{
let mut bad_format = 0;
let mut correct_format = 0;
let mut failed_cksum = 0;
let mut failed_open_file = 0;
let mut skip_summary = false;
let binary_marker = if options.binary { "*" } else { " " };
for filename in files {
let filename = Path::new(filename);
let stdin_buf;
let file_buf;
let mut file = BufReader::new(if filename == OsStr::new("-") {
stdin_buf = stdin();
Box::new(stdin_buf) as Box<dyn Read>
} else {
file_buf =
File::open(filename).map_err_context(|| "failed to open file".to_string())?;
Box::new(file_buf) as Box<dyn Read>
});
if options.check {
// Set up Regexes for line validation and parsing
//
// First, we compute the number of bytes we expect to be in
// the digest string. If the algorithm has a variable number
// of output bits, then we use the `+` modifier in the
// regular expression, otherwise we use the `{n}` modifier,
// where `n` is the number of bytes.
let bytes = options.digest.output_bits() / 4;
let bytes_marker = if bytes > 0 {
format!("{{{bytes}}}")
} else {
"+".to_string()
};
// BSD reversed mode format is similar to the default mode, but doesnt use a character to distinguish binary and text modes.
let mut bsd_reversed = None;
let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P<binary>[ \*])?")?;
let bsd_re = Regex::new(&format!(
// it can start with \
r"^(\\)?{algorithm}\s*\((?P<fileName>.*)\)\s*=\s*(?P<digest>[a-fA-F0-9]{digest_size})$",
algorithm = options.algoname,
digest_size = bytes_marker,
))
.map_err(|_| HashsumError::InvalidRegex)?;
let buffer = file; let buffer = file;
// iterate on the lines of the file
for (i, maybe_line) in buffer.lines().enumerate() { for (i, maybe_line) in buffer.lines().enumerate() {
let line = match maybe_line { let line = match maybe_line {
Ok(l) => l, Ok(l) => l,
Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())), Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())),
}; };
if line.is_empty() {
// empty line, skip it
continue;
}
let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) { let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) {
Some(caps) => { Some(caps) => {
handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)? handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)?
} }
None => match bsd_re.captures(&line) { None => match bsd_re.captures(&line) {
// if the GNU style parsing failed, try the BSD style
Some(caps) => ( Some(caps) => (
caps.name("fileName").unwrap().as_str().to_string(), caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(), caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
@ -707,11 +712,14 @@ where
None => { None => {
bad_format += 1; bad_format += 1;
if options.strict { if options.strict {
return Err(HashsumError::InvalidFormat.into()); // if we use strict, the warning "lines are improperly formatted"
// will trigger an exit code of 1
set_exit_code(1);
} }
if options.warn { if options.warn {
show_warning!( eprintln!(
"{}: {}: improperly formatted {} checksum line", "{}: {}: {}: improperly formatted {} checksum line",
util_name(),
filename.maybe_quote(), filename.maybe_quote(),
i + 1, i + 1,
options.algoname options.algoname
@ -725,7 +733,8 @@ where
let f = match File::open(ck_filename_unescaped) { let f = match File::open(ck_filename_unescaped) {
Err(_) => { Err(_) => {
if options.ignore_missing { if options.ignore_missing {
// No need to show or return an error. // No need to show or return an error
// except when the file doesn't have any successful checks
continue; continue;
} }
@ -760,6 +769,7 @@ where
// and display it using uucore::display::print_verbatim(). This is // and display it using uucore::display::print_verbatim(). This is
// easier (and more important) on Unix than on Windows. // easier (and more important) on Unix than on Windows.
if sum == real_sum { if sum == real_sum {
correct_format += 1;
if !options.quiet { if !options.quiet {
println!("{prefix}{ck_filename}: OK"); println!("{prefix}{ck_filename}: OK");
} }
@ -768,6 +778,7 @@ where
println!("{prefix}{ck_filename}: FAILED"); println!("{prefix}{ck_filename}: FAILED");
} }
failed_cksum += 1; failed_cksum += 1;
set_exit_code(1);
} }
} }
} else { } else {
@ -793,20 +804,57 @@ where
println!("{}{} {}{}", prefix, sum, binary_marker, escaped_filename); println!("{}{} {}{}", prefix, sum, binary_marker, escaped_filename);
} }
} }
if bad_format > 0 && failed_cksum == 0 && correct_format == 0 && !options.status {
// we have only bad format. we didn't have anything correct.
// GNU has a different error message for this (with the filename)
set_exit_code(1);
eprintln!(
"{}: {}: no properly formatted checksum lines found",
util_name(),
filename.maybe_quote(),
);
skip_summary = true;
} }
if !options.status { if options.ignore_missing && correct_format == 0 {
// we have only bad format
// and we had ignore-missing
eprintln!(
"{}: {}: no file was verified",
util_name(),
filename.maybe_quote(),
);
skip_summary = true;
set_exit_code(1);
}
}
if !options.status && !skip_summary {
match bad_format.cmp(&1) { match bad_format.cmp(&1) {
Ordering::Equal => show_warning!("{} line is improperly formatted", bad_format), Ordering::Equal => {
Ordering::Greater => show_warning!("{} lines are improperly formatted", bad_format), show_warning_caps!("{} line is improperly formatted", bad_format)
}
Ordering::Greater => {
show_warning_caps!("{} lines are improperly formatted", bad_format)
}
Ordering::Less => {} Ordering::Less => {}
}; };
if failed_cksum > 0 {
show_warning!("{} computed checksum did NOT match", failed_cksum); match failed_cksum.cmp(&1) {
Ordering::Equal => {
show_warning_caps!("{} computed checksum did NOT match", failed_cksum)
} }
match failed_open_file.cmp(&1) {
Ordering::Equal => show_warning!("{} listed file could not be read", failed_open_file),
Ordering::Greater => { Ordering::Greater => {
show_warning!("{} listed files could not be read", failed_open_file); show_warning_caps!("{} computed checksums did NOT match", failed_cksum)
}
Ordering::Less => {}
};
match failed_open_file.cmp(&1) {
Ordering::Equal => {
show_warning_caps!("{} listed file could not be read", failed_open_file)
}
Ordering::Greater => {
show_warning_caps!("{} listed files could not be read", failed_open_file);
} }
Ordering::Less => {} Ordering::Less => {}
} }

View file

@ -182,6 +182,14 @@ macro_rules! show_warning(
}) })
); );
#[macro_export]
macro_rules! show_warning_caps(
($($args:tt)+) => ({
eprint!("{}: WARNING: ", $crate::util_name());
eprintln!($($args)+);
})
);
/// Display an error and [`std::process::exit`] /// Display an error and [`std::process::exit`]
/// ///
/// Displays the provided error message using [`show_error!`], then invokes /// Displays the provided error message using [`show_error!`], then invokes

View file

@ -244,7 +244,7 @@ fn test_check_file_not_found_warning() {
.arg(at.subdir.join("testf.sha1")) .arg(at.subdir.join("testf.sha1"))
.fails() .fails()
.stdout_is("sha1sum: testf: No such file or directory\ntestf: FAILED open or read\n") .stdout_is("sha1sum: testf: No such file or directory\ntestf: FAILED open or read\n")
.stderr_is("sha1sum: warning: 1 listed file could not be read\n"); .stderr_is("sha1sum: WARNING: 1 listed file could not be read\n");
} }
// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace. // Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace.
@ -457,6 +457,24 @@ fn test_with_escape_filename_zero_text() {
assert!(stdout.contains("a\nb")); assert!(stdout.contains("a\nb"));
} }
#[test]
fn test_check_empty_line() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("f");
at.write(
"in.md5",
"d41d8cd98f00b204e9800998ecf8427e f\n\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n\n",
);
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stderr_contains("WARNING: 1 line is improperly formatted");
}
#[test] #[test]
#[cfg(not(windows))] #[cfg(not(windows))]
fn test_check_with_escape_filename() { fn test_check_with_escape_filename() {
@ -480,3 +498,114 @@ fn test_check_with_escape_filename() {
.succeeds(); .succeeds();
result.stdout_is("\\a\\nb: OK\n"); result.stdout_is("\\a\\nb: OK\n");
} }
#[test]
fn test_check_strict_error() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write(
"in.md5",
"ERR\nERR\nd41d8cd98f00b204e9800998ecf8427e f\nERR\n",
);
scene
.ccmd("md5sum")
.arg("--check")
.arg("--strict")
.arg(at.subdir.join("in.md5"))
.fails()
.stderr_contains("WARNING: 3 lines are improperly formatted");
}
#[test]
fn test_check_warn() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write(
"in.md5",
"d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n",
);
scene
.ccmd("md5sum")
.arg("--check")
.arg("--warn")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stderr_contains("in.md5: 3: improperly formatted MD5 checksum line")
.stderr_contains("WARNING: 1 line is improperly formatted");
// with strict, we should fail the execution
scene
.ccmd("md5sum")
.arg("--check")
.arg("--strict")
.arg(at.subdir.join("in.md5"))
.fails();
}
#[test]
fn test_check_status() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427f\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--status")
.arg(at.subdir.join("in.md5"))
.fails()
.no_output();
}
#[test]
fn test_check_status_code() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f f\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--status")
.arg(at.subdir.join("in.md5"))
.fails()
.stderr_is("")
.stdout_is("");
}
#[test]
fn test_check_no_backslash_no_space() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427e\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stdout_is("f: OK\n");
}
#[test]
fn test_check_check_ignore_no_file() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f missing\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--ignore-missing")
.arg(at.subdir.join("in.md5"))
.fails()
.stderr_contains("in.md5: no file was verified");
}

View file

@ -337,12 +337,12 @@ ls: invalid --time-style argument 'XX'\nPossible values are: [\"full-iso\", \"lo
# "hostid BEFORE --help AFTER " same for this # "hostid BEFORE --help AFTER " same for this
sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/help/help-version-getopt.sh sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/help/help-version-getopt.sh
# The case doesn't really matter here
sed -i -e "s|WARNING: 1 line is improperly formatted|warning: 1 line is improperly formatted|" tests/cksum/md5sum-bsd.sh
# Add debug info + we have less syscall then GNU's. Adjust our check. # Add debug info + we have less syscall then GNU's. Adjust our check.
# Use GNU sed for /c command # Use GNU sed for /c command
"${SED}" -i -e '/test \$n_stat1 = \$n_stat2 \\/c\ "${SED}" -i -e '/test \$n_stat1 = \$n_stat2 \\/c\
echo "n_stat1 = \$n_stat1"\n\ echo "n_stat1 = \$n_stat1"\n\
echo "n_stat2 = \$n_stat2"\n\ echo "n_stat2 = \$n_stat2"\n\
test \$n_stat1 -ge \$n_stat2 \\' tests/ls/stat-free-color.sh test \$n_stat1 -ge \$n_stat2 \\' tests/ls/stat-free-color.sh
# no need to replicate this output with hashsum
sed -i -e "s|Try 'md5sum --help' for more information.\\\n||" tests/cksum/md5sum.pl