mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
checksum: move regex detection to the line level
This commit is contained in:
parent
c8bcdb9267
commit
5cbe87620c
1 changed files with 36 additions and 73 deletions
|
@ -8,7 +8,7 @@ use data_encoding::BASE64;
|
|||
use os_display::Quotable;
|
||||
use regex::bytes::{Captures, Regex};
|
||||
use std::{
|
||||
ffi::{OsStr, OsString},
|
||||
ffi::OsStr,
|
||||
fmt::Display,
|
||||
fs::File,
|
||||
io::{self, stdin, BufReader, Read, Write},
|
||||
|
@ -130,9 +130,6 @@ enum FileCheckError {
|
|||
ImproperlyFormatted,
|
||||
/// reading of the checksum file failed
|
||||
CantOpenChecksumFile,
|
||||
/// Algorithm detection was unsuccessful.
|
||||
/// Either none is provided, or there is a conflict.
|
||||
AlgoDetectionError,
|
||||
}
|
||||
|
||||
impl From<Box<dyn UError>> for FileCheckError {
|
||||
|
@ -441,7 +438,7 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
|||
}
|
||||
|
||||
/// Determines the appropriate regular expression to use based on the provided lines.
|
||||
fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
||||
fn determine_regex(line: impl AsRef<OsStr>) -> Option<(Regex, bool)> {
|
||||
let regexes = [
|
||||
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
||||
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
||||
|
@ -449,14 +446,12 @@ fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
|||
(Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true),
|
||||
];
|
||||
|
||||
for line in lines {
|
||||
let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed");
|
||||
let line_bytes = os_str_as_bytes(line.as_ref()).expect("UTF-8 decoding failed");
|
||||
for (regex, is_algo_based) in ®exes {
|
||||
if regex.is_match(line_bytes) {
|
||||
return Some((regex.clone(), *is_algo_based));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
@ -599,13 +594,20 @@ fn process_checksum_line(
|
|||
filename_input: &OsStr,
|
||||
line: &OsStr,
|
||||
i: usize,
|
||||
chosen_regex: &Regex,
|
||||
is_algo_based_format: bool,
|
||||
cli_algo_name: Option<&str>,
|
||||
cli_algo_length: Option<usize>,
|
||||
opts: ChecksumOptions,
|
||||
) -> Result<(), LineCheckError> {
|
||||
let line_bytes = os_str_as_bytes(line)?;
|
||||
|
||||
// early return on empty or commented lines.
|
||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||
return Err(LineCheckError::Skipped);
|
||||
}
|
||||
|
||||
let (chosen_regex, is_algo_based_format) =
|
||||
determine_regex(line).ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||
|
||||
if let Some(caps) = chosen_regex.captures(line_bytes) {
|
||||
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
|
||||
|
||||
|
@ -617,7 +619,7 @@ fn process_checksum_line(
|
|||
filename_to_check = &filename_to_check[1..];
|
||||
}
|
||||
|
||||
let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex)
|
||||
let expected_checksum = get_expected_digest_as_hex_string(&caps, &chosen_regex)
|
||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||
|
||||
// If the algo_name is provided, we use it, otherwise we try to detect it
|
||||
|
@ -672,10 +674,6 @@ fn process_checksum_line(
|
|||
Err(LineCheckError::DigestMismatch)
|
||||
}
|
||||
} else {
|
||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||
// Don't show any warning for empty or commented lines.
|
||||
return Err(LineCheckError::Skipped);
|
||||
}
|
||||
if opts.warn {
|
||||
let algo = if let Some(algo_name_input) = cli_algo_name {
|
||||
algo_name_input.to_uppercase()
|
||||
|
@ -723,19 +721,11 @@ fn process_checksum_file(
|
|||
let reader = BufReader::new(file);
|
||||
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
||||
|
||||
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
|
||||
log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
|
||||
set_exit_code(1);
|
||||
return Err(FileCheckError::AlgoDetectionError);
|
||||
};
|
||||
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
let line_result = process_checksum_line(
|
||||
filename_input,
|
||||
line,
|
||||
i,
|
||||
&chosen_regex,
|
||||
is_algo_based_format,
|
||||
cli_algo_name,
|
||||
cli_algo_length,
|
||||
opts,
|
||||
|
@ -816,8 +806,7 @@ where
|
|||
use FileCheckError::*;
|
||||
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
|
||||
Err(UError(e)) => return Err(e),
|
||||
Err(ImproperlyFormatted) => break,
|
||||
Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue,
|
||||
Err(CantOpenChecksumFile | ImproperlyFormatted) | Ok(_) => continue,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -926,6 +915,7 @@ pub fn escape_filename(filename: &Path) -> (String, &'static str) {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::ffi::OsString;
|
||||
|
||||
#[test]
|
||||
fn test_unescape_filename() {
|
||||
|
@ -1161,66 +1151,39 @@ mod tests {
|
|||
#[test]
|
||||
fn test_determine_regex() {
|
||||
// Test algo-based regex
|
||||
let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
|
||||
let line_algo_based =
|
||||
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||
let (regex, algo_based) = determine_regex(&line_algo_based).unwrap();
|
||||
assert!(algo_based);
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap()));
|
||||
assert!(regex.is_match(os_str_as_bytes(&line_algo_based).unwrap()));
|
||||
|
||||
// Test double-space regex
|
||||
let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
||||
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||
let (regex, algo_based) = determine_regex(&line_double_space).unwrap();
|
||||
assert!(!algo_based);
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
||||
assert!(regex.is_match(os_str_as_bytes(&line_double_space).unwrap()));
|
||||
|
||||
// Test single-space regex
|
||||
let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
|
||||
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||
let (regex, algo_based) = determine_regex(&line_single_space).unwrap();
|
||||
assert!(!algo_based);
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap()));
|
||||
|
||||
// Test double-space regex start with invalid
|
||||
let lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
||||
assert!(!algo_based);
|
||||
assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap()));
|
||||
assert!(regex.is_match(os_str_as_bytes(&line_single_space).unwrap()));
|
||||
|
||||
// Test invalid checksum line
|
||||
let lines_invalid = ["invalid checksum line"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
assert!(determine_regex(&lines_invalid).is_none());
|
||||
let line_invalid = OsString::from("invalid checksum line");
|
||||
assert!(determine_regex(&line_invalid).is_none());
|
||||
|
||||
// Test leading space before checksum line
|
||||
let lines_algo_based_leading_space =
|
||||
[" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let res = determine_regex(&lines_algo_based_leading_space);
|
||||
let line_algo_based_leading_space =
|
||||
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||
let res = determine_regex(&line_algo_based_leading_space);
|
||||
assert!(res.is_some());
|
||||
assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
|
||||
|
||||
// Test trailing space after checksum line (should fail)
|
||||
let lines_algo_based_leading_space =
|
||||
["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let res = determine_regex(&lines_algo_based_leading_space);
|
||||
let line_algo_based_leading_space =
|
||||
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
|
||||
let res = determine_regex(&line_algo_based_leading_space);
|
||||
assert!(res.is_none());
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue