mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
checksum: move regex detection to the line level
This commit is contained in:
parent
c8bcdb9267
commit
5cbe87620c
1 changed files with 36 additions and 73 deletions
|
@ -8,7 +8,7 @@ use data_encoding::BASE64;
|
||||||
use os_display::Quotable;
|
use os_display::Quotable;
|
||||||
use regex::bytes::{Captures, Regex};
|
use regex::bytes::{Captures, Regex};
|
||||||
use std::{
|
use std::{
|
||||||
ffi::{OsStr, OsString},
|
ffi::OsStr,
|
||||||
fmt::Display,
|
fmt::Display,
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{self, stdin, BufReader, Read, Write},
|
io::{self, stdin, BufReader, Read, Write},
|
||||||
|
@ -130,9 +130,6 @@ enum FileCheckError {
|
||||||
ImproperlyFormatted,
|
ImproperlyFormatted,
|
||||||
/// reading of the checksum file failed
|
/// reading of the checksum file failed
|
||||||
CantOpenChecksumFile,
|
CantOpenChecksumFile,
|
||||||
/// Algorithm detection was unsuccessful.
|
|
||||||
/// Either none is provided, or there is a conflict.
|
|
||||||
AlgoDetectionError,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Box<dyn UError>> for FileCheckError {
|
impl From<Box<dyn UError>> for FileCheckError {
|
||||||
|
@ -441,7 +438,7 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determines the appropriate regular expression to use based on the provided lines.
|
/// Determines the appropriate regular expression to use based on the provided lines.
|
||||||
fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
fn determine_regex(line: impl AsRef<OsStr>) -> Option<(Regex, bool)> {
|
||||||
let regexes = [
|
let regexes = [
|
||||||
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
||||||
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
||||||
|
@ -449,12 +446,10 @@ fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
||||||
(Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true),
|
(Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true),
|
||||||
];
|
];
|
||||||
|
|
||||||
for line in lines {
|
let line_bytes = os_str_as_bytes(line.as_ref()).expect("UTF-8 decoding failed");
|
||||||
let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed");
|
for (regex, is_algo_based) in &regexes {
|
||||||
for (regex, is_algo_based) in &regexes {
|
if regex.is_match(line_bytes) {
|
||||||
if regex.is_match(line_bytes) {
|
return Some((regex.clone(), *is_algo_based));
|
||||||
return Some((regex.clone(), *is_algo_based));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -599,13 +594,20 @@ fn process_checksum_line(
|
||||||
filename_input: &OsStr,
|
filename_input: &OsStr,
|
||||||
line: &OsStr,
|
line: &OsStr,
|
||||||
i: usize,
|
i: usize,
|
||||||
chosen_regex: &Regex,
|
|
||||||
is_algo_based_format: bool,
|
|
||||||
cli_algo_name: Option<&str>,
|
cli_algo_name: Option<&str>,
|
||||||
cli_algo_length: Option<usize>,
|
cli_algo_length: Option<usize>,
|
||||||
opts: ChecksumOptions,
|
opts: ChecksumOptions,
|
||||||
) -> Result<(), LineCheckError> {
|
) -> Result<(), LineCheckError> {
|
||||||
let line_bytes = os_str_as_bytes(line)?;
|
let line_bytes = os_str_as_bytes(line)?;
|
||||||
|
|
||||||
|
// early return on empty or commented lines.
|
||||||
|
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||||
|
return Err(LineCheckError::Skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
let (chosen_regex, is_algo_based_format) =
|
||||||
|
determine_regex(line).ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||||
|
|
||||||
if let Some(caps) = chosen_regex.captures(line_bytes) {
|
if let Some(caps) = chosen_regex.captures(line_bytes) {
|
||||||
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
|
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
|
||||||
|
|
||||||
|
@ -617,7 +619,7 @@ fn process_checksum_line(
|
||||||
filename_to_check = &filename_to_check[1..];
|
filename_to_check = &filename_to_check[1..];
|
||||||
}
|
}
|
||||||
|
|
||||||
let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex)
|
let expected_checksum = get_expected_digest_as_hex_string(&caps, &chosen_regex)
|
||||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||||
|
|
||||||
// If the algo_name is provided, we use it, otherwise we try to detect it
|
// If the algo_name is provided, we use it, otherwise we try to detect it
|
||||||
|
@ -672,10 +674,6 @@ fn process_checksum_line(
|
||||||
Err(LineCheckError::DigestMismatch)
|
Err(LineCheckError::DigestMismatch)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
|
||||||
// Don't show any warning for empty or commented lines.
|
|
||||||
return Err(LineCheckError::Skipped);
|
|
||||||
}
|
|
||||||
if opts.warn {
|
if opts.warn {
|
||||||
let algo = if let Some(algo_name_input) = cli_algo_name {
|
let algo = if let Some(algo_name_input) = cli_algo_name {
|
||||||
algo_name_input.to_uppercase()
|
algo_name_input.to_uppercase()
|
||||||
|
@ -723,19 +721,11 @@ fn process_checksum_file(
|
||||||
let reader = BufReader::new(file);
|
let reader = BufReader::new(file);
|
||||||
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
||||||
|
|
||||||
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
|
|
||||||
log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
|
|
||||||
set_exit_code(1);
|
|
||||||
return Err(FileCheckError::AlgoDetectionError);
|
|
||||||
};
|
|
||||||
|
|
||||||
for (i, line) in lines.iter().enumerate() {
|
for (i, line) in lines.iter().enumerate() {
|
||||||
let line_result = process_checksum_line(
|
let line_result = process_checksum_line(
|
||||||
filename_input,
|
filename_input,
|
||||||
line,
|
line,
|
||||||
i,
|
i,
|
||||||
&chosen_regex,
|
|
||||||
is_algo_based_format,
|
|
||||||
cli_algo_name,
|
cli_algo_name,
|
||||||
cli_algo_length,
|
cli_algo_length,
|
||||||
opts,
|
opts,
|
||||||
|
@ -816,8 +806,7 @@ where
|
||||||
use FileCheckError::*;
|
use FileCheckError::*;
|
||||||
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
|
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
|
||||||
Err(UError(e)) => return Err(e),
|
Err(UError(e)) => return Err(e),
|
||||||
Err(ImproperlyFormatted) => break,
|
Err(CantOpenChecksumFile | ImproperlyFormatted) | Ok(_) => continue,
|
||||||
Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -926,6 +915,7 @@ pub fn escape_filename(filename: &Path) -> (String, &'static str) {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use std::ffi::OsString;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_unescape_filename() {
|
fn test_unescape_filename() {
|
||||||
|
@ -1161,66 +1151,39 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_determine_regex() {
|
fn test_determine_regex() {
|
||||||
// Test algo-based regex
|
// Test algo-based regex
|
||||||
let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
let line_algo_based =
|
||||||
.iter()
|
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||||
.map(|s| OsString::from(s.to_string()))
|
let (regex, algo_based) = determine_regex(&line_algo_based).unwrap();
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
|
|
||||||
assert!(algo_based);
|
assert!(algo_based);
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap()));
|
assert!(regex.is_match(os_str_as_bytes(&line_algo_based).unwrap()));
|
||||||
|
|
||||||
// Test double-space regex
|
// Test double-space regex
|
||||||
let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e  example.txt"]
|
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e  example.txt");
|
||||||
.iter()
|
let (regex, algo_based) = determine_regex(&line_double_space).unwrap();
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
|
||||||
assert!(!algo_based);
|
assert!(!algo_based);
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
assert!(regex.is_match(os_str_as_bytes(&line_double_space).unwrap()));
|
||||||
|
|
||||||
// Test single-space regex
|
// Test single-space regex
|
||||||
let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||||
.iter()
|
let (regex, algo_based) = determine_regex(&line_single_space).unwrap();
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
|
|
||||||
assert!(!algo_based);
|
assert!(!algo_based);
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap()));
|
assert!(regex.is_match(os_str_as_bytes(&line_single_space).unwrap()));
|
||||||
|
|
||||||
// Test double-space regex start with invalid
|
|
||||||
let lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e  example.txt"]
|
|
||||||
.iter()
|
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
|
||||||
assert!(!algo_based);
|
|
||||||
assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap()));
|
|
||||||
|
|
||||||
// Test invalid checksum line
|
// Test invalid checksum line
|
||||||
let lines_invalid = ["invalid checksum line"]
|
let line_invalid = OsString::from("invalid checksum line");
|
||||||
.iter()
|
assert!(determine_regex(&line_invalid).is_none());
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
assert!(determine_regex(&lines_invalid).is_none());
|
|
||||||
|
|
||||||
// Test leading space before checksum line
|
// Test leading space before checksum line
|
||||||
let lines_algo_based_leading_space =
|
let line_algo_based_leading_space =
|
||||||
[" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||||
.iter()
|
let res = determine_regex(&line_algo_based_leading_space);
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let res = determine_regex(&lines_algo_based_leading_space);
|
|
||||||
assert!(res.is_some());
|
assert!(res.is_some());
|
||||||
assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
|
assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
|
||||||
|
|
||||||
// Test trailing space after checksum line (should fail)
|
// Test trailing space after checksum line (should fail)
|
||||||
let lines_algo_based_leading_space =
|
let line_algo_based_leading_space =
|
||||||
["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
|
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
|
||||||
.iter()
|
let res = determine_regex(&line_algo_based_leading_space);
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let res = determine_regex(&lines_algo_based_leading_space);
|
|
||||||
assert!(res.is_none());
|
assert!(res.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue