mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
cksum/hashsum: try to detect the format beyond the first line
This commit is contained in:
parent
2c83b28d18
commit
84d90fcbdf
2 changed files with 123 additions and 45 deletions
|
@ -310,6 +310,32 @@ const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>
|
||||||
// In this case, we ignore the *
|
// In this case, we ignore the *
|
||||||
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<binary>\*?)(?P<filename>.*)$";
|
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<binary>\*?)(?P<filename>.*)$";
|
||||||
|
|
||||||
|
/// Determines the appropriate regular expression to use based on the provided lines.
|
||||||
|
fn determine_regex(filename: &OsStr, lines: &[String]) -> UResult<(Regex, bool)> {
|
||||||
|
let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
||||||
|
let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
||||||
|
let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
||||||
|
|
||||||
|
for line in lines {
|
||||||
|
let line_trim = line.trim();
|
||||||
|
if algo_based_regex.is_match(line_trim) {
|
||||||
|
return Ok((algo_based_regex, true));
|
||||||
|
} else if double_space_regex.is_match(line_trim) {
|
||||||
|
return Ok((double_space_regex, false));
|
||||||
|
} else if single_space_regex.is_match(line_trim) {
|
||||||
|
return Ok((single_space_regex, false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidData,
|
||||||
|
format!(
|
||||||
|
"{}: no properly formatted checksum lines found",
|
||||||
|
filename.maybe_quote()
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.into())
|
||||||
|
}
|
||||||
/***
|
/***
|
||||||
* Do the checksum validation (can be strict or not)
|
* Do the checksum validation (can be strict or not)
|
||||||
*/
|
*/
|
||||||
|
@ -328,10 +354,6 @@ pub fn perform_checksum_validation<'a, I>(
|
||||||
where
|
where
|
||||||
I: Iterator<Item = &'a OsStr>,
|
I: Iterator<Item = &'a OsStr>,
|
||||||
{
|
{
|
||||||
let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
|
||||||
let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
|
||||||
let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
|
||||||
|
|
||||||
// if cksum has several input files, it will print the result for each file
|
// if cksum has several input files, it will print the result for each file
|
||||||
for filename_input in files {
|
for filename_input in files {
|
||||||
let mut bad_format = 0;
|
let mut bad_format = 0;
|
||||||
|
@ -358,31 +380,13 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut reader = BufReader::new(file);
|
|
||||||
|
|
||||||
let mut first_line = String::new();
|
let reader = BufReader::new(file);
|
||||||
reader.read_line(&mut first_line)?;
|
let lines: Vec<String> = reader.lines().collect::<Result<_, _>>()?;
|
||||||
|
let (chosen_regex, algo_based_format) = determine_regex(filename_input, &lines)?;
|
||||||
|
|
||||||
// Determine which regular expression to use based on the first line
|
// Process each line
|
||||||
let first_line_trim = first_line.trim();
|
for (i, line) in lines.iter().enumerate() {
|
||||||
let (chosen_regex, algo_based_format) = if algo_based_regex.is_match(first_line_trim) {
|
|
||||||
(&algo_based_regex, true)
|
|
||||||
} else if double_space_regex.is_match(first_line_trim) {
|
|
||||||
// If the first line contains a double space, use the double space regex
|
|
||||||
(&double_space_regex, false)
|
|
||||||
} else {
|
|
||||||
// It is probably rare but sometimes the checksum file may contain a single space
|
|
||||||
(&single_space_regex, false)
|
|
||||||
};
|
|
||||||
|
|
||||||
// Push the first line back to the reader
|
|
||||||
let first_line_reader = io::Cursor::new(first_line);
|
|
||||||
let chain_reader = first_line_reader.chain(reader);
|
|
||||||
let reader = BufReader::new(chain_reader);
|
|
||||||
|
|
||||||
// for each line in the input, check if it is a valid checksum line
|
|
||||||
for (i, line) in reader.lines().enumerate() {
|
|
||||||
let line = line.unwrap_or_else(|_| String::new());
|
|
||||||
if let Some(caps) = chosen_regex.captures(&line) {
|
if let Some(caps) = chosen_regex.captures(&line) {
|
||||||
properly_formatted = true;
|
properly_formatted = true;
|
||||||
|
|
||||||
|
@ -886,4 +890,50 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that `determine_regex` selects the expected pattern for each
/// supported checksum-line layout, skips leading lines that match nothing,
/// and reports an error when no line is recognizable.
#[test]
fn test_determine_regex() {
    let filename = std::ffi::OsStr::new("test.txt");

    // Algo-based format: "ALGO (file) = checksum".
    let lines_algo_based =
        vec!["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e".to_string()];
    let (regex, algo_based) = determine_regex(filename, &lines_algo_based).unwrap();
    assert!(algo_based);
    assert_eq!(regex.as_str(), ALGO_BASED_REGEX);
    assert!(regex.is_match(&lines_algo_based[0]));

    // Double-space format: the checksum and the name are separated by two spaces.
    let lines_double_space = vec!["d41d8cd98f00b204e9800998ecf8427e  example.txt".to_string()];
    let (regex, algo_based) = determine_regex(filename, &lines_double_space).unwrap();
    assert!(!algo_based);
    // Pin the selected pattern: the single-space pattern would also match this line.
    assert_eq!(regex.as_str(), DOUBLE_SPACE_REGEX);
    assert!(regex.is_match(&lines_double_space[0]));

    // Single-space format: the checksum and the name are separated by one space.
    let lines_single_space = vec!["d41d8cd98f00b204e9800998ecf8427e example.txt".to_string()];
    let (regex, algo_based) = determine_regex(filename, &lines_single_space).unwrap();
    assert!(!algo_based);
    assert_eq!(regex.as_str(), SINGLE_SPACE_REGEX);
    assert!(regex.is_match(&lines_single_space[0]));

    // A leading line that matches nothing is skipped; detection uses the next line.
    let lines_leading_invalid = vec![
        "ERR".to_string(),
        "d41d8cd98f00b204e9800998ecf8427e  example.txt".to_string(),
    ];
    let (regex, algo_based) = determine_regex(filename, &lines_leading_invalid).unwrap();
    assert!(!algo_based);
    assert_eq!(regex.as_str(), DOUBLE_SPACE_REGEX);
    assert!(!regex.is_match(&lines_leading_invalid[0]));
    assert!(regex.is_match(&lines_leading_invalid[1]));

    // No line matches any supported format.
    let lines_invalid = vec!["invalid checksum line".to_string()];
    assert!(determine_regex(filename, &lines_invalid).is_err());
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -724,47 +724,43 @@ fn test_check_one_two_space_star() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_check_one_two_space_star_start_without_star() {
|
fn test_check_space_star_or_not() {
|
||||||
let scene = TestScenario::new(util_name!());
|
let scene = TestScenario::new(util_name!());
|
||||||
let at = &scene.fixtures;
|
let at = &scene.fixtures;
|
||||||
|
|
||||||
at.touch("empty");
|
at.touch("a");
|
||||||
at.touch("f");
|
at.touch("*c");
|
||||||
|
|
||||||
// with one space, the "*" is removed
|
// with one space, the "*" is removed
|
||||||
at.write(
|
at.write(
|
||||||
"in.md5",
|
"in.md5",
|
||||||
"d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e *empty\n",
|
"d41d8cd98f00b204e9800998ecf8427e *c\n
|
||||||
|
d41d8cd98f00b204e9800998ecf8427e a\n",
|
||||||
);
|
);
|
||||||
|
|
||||||
scene
|
scene
|
||||||
.ccmd("md5sum")
|
.ccmd("md5sum")
|
||||||
.arg("--check")
|
.arg("--check")
|
||||||
.arg(at.subdir.join("in.md5"))
|
.arg(at.subdir.join("in.md5"))
|
||||||
.succeeds()
|
.fails()
|
||||||
.stdout_is("f: OK\nempty: OK\n");
|
.stdout_contains("c: FAILED")
|
||||||
|
.stdout_does_not_contain("a: FAILED")
|
||||||
|
.stderr_contains("WARNING: 1 line is improperly formatted");
|
||||||
|
|
||||||
// with two spaces, the "*" is not removed
|
|
||||||
at.write(
|
at.write(
|
||||||
"in.md5",
|
"in.md5",
|
||||||
"d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e *empty\n",
|
"d41d8cd98f00b204e9800998ecf8427e a\n
|
||||||
|
d41d8cd98f00b204e9800998ecf8427e *c\n",
|
||||||
);
|
);
|
||||||
|
|
||||||
// First should fail as *empty doesn't exist
|
// First should fail as *empty doesn't exist
|
||||||
scene
|
scene
|
||||||
.ccmd("md5sum")
|
.ccmd("md5sum")
|
||||||
.arg("--check")
|
.arg("--check")
|
||||||
.arg(at.subdir.join("in.md5"))
|
.arg(at.subdir.join("in.md5"))
|
||||||
.fails()
|
.fails()
|
||||||
.stdout_is("f: OK\n*empty: FAILED open or read\n");
|
.stdout_contains("a: FAILED")
|
||||||
|
.stdout_contains("*c: FAILED");
|
||||||
at.touch("*empty");
|
|
||||||
// Should pass as we have the file
|
|
||||||
scene
|
|
||||||
.ccmd("md5sum")
|
|
||||||
.arg("--check")
|
|
||||||
.arg(at.subdir.join("in.md5"))
|
|
||||||
.succeeds()
|
|
||||||
.stdout_is("f: OK\n*empty: OK\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -782,6 +778,38 @@ fn test_check_no_backslash_no_space() {
|
||||||
.stdout_is("f: OK\n");
|
.stdout_is("f: OK\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A checksum file whose only line is a truncated algo-based entry must make
/// `md5sum --check` fail with the "no properly formatted checksum lines found"
/// diagnostic on stderr.
#[test]
fn test_incomplete_format() {
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    fixtures.touch("f");
    // Only the beginning of an "ALGO (file) = checksum" line is present.
    fixtures.write("in.md5", "MD5 (\n");

    let checksum_file = fixtures.subdir.join("in.md5");
    scenario
        .ccmd("md5sum")
        .arg("--check")
        .arg(checksum_file)
        .fails()
        .stderr_contains("no properly formatted checksum lines found");
}
|
||||||
|
|
||||||
|
/// A malformed first line must not prevent format detection: with `--strict`
/// the following valid line is still verified ("f: OK"), while the run fails
/// and warns about the one improperly formatted line.
#[test]
fn test_start_error() {
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    fixtures.touch("f");
    // The first line is garbage, the second is a valid entry for the empty file "f".
    fixtures.write("in.md5", "ERR\nd41d8cd98f00b204e9800998ecf8427e f\n");

    let checksum_file = fixtures.subdir.join("in.md5");
    scenario
        .ccmd("md5sum")
        .arg("--check")
        .arg("--strict")
        .arg(checksum_file)
        .fails()
        .stdout_is("f: OK\n")
        .stderr_contains("WARNING: 1 line is improperly formatted");
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_check_check_ignore_no_file() {
|
fn test_check_check_ignore_no_file() {
|
||||||
let scene = TestScenario::new(util_name!());
|
let scene = TestScenario::new(util_name!());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue