mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 03:57:44 +00:00
Merge pull request #6929 from RenjiSann/cksum-fixes
cksum: even more fixes
This commit is contained in:
commit
209ec0b817
4 changed files with 398 additions and 197 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3503,6 +3503,7 @@ dependencies = [
|
|||
"glob",
|
||||
"hex",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"md-5",
|
||||
"memchr",
|
||||
|
|
|
@ -25,6 +25,7 @@ dns-lookup = { workspace = true, optional = true }
|
|||
dunce = { version = "1.0.4", optional = true }
|
||||
wild = "2.2.1"
|
||||
glob = { workspace = true }
|
||||
lazy_static = "1.4.0"
|
||||
# * optional
|
||||
itertools = { workspace = true, optional = true }
|
||||
thiserror = { workspace = true, optional = true }
|
||||
|
|
|
@ -2,13 +2,15 @@
|
|||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename
|
||||
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit
|
||||
|
||||
use data_encoding::BASE64;
|
||||
use lazy_static::lazy_static;
|
||||
use os_display::Quotable;
|
||||
use regex::bytes::{Captures, Regex};
|
||||
use regex::bytes::{Match, Regex};
|
||||
use std::{
|
||||
ffi::{OsStr, OsString},
|
||||
borrow::Cow,
|
||||
ffi::OsStr,
|
||||
fmt::Display,
|
||||
fs::File,
|
||||
io::{self, stdin, BufReader, Read, Write},
|
||||
|
@ -130,9 +132,6 @@ enum FileCheckError {
|
|||
ImproperlyFormatted,
|
||||
/// reading of the checksum file failed
|
||||
CantOpenChecksumFile,
|
||||
/// Algorithm detection was unsuccessful.
|
||||
/// Either none is provided, or there is a conflict.
|
||||
AlgoDetectionError,
|
||||
}
|
||||
|
||||
impl From<Box<dyn UError>> for FileCheckError {
|
||||
|
@ -422,14 +421,101 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
|
|||
// algo must be uppercase or b (for blake2b)
|
||||
// 2. <checksum> [* ]<filename>
|
||||
// 3. <checksum> [*]<filename> (only one space)
|
||||
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$";
|
||||
const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
||||
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
||||
|
||||
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
|
||||
|
||||
// In this case, we ignore the *
|
||||
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?(?-u:.*))$";
|
||||
|
||||
lazy_static! {
|
||||
static ref R_ALGO_BASED: Regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
||||
static ref R_DOUBLE_SPACE: Regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
||||
static ref R_SINGLE_SPACE: Regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
enum LineFormat {
|
||||
AlgoBased,
|
||||
SingleSpace,
|
||||
DoubleSpace,
|
||||
}
|
||||
|
||||
impl LineFormat {
|
||||
fn to_regex(self) -> &'static Regex {
|
||||
match self {
|
||||
LineFormat::AlgoBased => &R_ALGO_BASED,
|
||||
LineFormat::SingleSpace => &R_SINGLE_SPACE,
|
||||
LineFormat::DoubleSpace => &R_DOUBLE_SPACE,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hold the data extracted from a checksum line.
|
||||
struct LineInfo {
|
||||
algo_name: Option<String>,
|
||||
algo_bit_len: Option<usize>,
|
||||
checksum: String,
|
||||
filename: Vec<u8>,
|
||||
|
||||
format: LineFormat,
|
||||
}
|
||||
|
||||
impl LineInfo {
|
||||
/// Returns a `LineInfo` parsed from a checksum line.
|
||||
/// The function will run 3 regexes against the line and select the first one that matches
|
||||
/// to populate the fields of the struct.
|
||||
/// However, there is a catch to handle regarding the handling of `cached_regex`.
|
||||
/// In case of non-algo-based regex, if `cached_regex` is Some, it must take the priority
|
||||
/// over the detected regex. Otherwise, we must set it the the detected regex.
|
||||
/// This specific behavior is emphasized by the test
|
||||
/// `test_hashsum::test_check_md5sum_only_one_space`.
|
||||
fn parse(s: impl AsRef<OsStr>, cached_regex: &mut Option<LineFormat>) -> Option<Self> {
|
||||
let regexes: &[(&'static Regex, LineFormat)] = &[
|
||||
(&R_ALGO_BASED, LineFormat::AlgoBased),
|
||||
(&R_DOUBLE_SPACE, LineFormat::DoubleSpace),
|
||||
(&R_SINGLE_SPACE, LineFormat::SingleSpace),
|
||||
];
|
||||
|
||||
let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed");
|
||||
|
||||
for (regex, format) in regexes {
|
||||
if !regex.is_match(line_bytes) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut r = *regex;
|
||||
if *format != LineFormat::AlgoBased {
|
||||
// The cached regex ensures that when processing non-algo based regexes,
|
||||
// it cannot be changed (can't have single and double space regexes
|
||||
// used in the same file).
|
||||
if cached_regex.is_some() {
|
||||
r = cached_regex.unwrap().to_regex();
|
||||
} else {
|
||||
*cached_regex = Some(*format);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(caps) = r.captures(line_bytes) {
|
||||
// These unwraps are safe thanks to the regex
|
||||
let match_to_string = |m: Match| String::from_utf8(m.as_bytes().into()).unwrap();
|
||||
|
||||
return Some(Self {
|
||||
algo_name: caps.name("algo").map(match_to_string),
|
||||
algo_bit_len: caps
|
||||
.name("bits")
|
||||
.map(|m| match_to_string(m).parse::<usize>().unwrap()),
|
||||
checksum: caps.name("checksum").map(match_to_string).unwrap(),
|
||||
filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(),
|
||||
format: *format,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||
if input_is_stdin {
|
||||
"standard input"
|
||||
|
@ -440,41 +526,44 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
|||
.to_string()
|
||||
}
|
||||
|
||||
/// Determines the appropriate regular expression to use based on the provided lines.
|
||||
fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
||||
let regexes = [
|
||||
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
||||
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
||||
(Regex::new(SINGLE_SPACE_REGEX).unwrap(), false),
|
||||
(Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true),
|
||||
];
|
||||
|
||||
for line in lines {
|
||||
let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed");
|
||||
for (regex, is_algo_based) in ®exes {
|
||||
if regex.is_match(line_bytes) {
|
||||
return Some((regex.clone(), *is_algo_based));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Extract the expected digest from the checksum string
|
||||
fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> Option<String> {
|
||||
// Unwraps are safe, ensured by regex.
|
||||
let ck = caps.name("checksum").unwrap().as_bytes();
|
||||
fn get_expected_digest_as_hex_string(
|
||||
line_info: &LineInfo,
|
||||
len_hint: Option<usize>,
|
||||
) -> Option<Cow<str>> {
|
||||
let ck = &line_info.checksum;
|
||||
|
||||
if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 {
|
||||
BASE64.decode(ck).map(hex::encode).ok()
|
||||
} else if ck.len() % 2 == 0 {
|
||||
Some(str::from_utf8(ck).unwrap().to_string())
|
||||
} else {
|
||||
// TODO MSRV 1.82, replace `is_some_and` with `is_none_or`
|
||||
// to improve readability. This closure returns True if a length hint provided
|
||||
// and the argument isn't the same as the hint.
|
||||
let against_hint = |len| len_hint.is_some_and(|l| l != len);
|
||||
|
||||
if ck.len() % 2 != 0 {
|
||||
// If the length of the digest is not a multiple of 2, then it
|
||||
// must be improperly formatted (1 hex digit is 2 characters)
|
||||
None
|
||||
return None;
|
||||
}
|
||||
|
||||
// If the digest can be decoded as hexadecimal AND it length match the
|
||||
// one expected (in case it's given), just go with it.
|
||||
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && !against_hint(ck.len()) {
|
||||
return Some(Cow::Borrowed(ck));
|
||||
}
|
||||
|
||||
// If hexadecimal digest fails for any reason, interpret the digest as base 64.
|
||||
BASE64
|
||||
.decode(ck.as_bytes()) // Decode the string as encoded base64
|
||||
.map(hex::encode) // Encode it back as hexadecimal
|
||||
.map(Cow::<str>::Owned)
|
||||
.ok()
|
||||
.and_then(|s| {
|
||||
// Check the digest length
|
||||
if !against_hint(s.len()) {
|
||||
Some(s)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
|
||||
|
@ -548,17 +637,15 @@ fn get_input_file(filename: &OsStr) -> UResult<Box<dyn Read>> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched.
|
||||
/// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched.
|
||||
fn identify_algo_name_and_length(
|
||||
caps: &Captures,
|
||||
line_info: &LineInfo,
|
||||
algo_name_input: Option<&str>,
|
||||
) -> Option<(String, Option<usize>)> {
|
||||
// When the algo-based format is matched, extract details from regex captures
|
||||
let algorithm = caps
|
||||
.name("algo")
|
||||
.map_or(String::new(), |m| {
|
||||
String::from_utf8(m.as_bytes().into()).unwrap()
|
||||
})
|
||||
let algorithm = line_info
|
||||
.algo_name
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase();
|
||||
|
||||
// check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file
|
||||
|
@ -573,19 +660,119 @@ fn identify_algo_name_and_length(
|
|||
return None;
|
||||
}
|
||||
|
||||
let bits = caps.name("bits").map_or(Some(None), |m| {
|
||||
let bits_value = String::from_utf8(m.as_bytes().into())
|
||||
.unwrap()
|
||||
.parse::<usize>()
|
||||
.unwrap();
|
||||
if bits_value % 8 == 0 {
|
||||
Some(Some(bits_value / 8))
|
||||
} else {
|
||||
None // Return None to signal a divisibility issue
|
||||
let bytes = if let Some(bitlen) = line_info.algo_bit_len {
|
||||
if bitlen % 8 != 0 {
|
||||
// The given length is wrong
|
||||
return None;
|
||||
}
|
||||
})?;
|
||||
Some(bitlen / 8)
|
||||
} else if algorithm == ALGORITHM_OPTIONS_BLAKE2B {
|
||||
// Default length with BLAKE2b,
|
||||
Some(64)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Some((algorithm, bits))
|
||||
Some((algorithm, bytes))
|
||||
}
|
||||
|
||||
/// Given a filename and an algorithm, compute the digest and compare it with
|
||||
/// the expected one.
|
||||
fn compute_and_check_digest_from_file(
|
||||
filename: &[u8],
|
||||
expected_checksum: &str,
|
||||
mut algo: HashAlgorithm,
|
||||
opts: ChecksumOptions,
|
||||
) -> Result<(), LineCheckError> {
|
||||
let (filename_to_check_unescaped, prefix) = unescape_filename(filename);
|
||||
let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?;
|
||||
|
||||
// Open the input file
|
||||
let file_to_check = get_file_to_check(&real_filename_to_check, opts)?;
|
||||
let mut file_reader = BufReader::new(file_to_check);
|
||||
|
||||
// Read the file and calculate the checksum
|
||||
let create_fn = &mut algo.create_fn;
|
||||
let mut digest = create_fn();
|
||||
let (calculated_checksum, _) =
|
||||
digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap();
|
||||
|
||||
// Do the checksum validation
|
||||
let checksum_correct = expected_checksum == calculated_checksum;
|
||||
print_file_report(
|
||||
std::io::stdout(),
|
||||
filename,
|
||||
FileChecksumResult::from_bool(checksum_correct),
|
||||
prefix,
|
||||
opts,
|
||||
);
|
||||
|
||||
if checksum_correct {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(LineCheckError::DigestMismatch)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check a digest checksum with non-algo based pre-treatment.
|
||||
fn process_algo_based_line(
|
||||
line_info: &LineInfo,
|
||||
cli_algo_name: Option<&str>,
|
||||
opts: ChecksumOptions,
|
||||
) -> Result<(), LineCheckError> {
|
||||
let filename_to_check = line_info.filename.as_slice();
|
||||
|
||||
let (algo_name, algo_byte_len) = identify_algo_name_and_length(line_info, cli_algo_name)
|
||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||
|
||||
// If the digest bitlen is known, we can check the format of the expected
|
||||
// checksum with it.
|
||||
let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) {
|
||||
(ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
|
||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||
|
||||
let algo = detect_algo(&algo_name, algo_byte_len)?;
|
||||
|
||||
compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
|
||||
}
|
||||
|
||||
/// Check a digest checksum with non-algo based pre-treatment.
|
||||
fn process_non_algo_based_line(
|
||||
line_number: usize,
|
||||
line_info: &LineInfo,
|
||||
cli_algo_name: &str,
|
||||
cli_algo_length: Option<usize>,
|
||||
opts: ChecksumOptions,
|
||||
) -> Result<(), LineCheckError> {
|
||||
let mut filename_to_check = line_info.filename.as_slice();
|
||||
if filename_to_check.starts_with(b"*")
|
||||
&& line_number == 0
|
||||
&& line_info.format == LineFormat::SingleSpace
|
||||
{
|
||||
// Remove the leading asterisk if present - only for the first line
|
||||
filename_to_check = &filename_to_check[1..];
|
||||
}
|
||||
let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
|
||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||
|
||||
// When a specific algorithm name is input, use it and use the provided bits
|
||||
// except when dealing with blake2b, where we will detect the length
|
||||
let (algo_name, algo_byte_len) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B {
|
||||
// division by 2 converts the length of the Blake2b checksum from hexadecimal
|
||||
// characters to bytes, as each byte is represented by two hexadecimal characters.
|
||||
let length = Some(expected_checksum.len() / 2);
|
||||
(ALGORITHM_OPTIONS_BLAKE2B.to_string(), length)
|
||||
} else {
|
||||
(cli_algo_name.to_lowercase(), cli_algo_length)
|
||||
};
|
||||
|
||||
let algo = detect_algo(&algo_name, algo_byte_len)?;
|
||||
|
||||
compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
|
||||
}
|
||||
|
||||
/// Parses a checksum line, detect the algorithm to use, read the file and produce
|
||||
|
@ -594,88 +781,36 @@ fn identify_algo_name_and_length(
|
|||
/// Returns `Ok(bool)` if the comparison happened, bool indicates if the digest
|
||||
/// matched the expected.
|
||||
/// If the comparison didn't happen, return a `LineChecksumError`.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn process_checksum_line(
|
||||
filename_input: &OsStr,
|
||||
line: &OsStr,
|
||||
i: usize,
|
||||
chosen_regex: &Regex,
|
||||
is_algo_based_format: bool,
|
||||
cli_algo_name: Option<&str>,
|
||||
cli_algo_length: Option<usize>,
|
||||
opts: ChecksumOptions,
|
||||
cached_regex: &mut Option<LineFormat>,
|
||||
) -> Result<(), LineCheckError> {
|
||||
let line_bytes = os_str_as_bytes(line)?;
|
||||
if let Some(caps) = chosen_regex.captures(line_bytes) {
|
||||
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
|
||||
|
||||
if filename_to_check.starts_with(b"*")
|
||||
&& i == 0
|
||||
&& chosen_regex.as_str() == SINGLE_SPACE_REGEX
|
||||
{
|
||||
// Remove the leading asterisk if present - only for the first line
|
||||
filename_to_check = &filename_to_check[1..];
|
||||
}
|
||||
// Early return on empty or commented lines.
|
||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||
return Err(LineCheckError::Skipped);
|
||||
}
|
||||
|
||||
let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex)
|
||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||
|
||||
// If the algo_name is provided, we use it, otherwise we try to detect it
|
||||
let (algo_name, length) = if is_algo_based_format {
|
||||
identify_algo_name_and_length(&caps, cli_algo_name)
|
||||
.ok_or(LineCheckError::ImproperlyFormatted)?
|
||||
} else if let Some(a) = cli_algo_name {
|
||||
// When a specific algorithm name is input, use it and use the provided bits
|
||||
// except when dealing with blake2b, where we will detect the length
|
||||
if cli_algo_name == Some(ALGORITHM_OPTIONS_BLAKE2B) {
|
||||
// division by 2 converts the length of the Blake2b checksum from hexadecimal
|
||||
// characters to bytes, as each byte is represented by two hexadecimal characters.
|
||||
let length = Some(expected_checksum.len() / 2);
|
||||
(ALGORITHM_OPTIONS_BLAKE2B.to_string(), length)
|
||||
} else {
|
||||
(a.to_lowercase(), cli_algo_length)
|
||||
}
|
||||
// Use `LineInfo` to extract the data of a line.
|
||||
// Then, depending on its format, apply a different pre-treatment.
|
||||
if let Some(line_info) = LineInfo::parse(line, cached_regex) {
|
||||
if line_info.format == LineFormat::AlgoBased {
|
||||
process_algo_based_line(&line_info, cli_algo_name, opts)
|
||||
} else if let Some(cli_algo) = cli_algo_name {
|
||||
// If we match a non-algo based regex, we expect a cli argument
|
||||
// to give us the algorithm to use
|
||||
process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts)
|
||||
} else {
|
||||
// Default case if no algorithm is specified and non-algo based format is matched
|
||||
// We have no clue of what algorithm to use
|
||||
return Err(LineCheckError::ImproperlyFormatted);
|
||||
};
|
||||
|
||||
let mut algo = detect_algo(&algo_name, length)?;
|
||||
|
||||
let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check);
|
||||
|
||||
let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?;
|
||||
|
||||
// manage the input file
|
||||
let file_to_check = get_file_to_check(&real_filename_to_check, opts)?;
|
||||
let mut file_reader = BufReader::new(file_to_check);
|
||||
|
||||
// Read the file and calculate the checksum
|
||||
let create_fn = &mut algo.create_fn;
|
||||
let mut digest = create_fn();
|
||||
let (calculated_checksum, _) =
|
||||
digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap();
|
||||
|
||||
// Do the checksum validation
|
||||
let checksum_correct = expected_checksum == calculated_checksum;
|
||||
print_file_report(
|
||||
std::io::stdout(),
|
||||
filename_to_check,
|
||||
FileChecksumResult::from_bool(checksum_correct),
|
||||
prefix,
|
||||
opts,
|
||||
);
|
||||
|
||||
if checksum_correct {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(LineCheckError::DigestMismatch)
|
||||
}
|
||||
} else {
|
||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||
// Don't show any warning for empty or commented lines.
|
||||
return Err(LineCheckError::Skipped);
|
||||
}
|
||||
if opts.warn {
|
||||
let algo = if let Some(algo_name_input) = cli_algo_name {
|
||||
algo_name_input.to_uppercase()
|
||||
|
@ -723,22 +858,19 @@ fn process_checksum_file(
|
|||
let reader = BufReader::new(file);
|
||||
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
||||
|
||||
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
|
||||
log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
|
||||
set_exit_code(1);
|
||||
return Err(FileCheckError::AlgoDetectionError);
|
||||
};
|
||||
// cached_regex is used to ensure that several non algo-based checksum line
|
||||
// will use the same regex.
|
||||
let mut cached_regex = None;
|
||||
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
let line_result = process_checksum_line(
|
||||
filename_input,
|
||||
line,
|
||||
i,
|
||||
&chosen_regex,
|
||||
is_algo_based_format,
|
||||
cli_algo_name,
|
||||
cli_algo_length,
|
||||
opts,
|
||||
&mut cached_regex,
|
||||
);
|
||||
|
||||
// Match a first time to elude critical UErrors, and increment the total
|
||||
|
@ -816,8 +948,7 @@ where
|
|||
use FileCheckError::*;
|
||||
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
|
||||
Err(UError(e)) => return Err(e),
|
||||
Err(ImproperlyFormatted) => break,
|
||||
Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue,
|
||||
Err(CantOpenChecksumFile | ImproperlyFormatted) | Ok(_) => continue,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -926,6 +1057,7 @@ pub fn escape_filename(filename: &Path) -> (String, &'static str) {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::ffi::OsString;
|
||||
|
||||
#[test]
|
||||
fn test_unescape_filename() {
|
||||
|
@ -1159,79 +1291,71 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_determine_regex() {
|
||||
fn test_line_info() {
|
||||
let mut cached_regex = None;
|
||||
|
||||
// Test algo-based regex
|
||||
let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
|
||||
assert!(algo_based);
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap()));
|
||||
let line_algo_based =
|
||||
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||
let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap();
|
||||
assert_eq!(line_info.algo_name.as_deref(), Some("MD5"));
|
||||
assert!(line_info.algo_bit_len.is_none());
|
||||
assert_eq!(line_info.filename, b"example.txt");
|
||||
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
||||
assert!(cached_regex.is_none());
|
||||
|
||||
// Test double-space regex
|
||||
let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
||||
assert!(!algo_based);
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
||||
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||
let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap();
|
||||
assert!(line_info.algo_name.is_none());
|
||||
assert!(line_info.algo_bit_len.is_none());
|
||||
assert_eq!(line_info.filename, b"example.txt");
|
||||
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||
assert_eq!(line_info.format, LineFormat::DoubleSpace);
|
||||
assert!(cached_regex.is_some());
|
||||
|
||||
cached_regex = None;
|
||||
|
||||
// Test single-space regex
|
||||
let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
|
||||
assert!(!algo_based);
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap()));
|
||||
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||
let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap();
|
||||
assert!(line_info.algo_name.is_none());
|
||||
assert!(line_info.algo_bit_len.is_none());
|
||||
assert_eq!(line_info.filename, b"example.txt");
|
||||
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||
assert_eq!(line_info.format, LineFormat::SingleSpace);
|
||||
assert!(cached_regex.is_some());
|
||||
|
||||
// Test double-space regex start with invalid
|
||||
let lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
||||
assert!(!algo_based);
|
||||
assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap()));
|
||||
cached_regex = None;
|
||||
|
||||
// Test invalid checksum line
|
||||
let lines_invalid = ["invalid checksum line"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
assert!(determine_regex(&lines_invalid).is_none());
|
||||
let line_invalid = OsString::from("invalid checksum line");
|
||||
assert!(LineInfo::parse(&line_invalid, &mut cached_regex).is_none());
|
||||
assert!(cached_regex.is_none());
|
||||
|
||||
// Test leading space before checksum line
|
||||
let lines_algo_based_leading_space =
|
||||
[" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let res = determine_regex(&lines_algo_based_leading_space);
|
||||
assert!(res.is_some());
|
||||
assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
|
||||
let line_algo_based_leading_space =
|
||||
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||
let line_info = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex).unwrap();
|
||||
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
||||
assert!(cached_regex.is_none());
|
||||
|
||||
// Test trailing space after checksum line (should fail)
|
||||
let lines_algo_based_leading_space =
|
||||
["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
|
||||
.iter()
|
||||
.map(|s| OsString::from(s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
let res = determine_regex(&lines_algo_based_leading_space);
|
||||
let line_algo_based_leading_space =
|
||||
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
|
||||
let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex);
|
||||
assert!(res.is_none());
|
||||
assert!(cached_regex.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_expected_digest() {
|
||||
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
||||
let caps = re
|
||||
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
|
||||
.unwrap();
|
||||
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
|
||||
let mut cached_regex = None;
|
||||
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
|
||||
|
||||
let result = get_expected_digest_as_hex_string(&caps, &re);
|
||||
let result = get_expected_digest_as_hex_string(&line_info, None);
|
||||
|
||||
assert_eq!(
|
||||
result.unwrap(),
|
||||
|
@ -1241,12 +1365,12 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_get_expected_checksum_invalid() {
|
||||
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
||||
let caps = re
|
||||
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU")
|
||||
.unwrap();
|
||||
// The line misses a '=' at the end to be valid base64
|
||||
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU");
|
||||
let mut cached_regex = None;
|
||||
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
|
||||
|
||||
let result = get_expected_digest_as_hex_string(&caps, &re);
|
||||
let result = get_expected_digest_as_hex_string(&line_info, None);
|
||||
|
||||
assert!(result.is_none());
|
||||
}
|
||||
|
|
|
@ -1443,7 +1443,7 @@ mod check_utf8 {
|
|||
let scene = TestScenario::new(util_name!());
|
||||
let at = &scene.fixtures;
|
||||
let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec());
|
||||
at.touch(&filename);
|
||||
at.touch(filename);
|
||||
|
||||
// Checksum match
|
||||
at.write_bytes("check",
|
||||
|
@ -1480,7 +1480,6 @@ mod check_utf8 {
|
|||
}
|
||||
}
|
||||
|
||||
#[ignore = "not yet implemented"]
|
||||
#[test]
|
||||
fn test_check_blake_length_guess() {
|
||||
let correct_lines = [
|
||||
|
@ -1523,7 +1522,6 @@ fn test_check_blake_length_guess() {
|
|||
.stderr_contains("foo.sums: no properly formatted checksum lines found");
|
||||
}
|
||||
|
||||
#[ignore = "not yet implemented"]
|
||||
#[test]
|
||||
fn test_check_confusing_base64() {
|
||||
let cksum = "BLAKE2b-48 (foo.dat) = fc1f97C4";
|
||||
|
@ -1544,7 +1542,6 @@ fn test_check_confusing_base64() {
|
|||
|
||||
/// This test checks that when a file contains several checksum lines
|
||||
/// with different encoding, the decoding still works.
|
||||
#[ignore = "not yet implemented"]
|
||||
#[test]
|
||||
fn test_check_mix_hex_base64() {
|
||||
let b64 = "BLAKE2b-128 (foo1.dat) = BBNuJPhdRwRlw9tm5Y7VbA==";
|
||||
|
@ -1769,3 +1766,81 @@ mod gnu_cksum_base64 {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The tests in this module check the behavior of cksum when given different
|
||||
/// checksum formats and algorithms in the same file, while specifying an
|
||||
/// algorithm on CLI or not.
|
||||
mod format_mix {
|
||||
use super::*;
|
||||
|
||||
// First line is algo-based, second one is not
|
||||
const INPUT_ALGO_NON_ALGO: &str = "\
|
||||
BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce\n\
|
||||
786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce foo";
|
||||
|
||||
// First line is non algo-based, second one is
|
||||
const INPUT_NON_ALGO_ALGO: &str = "\
|
||||
786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce foo\n\
|
||||
BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce";
|
||||
|
||||
/// Make a simple scene with foo and bar empty files
|
||||
fn make_scene() -> TestScenario {
|
||||
let scene = TestScenario::new(util_name!());
|
||||
let at = &scene.fixtures;
|
||||
|
||||
at.touch("foo");
|
||||
at.touch("bar");
|
||||
|
||||
scene
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_cli_algo_non_algo() {
|
||||
let scene = make_scene();
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--check")
|
||||
.arg("--algo=blake2b")
|
||||
.pipe_in(INPUT_ALGO_NON_ALGO)
|
||||
.succeeds()
|
||||
.stdout_contains("bar: OK\nfoo: OK")
|
||||
.no_stderr();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_cli_non_algo_algo() {
|
||||
let scene = make_scene();
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--check")
|
||||
.arg("--algo=blake2b")
|
||||
.pipe_in(INPUT_NON_ALGO_ALGO)
|
||||
.succeeds()
|
||||
.stdout_contains("foo: OK\nbar: OK")
|
||||
.no_stderr();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_algo_non_algo() {
|
||||
let scene = make_scene();
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--check")
|
||||
.pipe_in(INPUT_ALGO_NON_ALGO)
|
||||
.succeeds()
|
||||
.stdout_contains("bar: OK")
|
||||
.stderr_contains("cksum: WARNING: 1 line is improperly formatted");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_non_algo_algo() {
|
||||
let scene = make_scene();
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("--check")
|
||||
.pipe_in(INPUT_NON_ALGO_ALGO)
|
||||
.succeeds()
|
||||
.stdout_contains("bar: OK")
|
||||
.stderr_contains("cksum: WARNING: 1 line is improperly formatted");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue