mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
Merge pull request #6929 from RenjiSann/cksum-fixes
cksum: even more fixes
This commit is contained in:
commit
209ec0b817
4 changed files with 398 additions and 197 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3503,6 +3503,7 @@ dependencies = [
|
||||||
"glob",
|
"glob",
|
||||||
"hex",
|
"hex",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
"md-5",
|
"md-5",
|
||||||
"memchr",
|
"memchr",
|
||||||
|
|
|
@ -25,6 +25,7 @@ dns-lookup = { workspace = true, optional = true }
|
||||||
dunce = { version = "1.0.4", optional = true }
|
dunce = { version = "1.0.4", optional = true }
|
||||||
wild = "2.2.1"
|
wild = "2.2.1"
|
||||||
glob = { workspace = true }
|
glob = { workspace = true }
|
||||||
|
lazy_static = "1.4.0"
|
||||||
# * optional
|
# * optional
|
||||||
itertools = { workspace = true, optional = true }
|
itertools = { workspace = true, optional = true }
|
||||||
thiserror = { workspace = true, optional = true }
|
thiserror = { workspace = true, optional = true }
|
||||||
|
|
|
@ -2,13 +2,15 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename
|
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit
|
||||||
|
|
||||||
use data_encoding::BASE64;
|
use data_encoding::BASE64;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
use os_display::Quotable;
|
use os_display::Quotable;
|
||||||
use regex::bytes::{Captures, Regex};
|
use regex::bytes::{Match, Regex};
|
||||||
use std::{
|
use std::{
|
||||||
ffi::{OsStr, OsString},
|
borrow::Cow,
|
||||||
|
ffi::OsStr,
|
||||||
fmt::Display,
|
fmt::Display,
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{self, stdin, BufReader, Read, Write},
|
io::{self, stdin, BufReader, Read, Write},
|
||||||
|
@ -130,9 +132,6 @@ enum FileCheckError {
|
||||||
ImproperlyFormatted,
|
ImproperlyFormatted,
|
||||||
/// reading of the checksum file failed
|
/// reading of the checksum file failed
|
||||||
CantOpenChecksumFile,
|
CantOpenChecksumFile,
|
||||||
/// Algorithm detection was unsuccessful.
|
|
||||||
/// Either none is provided, or there is a conflict.
|
|
||||||
AlgoDetectionError,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Box<dyn UError>> for FileCheckError {
|
impl From<Box<dyn UError>> for FileCheckError {
|
||||||
|
@ -422,14 +421,101 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
|
||||||
// algo must be uppercase or b (for blake2b)
|
// algo must be uppercase or b (for blake2b)
|
||||||
// 2. <checksum> [* ]<filename>
|
// 2. <checksum> [* ]<filename>
|
||||||
// 3. <checksum> [*]<filename> (only one space)
|
// 3. <checksum> [*]<filename> (only one space)
|
||||||
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$";
|
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
||||||
const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
|
||||||
|
|
||||||
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
|
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
|
||||||
|
|
||||||
// In this case, we ignore the *
|
// In this case, we ignore the *
|
||||||
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?(?-u:.*))$";
|
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?(?-u:.*))$";
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref R_ALGO_BASED: Regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
||||||
|
static ref R_DOUBLE_SPACE: Regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
||||||
|
static ref R_SINGLE_SPACE: Regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
enum LineFormat {
|
||||||
|
AlgoBased,
|
||||||
|
SingleSpace,
|
||||||
|
DoubleSpace,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LineFormat {
|
||||||
|
fn to_regex(self) -> &'static Regex {
|
||||||
|
match self {
|
||||||
|
LineFormat::AlgoBased => &R_ALGO_BASED,
|
||||||
|
LineFormat::SingleSpace => &R_SINGLE_SPACE,
|
||||||
|
LineFormat::DoubleSpace => &R_DOUBLE_SPACE,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Hold the data extracted from a checksum line.
|
||||||
|
struct LineInfo {
|
||||||
|
algo_name: Option<String>,
|
||||||
|
algo_bit_len: Option<usize>,
|
||||||
|
checksum: String,
|
||||||
|
filename: Vec<u8>,
|
||||||
|
|
||||||
|
format: LineFormat,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LineInfo {
|
||||||
|
/// Returns a `LineInfo` parsed from a checksum line.
|
||||||
|
/// The function will run 3 regexes against the line and select the first one that matches
|
||||||
|
/// to populate the fields of the struct.
|
||||||
|
/// However, there is a catch regarding the handling of `cached_regex`.
|
||||||
|
/// In case of non-algo-based regex, if `cached_regex` is Some, it must take the priority
|
||||||
|
/// over the detected regex. Otherwise, we must set it to the detected regex.
|
||||||
|
/// This specific behavior is emphasized by the test
|
||||||
|
/// `test_hashsum::test_check_md5sum_only_one_space`.
|
||||||
|
fn parse(s: impl AsRef<OsStr>, cached_regex: &mut Option<LineFormat>) -> Option<Self> {
|
||||||
|
let regexes: &[(&'static Regex, LineFormat)] = &[
|
||||||
|
(&R_ALGO_BASED, LineFormat::AlgoBased),
|
||||||
|
(&R_DOUBLE_SPACE, LineFormat::DoubleSpace),
|
||||||
|
(&R_SINGLE_SPACE, LineFormat::SingleSpace),
|
||||||
|
];
|
||||||
|
|
||||||
|
let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed");
|
||||||
|
|
||||||
|
for (regex, format) in regexes {
|
||||||
|
if !regex.is_match(line_bytes) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut r = *regex;
|
||||||
|
if *format != LineFormat::AlgoBased {
|
||||||
|
// The cached regex ensures that when processing non-algo based regexes,
|
||||||
|
// it cannot be changed (can't have single and double space regexes
|
||||||
|
// used in the same file).
|
||||||
|
if cached_regex.is_some() {
|
||||||
|
r = cached_regex.unwrap().to_regex();
|
||||||
|
} else {
|
||||||
|
*cached_regex = Some(*format);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(caps) = r.captures(line_bytes) {
|
||||||
|
// These unwraps are safe thanks to the regex
|
||||||
|
let match_to_string = |m: Match| String::from_utf8(m.as_bytes().into()).unwrap();
|
||||||
|
|
||||||
|
return Some(Self {
|
||||||
|
algo_name: caps.name("algo").map(match_to_string),
|
||||||
|
algo_bit_len: caps
|
||||||
|
.name("bits")
|
||||||
|
.map(|m| match_to_string(m).parse::<usize>().unwrap()),
|
||||||
|
checksum: caps.name("checksum").map(match_to_string).unwrap(),
|
||||||
|
filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(),
|
||||||
|
format: *format,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||||
if input_is_stdin {
|
if input_is_stdin {
|
||||||
"standard input"
|
"standard input"
|
||||||
|
@ -440,41 +526,44 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determines the appropriate regular expression to use based on the provided lines.
|
|
||||||
fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
|
||||||
let regexes = [
|
|
||||||
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
|
||||||
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
|
||||||
(Regex::new(SINGLE_SPACE_REGEX).unwrap(), false),
|
|
||||||
(Regex::new(ALGO_BASED_REGEX_BASE64).unwrap(), true),
|
|
||||||
];
|
|
||||||
|
|
||||||
for line in lines {
|
|
||||||
let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed");
|
|
||||||
for (regex, is_algo_based) in &regexes {
|
|
||||||
if regex.is_match(line_bytes) {
|
|
||||||
return Some((regex.clone(), *is_algo_based));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract the expected digest from the checksum string
|
/// Extract the expected digest from the checksum string
|
||||||
fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> Option<String> {
|
fn get_expected_digest_as_hex_string(
|
||||||
// Unwraps are safe, ensured by regex.
|
line_info: &LineInfo,
|
||||||
let ck = caps.name("checksum").unwrap().as_bytes();
|
len_hint: Option<usize>,
|
||||||
|
) -> Option<Cow<str>> {
|
||||||
|
let ck = &line_info.checksum;
|
||||||
|
|
||||||
if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 {
|
// TODO MSRV 1.82, replace `is_some_and` with `is_none_or`
|
||||||
BASE64.decode(ck).map(hex::encode).ok()
|
// to improve readability. This closure returns true if a length hint is provided
|
||||||
} else if ck.len() % 2 == 0 {
|
// and the argument isn't the same as the hint.
|
||||||
Some(str::from_utf8(ck).unwrap().to_string())
|
let against_hint = |len| len_hint.is_some_and(|l| l != len);
|
||||||
} else {
|
|
||||||
|
if ck.len() % 2 != 0 {
|
||||||
// If the length of the digest is not a multiple of 2, then it
|
// If the length of the digest is not a multiple of 2, then it
|
||||||
// must be improperly formatted (1 hex digit is 2 characters)
|
// must be improperly formatted (1 hex digit is 2 characters)
|
||||||
None
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the digest can be decoded as hexadecimal AND its length matches the
|
||||||
|
// one expected (in case it's given), just go with it.
|
||||||
|
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && !against_hint(ck.len()) {
|
||||||
|
return Some(Cow::Borrowed(ck));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If hexadecimal digest fails for any reason, interpret the digest as base 64.
|
||||||
|
BASE64
|
||||||
|
.decode(ck.as_bytes()) // Decode the string as encoded base64
|
||||||
|
.map(hex::encode) // Encode it back as hexadecimal
|
||||||
|
.map(Cow::<str>::Owned)
|
||||||
|
.ok()
|
||||||
|
.and_then(|s| {
|
||||||
|
// Check the digest length
|
||||||
|
if !against_hint(s.len()) {
|
||||||
|
Some(s)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
|
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
|
||||||
|
@ -548,17 +637,15 @@ fn get_input_file(filename: &OsStr) -> UResult<Box<dyn Read>> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched.
|
/// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched.
|
||||||
fn identify_algo_name_and_length(
|
fn identify_algo_name_and_length(
|
||||||
caps: &Captures,
|
line_info: &LineInfo,
|
||||||
algo_name_input: Option<&str>,
|
algo_name_input: Option<&str>,
|
||||||
) -> Option<(String, Option<usize>)> {
|
) -> Option<(String, Option<usize>)> {
|
||||||
// When the algo-based format is matched, extract details from regex captures
|
let algorithm = line_info
|
||||||
let algorithm = caps
|
.algo_name
|
||||||
.name("algo")
|
.clone()
|
||||||
.map_or(String::new(), |m| {
|
.unwrap_or_default()
|
||||||
String::from_utf8(m.as_bytes().into()).unwrap()
|
|
||||||
})
|
|
||||||
.to_lowercase();
|
.to_lowercase();
|
||||||
|
|
||||||
// check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file
|
// check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file
|
||||||
|
@ -573,19 +660,119 @@ fn identify_algo_name_and_length(
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let bits = caps.name("bits").map_or(Some(None), |m| {
|
let bytes = if let Some(bitlen) = line_info.algo_bit_len {
|
||||||
let bits_value = String::from_utf8(m.as_bytes().into())
|
if bitlen % 8 != 0 {
|
||||||
.unwrap()
|
// The given length is wrong
|
||||||
.parse::<usize>()
|
return None;
|
||||||
.unwrap();
|
|
||||||
if bits_value % 8 == 0 {
|
|
||||||
Some(Some(bits_value / 8))
|
|
||||||
} else {
|
|
||||||
None // Return None to signal a divisibility issue
|
|
||||||
}
|
}
|
||||||
})?;
|
Some(bitlen / 8)
|
||||||
|
} else if algorithm == ALGORITHM_OPTIONS_BLAKE2B {
|
||||||
|
// Default length with BLAKE2b,
|
||||||
|
Some(64)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
Some((algorithm, bits))
|
Some((algorithm, bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Given a filename and an algorithm, compute the digest and compare it with
|
||||||
|
/// the expected one.
|
||||||
|
fn compute_and_check_digest_from_file(
|
||||||
|
filename: &[u8],
|
||||||
|
expected_checksum: &str,
|
||||||
|
mut algo: HashAlgorithm,
|
||||||
|
opts: ChecksumOptions,
|
||||||
|
) -> Result<(), LineCheckError> {
|
||||||
|
let (filename_to_check_unescaped, prefix) = unescape_filename(filename);
|
||||||
|
let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?;
|
||||||
|
|
||||||
|
// Open the input file
|
||||||
|
let file_to_check = get_file_to_check(&real_filename_to_check, opts)?;
|
||||||
|
let mut file_reader = BufReader::new(file_to_check);
|
||||||
|
|
||||||
|
// Read the file and calculate the checksum
|
||||||
|
let create_fn = &mut algo.create_fn;
|
||||||
|
let mut digest = create_fn();
|
||||||
|
let (calculated_checksum, _) =
|
||||||
|
digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap();
|
||||||
|
|
||||||
|
// Do the checksum validation
|
||||||
|
let checksum_correct = expected_checksum == calculated_checksum;
|
||||||
|
print_file_report(
|
||||||
|
std::io::stdout(),
|
||||||
|
filename,
|
||||||
|
FileChecksumResult::from_bool(checksum_correct),
|
||||||
|
prefix,
|
||||||
|
opts,
|
||||||
|
);
|
||||||
|
|
||||||
|
if checksum_correct {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(LineCheckError::DigestMismatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check a digest checksum with algo based pre-treatment.
|
||||||
|
fn process_algo_based_line(
|
||||||
|
line_info: &LineInfo,
|
||||||
|
cli_algo_name: Option<&str>,
|
||||||
|
opts: ChecksumOptions,
|
||||||
|
) -> Result<(), LineCheckError> {
|
||||||
|
let filename_to_check = line_info.filename.as_slice();
|
||||||
|
|
||||||
|
let (algo_name, algo_byte_len) = identify_algo_name_and_length(line_info, cli_algo_name)
|
||||||
|
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||||
|
|
||||||
|
// If the digest bitlen is known, we can check the format of the expected
|
||||||
|
// checksum with it.
|
||||||
|
let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) {
|
||||||
|
(ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
|
||||||
|
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||||
|
|
||||||
|
let algo = detect_algo(&algo_name, algo_byte_len)?;
|
||||||
|
|
||||||
|
compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check a digest checksum with non-algo based pre-treatment.
|
||||||
|
fn process_non_algo_based_line(
|
||||||
|
line_number: usize,
|
||||||
|
line_info: &LineInfo,
|
||||||
|
cli_algo_name: &str,
|
||||||
|
cli_algo_length: Option<usize>,
|
||||||
|
opts: ChecksumOptions,
|
||||||
|
) -> Result<(), LineCheckError> {
|
||||||
|
let mut filename_to_check = line_info.filename.as_slice();
|
||||||
|
if filename_to_check.starts_with(b"*")
|
||||||
|
&& line_number == 0
|
||||||
|
&& line_info.format == LineFormat::SingleSpace
|
||||||
|
{
|
||||||
|
// Remove the leading asterisk if present - only for the first line
|
||||||
|
filename_to_check = &filename_to_check[1..];
|
||||||
|
}
|
||||||
|
let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
|
||||||
|
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
||||||
|
|
||||||
|
// When a specific algorithm name is input, use it and use the provided bits
|
||||||
|
// except when dealing with blake2b, where we will detect the length
|
||||||
|
let (algo_name, algo_byte_len) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B {
|
||||||
|
// division by 2 converts the length of the Blake2b checksum from hexadecimal
|
||||||
|
// characters to bytes, as each byte is represented by two hexadecimal characters.
|
||||||
|
let length = Some(expected_checksum.len() / 2);
|
||||||
|
(ALGORITHM_OPTIONS_BLAKE2B.to_string(), length)
|
||||||
|
} else {
|
||||||
|
(cli_algo_name.to_lowercase(), cli_algo_length)
|
||||||
|
};
|
||||||
|
|
||||||
|
let algo = detect_algo(&algo_name, algo_byte_len)?;
|
||||||
|
|
||||||
|
compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a checksum line, detect the algorithm to use, read the file and produce
|
/// Parses a checksum line, detect the algorithm to use, read the file and produce
|
||||||
|
@ -594,88 +781,36 @@ fn identify_algo_name_and_length(
|
||||||
/// Returns `Ok(bool)` if the comparison happened, bool indicates if the digest
|
/// Returns `Ok(bool)` if the comparison happened, bool indicates if the digest
|
||||||
/// matched the expected.
|
/// matched the expected.
|
||||||
/// If the comparison didn't happen, return a `LineCheckError`.
|
/// If the comparison didn't happen, return a `LineCheckError`.
|
||||||
#[allow(clippy::too_many_arguments)]
|
|
||||||
fn process_checksum_line(
|
fn process_checksum_line(
|
||||||
filename_input: &OsStr,
|
filename_input: &OsStr,
|
||||||
line: &OsStr,
|
line: &OsStr,
|
||||||
i: usize,
|
i: usize,
|
||||||
chosen_regex: &Regex,
|
|
||||||
is_algo_based_format: bool,
|
|
||||||
cli_algo_name: Option<&str>,
|
cli_algo_name: Option<&str>,
|
||||||
cli_algo_length: Option<usize>,
|
cli_algo_length: Option<usize>,
|
||||||
opts: ChecksumOptions,
|
opts: ChecksumOptions,
|
||||||
|
cached_regex: &mut Option<LineFormat>,
|
||||||
) -> Result<(), LineCheckError> {
|
) -> Result<(), LineCheckError> {
|
||||||
let line_bytes = os_str_as_bytes(line)?;
|
let line_bytes = os_str_as_bytes(line)?;
|
||||||
if let Some(caps) = chosen_regex.captures(line_bytes) {
|
|
||||||
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
|
|
||||||
|
|
||||||
if filename_to_check.starts_with(b"*")
|
// Early return on empty or commented lines.
|
||||||
&& i == 0
|
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||||
&& chosen_regex.as_str() == SINGLE_SPACE_REGEX
|
return Err(LineCheckError::Skipped);
|
||||||
{
|
}
|
||||||
// Remove the leading asterisk if present - only for the first line
|
|
||||||
filename_to_check = &filename_to_check[1..];
|
|
||||||
}
|
|
||||||
|
|
||||||
let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex)
|
// Use `LineInfo` to extract the data of a line.
|
||||||
.ok_or(LineCheckError::ImproperlyFormatted)?;
|
// Then, depending on its format, apply a different pre-treatment.
|
||||||
|
if let Some(line_info) = LineInfo::parse(line, cached_regex) {
|
||||||
// If the algo_name is provided, we use it, otherwise we try to detect it
|
if line_info.format == LineFormat::AlgoBased {
|
||||||
let (algo_name, length) = if is_algo_based_format {
|
process_algo_based_line(&line_info, cli_algo_name, opts)
|
||||||
identify_algo_name_and_length(&caps, cli_algo_name)
|
} else if let Some(cli_algo) = cli_algo_name {
|
||||||
.ok_or(LineCheckError::ImproperlyFormatted)?
|
// If we match a non-algo based regex, we expect a cli argument
|
||||||
} else if let Some(a) = cli_algo_name {
|
// to give us the algorithm to use
|
||||||
// When a specific algorithm name is input, use it and use the provided bits
|
process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts)
|
||||||
// except when dealing with blake2b, where we will detect the length
|
|
||||||
if cli_algo_name == Some(ALGORITHM_OPTIONS_BLAKE2B) {
|
|
||||||
// division by 2 converts the length of the Blake2b checksum from hexadecimal
|
|
||||||
// characters to bytes, as each byte is represented by two hexadecimal characters.
|
|
||||||
let length = Some(expected_checksum.len() / 2);
|
|
||||||
(ALGORITHM_OPTIONS_BLAKE2B.to_string(), length)
|
|
||||||
} else {
|
|
||||||
(a.to_lowercase(), cli_algo_length)
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Default case if no algorithm is specified and non-algo based format is matched
|
// We have no clue of what algorithm to use
|
||||||
return Err(LineCheckError::ImproperlyFormatted);
|
return Err(LineCheckError::ImproperlyFormatted);
|
||||||
};
|
|
||||||
|
|
||||||
let mut algo = detect_algo(&algo_name, length)?;
|
|
||||||
|
|
||||||
let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check);
|
|
||||||
|
|
||||||
let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?;
|
|
||||||
|
|
||||||
// manage the input file
|
|
||||||
let file_to_check = get_file_to_check(&real_filename_to_check, opts)?;
|
|
||||||
let mut file_reader = BufReader::new(file_to_check);
|
|
||||||
|
|
||||||
// Read the file and calculate the checksum
|
|
||||||
let create_fn = &mut algo.create_fn;
|
|
||||||
let mut digest = create_fn();
|
|
||||||
let (calculated_checksum, _) =
|
|
||||||
digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap();
|
|
||||||
|
|
||||||
// Do the checksum validation
|
|
||||||
let checksum_correct = expected_checksum == calculated_checksum;
|
|
||||||
print_file_report(
|
|
||||||
std::io::stdout(),
|
|
||||||
filename_to_check,
|
|
||||||
FileChecksumResult::from_bool(checksum_correct),
|
|
||||||
prefix,
|
|
||||||
opts,
|
|
||||||
);
|
|
||||||
|
|
||||||
if checksum_correct {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(LineCheckError::DigestMismatch)
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
|
||||||
// Don't show any warning for empty or commented lines.
|
|
||||||
return Err(LineCheckError::Skipped);
|
|
||||||
}
|
|
||||||
if opts.warn {
|
if opts.warn {
|
||||||
let algo = if let Some(algo_name_input) = cli_algo_name {
|
let algo = if let Some(algo_name_input) = cli_algo_name {
|
||||||
algo_name_input.to_uppercase()
|
algo_name_input.to_uppercase()
|
||||||
|
@ -723,22 +858,19 @@ fn process_checksum_file(
|
||||||
let reader = BufReader::new(file);
|
let reader = BufReader::new(file);
|
||||||
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
||||||
|
|
||||||
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
|
// cached_regex is used to ensure that several non algo-based checksum line
|
||||||
log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
|
// will use the same regex.
|
||||||
set_exit_code(1);
|
let mut cached_regex = None;
|
||||||
return Err(FileCheckError::AlgoDetectionError);
|
|
||||||
};
|
|
||||||
|
|
||||||
for (i, line) in lines.iter().enumerate() {
|
for (i, line) in lines.iter().enumerate() {
|
||||||
let line_result = process_checksum_line(
|
let line_result = process_checksum_line(
|
||||||
filename_input,
|
filename_input,
|
||||||
line,
|
line,
|
||||||
i,
|
i,
|
||||||
&chosen_regex,
|
|
||||||
is_algo_based_format,
|
|
||||||
cli_algo_name,
|
cli_algo_name,
|
||||||
cli_algo_length,
|
cli_algo_length,
|
||||||
opts,
|
opts,
|
||||||
|
&mut cached_regex,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Match a first time to elude critical UErrors, and increment the total
|
// Match a first time to elude critical UErrors, and increment the total
|
||||||
|
@ -816,8 +948,7 @@ where
|
||||||
use FileCheckError::*;
|
use FileCheckError::*;
|
||||||
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
|
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
|
||||||
Err(UError(e)) => return Err(e),
|
Err(UError(e)) => return Err(e),
|
||||||
Err(ImproperlyFormatted) => break,
|
Err(CantOpenChecksumFile | ImproperlyFormatted) | Ok(_) => continue,
|
||||||
Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -926,6 +1057,7 @@ pub fn escape_filename(filename: &Path) -> (String, &'static str) {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use std::ffi::OsString;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_unescape_filename() {
|
fn test_unescape_filename() {
|
||||||
|
@ -1159,79 +1291,71 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_determine_regex() {
|
fn test_line_info() {
|
||||||
|
let mut cached_regex = None;
|
||||||
|
|
||||||
// Test algo-based regex
|
// Test algo-based regex
|
||||||
let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
let line_algo_based =
|
||||||
.iter()
|
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||||
.map(|s| OsString::from(s.to_string()))
|
let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap();
|
||||||
.collect::<Vec<_>>();
|
assert_eq!(line_info.algo_name.as_deref(), Some("MD5"));
|
||||||
let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
|
assert!(line_info.algo_bit_len.is_none());
|
||||||
assert!(algo_based);
|
assert_eq!(line_info.filename, b"example.txt");
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap()));
|
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||||
|
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
||||||
|
assert!(cached_regex.is_none());
|
||||||
|
|
||||||
// Test double-space regex
|
// Test double-space regex
|
||||||
let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||||
.iter()
|
let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap();
|
||||||
.map(|s| OsString::from(s.to_string()))
|
assert!(line_info.algo_name.is_none());
|
||||||
.collect::<Vec<_>>();
|
assert!(line_info.algo_bit_len.is_none());
|
||||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
assert_eq!(line_info.filename, b"example.txt");
|
||||||
assert!(!algo_based);
|
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
assert_eq!(line_info.format, LineFormat::DoubleSpace);
|
||||||
|
assert!(cached_regex.is_some());
|
||||||
|
|
||||||
|
cached_regex = None;
|
||||||
|
|
||||||
// Test single-space regex
|
// Test single-space regex
|
||||||
let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||||
.iter()
|
let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap();
|
||||||
.map(|s| OsString::from(s.to_string()))
|
assert!(line_info.algo_name.is_none());
|
||||||
.collect::<Vec<_>>();
|
assert!(line_info.algo_bit_len.is_none());
|
||||||
let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
|
assert_eq!(line_info.filename, b"example.txt");
|
||||||
assert!(!algo_based);
|
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap()));
|
assert_eq!(line_info.format, LineFormat::SingleSpace);
|
||||||
|
assert!(cached_regex.is_some());
|
||||||
|
|
||||||
// Test double-space regex start with invalid
|
cached_regex = None;
|
||||||
let lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
|
||||||
.iter()
|
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
|
||||||
assert!(!algo_based);
|
|
||||||
assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
|
||||||
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap()));
|
|
||||||
|
|
||||||
// Test invalid checksum line
|
// Test invalid checksum line
|
||||||
let lines_invalid = ["invalid checksum line"]
|
let line_invalid = OsString::from("invalid checksum line");
|
||||||
.iter()
|
assert!(LineInfo::parse(&line_invalid, &mut cached_regex).is_none());
|
||||||
.map(|s| OsString::from(s.to_string()))
|
assert!(cached_regex.is_none());
|
||||||
.collect::<Vec<_>>();
|
|
||||||
assert!(determine_regex(&lines_invalid).is_none());
|
|
||||||
|
|
||||||
// Test leading space before checksum line
|
// Test leading space before checksum line
|
||||||
let lines_algo_based_leading_space =
|
let line_algo_based_leading_space =
|
||||||
[" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||||
.iter()
|
let line_info = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex).unwrap();
|
||||||
.map(|s| OsString::from(s.to_string()))
|
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
||||||
.collect::<Vec<_>>();
|
assert!(cached_regex.is_none());
|
||||||
let res = determine_regex(&lines_algo_based_leading_space);
|
|
||||||
assert!(res.is_some());
|
|
||||||
assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
|
|
||||||
|
|
||||||
// Test trailing space after checksum line (should fail)
|
// Test trailing space after checksum line (should fail)
|
||||||
let lines_algo_based_leading_space =
|
let line_algo_based_leading_space =
|
||||||
["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
|
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
|
||||||
.iter()
|
let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex);
|
||||||
.map(|s| OsString::from(s.to_string()))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let res = determine_regex(&lines_algo_based_leading_space);
|
|
||||||
assert!(res.is_none());
|
assert!(res.is_none());
|
||||||
|
assert!(cached_regex.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_expected_digest() {
|
fn test_get_expected_digest() {
|
||||||
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
|
||||||
let caps = re
|
let mut cached_regex = None;
|
||||||
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
|
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let result = get_expected_digest_as_hex_string(&caps, &re);
|
let result = get_expected_digest_as_hex_string(&line_info, None);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.unwrap(),
|
result.unwrap(),
|
||||||
|
@ -1241,12 +1365,12 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_expected_checksum_invalid() {
|
fn test_get_expected_checksum_invalid() {
|
||||||
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
// The line misses a '=' at the end to be valid base64
|
||||||
let caps = re
|
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU");
|
||||||
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU")
|
let mut cached_regex = None;
|
||||||
.unwrap();
|
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
|
||||||
|
|
||||||
let result = get_expected_digest_as_hex_string(&caps, &re);
|
let result = get_expected_digest_as_hex_string(&line_info, None);
|
||||||
|
|
||||||
assert!(result.is_none());
|
assert!(result.is_none());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1443,7 +1443,7 @@ mod check_utf8 {
|
||||||
let scene = TestScenario::new(util_name!());
|
let scene = TestScenario::new(util_name!());
|
||||||
let at = &scene.fixtures;
|
let at = &scene.fixtures;
|
||||||
let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec());
|
let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec());
|
||||||
at.touch(&filename);
|
at.touch(filename);
|
||||||
|
|
||||||
// Checksum match
|
// Checksum match
|
||||||
at.write_bytes("check",
|
at.write_bytes("check",
|
||||||
|
@ -1480,7 +1480,6 @@ mod check_utf8 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore = "not yet implemented"]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_check_blake_length_guess() {
|
fn test_check_blake_length_guess() {
|
||||||
let correct_lines = [
|
let correct_lines = [
|
||||||
|
@ -1523,7 +1522,6 @@ fn test_check_blake_length_guess() {
|
||||||
.stderr_contains("foo.sums: no properly formatted checksum lines found");
|
.stderr_contains("foo.sums: no properly formatted checksum lines found");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore = "not yet implemented"]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_check_confusing_base64() {
|
fn test_check_confusing_base64() {
|
||||||
let cksum = "BLAKE2b-48 (foo.dat) = fc1f97C4";
|
let cksum = "BLAKE2b-48 (foo.dat) = fc1f97C4";
|
||||||
|
@ -1544,7 +1542,6 @@ fn test_check_confusing_base64() {
|
||||||
|
|
||||||
/// This test checks that when a file contains several checksum lines
|
/// This test checks that when a file contains several checksum lines
|
||||||
/// with different encoding, the decoding still works.
|
/// with different encoding, the decoding still works.
|
||||||
#[ignore = "not yet implemented"]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_check_mix_hex_base64() {
|
fn test_check_mix_hex_base64() {
|
||||||
let b64 = "BLAKE2b-128 (foo1.dat) = BBNuJPhdRwRlw9tm5Y7VbA==";
|
let b64 = "BLAKE2b-128 (foo1.dat) = BBNuJPhdRwRlw9tm5Y7VbA==";
|
||||||
|
@ -1769,3 +1766,81 @@ mod gnu_cksum_base64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The tests in this module check the behavior of cksum when given different
|
||||||
|
/// checksum formats and algorithms in the same file, while specifying an
|
||||||
|
/// algorithm on CLI or not.
|
||||||
|
mod format_mix {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
// First line is algo-based, second one is not
|
||||||
|
const INPUT_ALGO_NON_ALGO: &str = "\
|
||||||
|
BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce\n\
|
||||||
|
786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce foo";
|
||||||
|
|
||||||
|
// First line is non algo-based, second one is
|
||||||
|
const INPUT_NON_ALGO_ALGO: &str = "\
|
||||||
|
786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce foo\n\
|
||||||
|
BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce";
|
||||||
|
|
||||||
|
/// Make a simple scene with foo and bar empty files
|
||||||
|
fn make_scene() -> TestScenario {
|
||||||
|
let scene = TestScenario::new(util_name!());
|
||||||
|
let at = &scene.fixtures;
|
||||||
|
|
||||||
|
at.touch("foo");
|
||||||
|
at.touch("bar");
|
||||||
|
|
||||||
|
scene
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_check_cli_algo_non_algo() {
|
||||||
|
let scene = make_scene();
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.arg("--check")
|
||||||
|
.arg("--algo=blake2b")
|
||||||
|
.pipe_in(INPUT_ALGO_NON_ALGO)
|
||||||
|
.succeeds()
|
||||||
|
.stdout_contains("bar: OK\nfoo: OK")
|
||||||
|
.no_stderr();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_check_cli_non_algo_algo() {
|
||||||
|
let scene = make_scene();
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.arg("--check")
|
||||||
|
.arg("--algo=blake2b")
|
||||||
|
.pipe_in(INPUT_NON_ALGO_ALGO)
|
||||||
|
.succeeds()
|
||||||
|
.stdout_contains("foo: OK\nbar: OK")
|
||||||
|
.no_stderr();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_check_algo_non_algo() {
|
||||||
|
let scene = make_scene();
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.arg("--check")
|
||||||
|
.pipe_in(INPUT_ALGO_NON_ALGO)
|
||||||
|
.succeeds()
|
||||||
|
.stdout_contains("bar: OK")
|
||||||
|
.stderr_contains("cksum: WARNING: 1 line is improperly formatted");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_check_non_algo_algo() {
|
||||||
|
let scene = make_scene();
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.arg("--check")
|
||||||
|
.pipe_in(INPUT_NON_ALGO_ALGO)
|
||||||
|
.succeeds()
|
||||||
|
.stdout_contains("bar: OK")
|
||||||
|
.stderr_contains("cksum: WARNING: 1 line is improperly formatted");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue