Merge pull request #6929 from RenjiSann/cksum-fixes

cksum: even more fixes
2025-09-14 02:57:57 +00:00 · 2024-12-12 10:16:48 +01:00 · 2024-12-12 10:16:48 +01:00 · 209ec0b817
commit 209ec0b817
parent 56a7e24f69 958222a07c
4 changed files with 398 additions and 197 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3503,6 +3503,7 @@ dependencies = [
 "glob",
 "hex",
 "itertools",
 "lazy_static",
 "libc",
 "md-5",
 "memchr",
--- a/src/uucore/Cargo.toml
+++ b/src/uucore/Cargo.toml
@ -25,6 +25,7 @@ dns-lookup = { workspace = true, optional = true }
 dunce = { version = "1.0.4", optional = true }
 wild = "2.2.1"
 glob = { workspace = true }
 lazy_static = "1.4.0"
 # * optional
 itertools = { workspace = true, optional = true }
 thiserror = { workspace = true, optional = true }
--- a/src/uucore/src/lib/features/checksum.rs
+++ b/src/uucore/src/lib/features/checksum.rs
@ -2,13 +2,15 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename
+// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit
 use data_encoding::BASE64;
 use lazy_static::lazy_static;
 use os_display::Quotable;
-use regex::bytes::{Captures, Regex};
+use regex::bytes::{Match, Regex};
 use std::{
-    ffi::{OsStr, OsString},
+    borrow::Cow,
    ffi::OsStr,
    fmt::Display,
    fs::File,
    io::{self, stdin, BufReader, Read, Write},
@ -130,9 +132,6 @@ enum FileCheckError {
    ImproperlyFormatted,
    /// reading of the checksum file failed
    CantOpenChecksumFile,
    /// Algorithm detection was unsuccessful.
    /// Either none is provided, or there is a conflict.
    AlgoDetectionError,
 }
 impl From<Box<dyn UError>> for FileCheckError {
@ -422,14 +421,101 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
 //    algo must be uppercase or b (for blake2b)
 // 2. <checksum> [* ]<filename>
 // 3. <checksum> [*]<filename> (only one space)
-const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$";
+const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
 const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
 const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
 // In this case, we ignore the *
 const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?(?-u:.*))$";
 lazy_static! {
    static ref R_ALGO_BASED: Regex = Regex::new(ALGO_BASED_REGEX).unwrap();
    static ref R_DOUBLE_SPACE: Regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
    static ref R_SINGLE_SPACE: Regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
 }
 /// The checksum line layouts recognized when parsing a checksum file.
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
 enum LineFormat {
    /// `<ALGO>[-<bits>] (<filename>) = <checksum>`, matched by `ALGO_BASED_REGEX`.
    AlgoBased,
    /// `<checksum> [*]<filename>` with exactly one whitespace separator,
    /// matched by `SINGLE_SPACE_REGEX`.
    SingleSpace,
    /// `<checksum>  <filename>` with two whitespace separators,
    /// matched by `DOUBLE_SPACE_REGEX`.
    DoubleSpace,
 }
 impl LineFormat {
    /// Returns the shared, lazily-compiled regex that recognizes this line format.
    fn to_regex(self) -> &'static Regex {
        match self {
            Self::AlgoBased => &R_ALGO_BASED,
            Self::DoubleSpace => &R_DOUBLE_SPACE,
            Self::SingleSpace => &R_SINGLE_SPACE,
        }
    }
 }
 /// Hold the data extracted from a checksum line.
 struct LineInfo {
    /// Content of the `algo` capture group; `None` when the matched regex has
    /// no such group (single/double space formats).
    algo_name: Option<String>,
    /// Content of the `bits` capture group (the number after `<ALGO>-`), if present.
    algo_bit_len: Option<usize>,
    /// Content of the `checksum` capture group, still encoded as found on the line.
    checksum: String,
    /// Raw bytes of the `filename` capture group (not unescaped yet).
    filename: Vec<u8>,
    /// The format whose regex matched the line first.
    format: LineFormat,
 }
 impl LineInfo {
    /// Returns a `LineInfo` parsed from a checksum line, or `None` if the line
    /// cannot be parsed with any of the known formats.
    ///
    /// The three regexes are tried in order (algo-based, double-space,
    /// single-space) and the first one that matches is selected to populate
    /// the fields of the struct.
    ///
    /// `cached_regex` pins the format used for non-algo-based lines: if it is
    /// `Some`, its regex takes priority over the detected one when extracting
    /// the captures; otherwise it is set to the detected format. Algo-based
    /// lines neither read nor update it.
    /// This specific behavior is emphasized by the test
    /// `test_hashsum::test_check_md5sum_only_one_space`.
    ///
    /// A line whose `bits` value cannot be parsed as a `usize` (too large, or
    /// written with non-ASCII digits) is rejected with `None` rather than
    /// causing a panic.
    ///
    /// # Panics
    ///
    /// Panics if `os_str_as_bytes` fails on the given line.
    fn parse(s: impl AsRef<OsStr>, cached_regex: &mut Option<LineFormat>) -> Option<Self> {
        let regexes: &[(&'static Regex, LineFormat)] = &[
            (&R_ALGO_BASED, LineFormat::AlgoBased),
            (&R_DOUBLE_SPACE, LineFormat::DoubleSpace),
            (&R_SINGLE_SPACE, LineFormat::SingleSpace),
        ];
        let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed");
        for (regex, format) in regexes {
            if !regex.is_match(line_bytes) {
                continue;
            }
            let mut r = *regex;
            if *format != LineFormat::AlgoBased {
                // The cached regex ensures that when processing non-algo based regexes,
                // it cannot be changed (can't have single and double space regexes
                // used in the same file).
                if let Some(cached) = *cached_regex {
                    r = cached.to_regex();
                } else {
                    *cached_regex = Some(*format);
                }
            }
            if let Some(caps) = r.captures(line_bytes) {
                // This unwrap is safe: the `algo` and `checksum` groups only
                // match ASCII characters.
                let match_to_string = |m: Match| String::from_utf8(m.as_bytes().into()).unwrap();
                // `\d+` guarantees digits, but not that the value fits in a
                // `usize` nor that the digits are ASCII: treat a failure as an
                // unparsable line instead of unwrapping.
                let algo_bit_len = match caps.name("bits") {
                    Some(m) => Some(
                        std::str::from_utf8(m.as_bytes())
                            .ok()?
                            .parse::<usize>()
                            .ok()?,
                    ),
                    None => None,
                };
                return Some(Self {
                    algo_name: caps.name("algo").map(match_to_string),
                    algo_bit_len,
                    // `checksum` and `filename` exist in every regex.
                    checksum: caps.name("checksum").map(match_to_string).unwrap(),
                    filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(),
                    format: *format,
                });
            }
        }
        None
    }
 }
 fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
    if input_is_stdin {
        "standard input"
@ -440,41 +526,44 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
    .to_string()
 }
 /// Picks the regular expression to use for a checksum file.
 ///
 /// Lines are scanned in order; for each line the candidate regexes are tried
 /// in a fixed order and the first match wins. Returns the matching regex and
 /// whether it is an algo-based one, or `None` if no line matches any candidate.
 fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
    // Order matters: it defines which format wins when several match.
    let candidates = [
        (ALGO_BASED_REGEX, true),
        (DOUBLE_SPACE_REGEX, false),
        (SINGLE_SPACE_REGEX, false),
        (ALGO_BASED_REGEX_BASE64, true),
    ]
    .map(|(pattern, is_algo_based)| (Regex::new(pattern).unwrap(), is_algo_based));
    lines.iter().find_map(|line| {
        let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed");
        candidates
            .iter()
            .find(|(regex, _)| regex.is_match(line_bytes))
            .map(|(regex, is_algo_based)| (regex.clone(), *is_algo_based))
    })
 }
 /// Extract the expected digest from the checksum string
-fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> Option<String> {
+fn get_expected_digest_as_hex_string(
-    // Unwraps are safe, ensured by regex.
+    line_info: &LineInfo,
-    let ck = caps.name("checksum").unwrap().as_bytes();
+    len_hint: Option<usize>,
 ) -> Option<Cow<str>> {
    let ck = &line_info.checksum;
-    if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 {
+    // TODO MSRV 1.82, replace `is_some_and` with `is_none_or`
-        BASE64.decode(ck).map(hex::encode).ok()
+    // to improve readability. This closure returns True if a length hint provided
-    } else if ck.len() % 2 == 0 {
+    // and the argument isn't the same as the hint.
-        Some(str::from_utf8(ck).unwrap().to_string())
+    let against_hint = |len| len_hint.is_some_and(|l| l != len);
-    } else {
+
    if ck.len() % 2 != 0 {
        // If the length of the digest is not a multiple of 2, then it
        // must be improperly formatted (1 hex digit is 2 characters)
-        None
+        return None;
    }
    // If the digest can be decoded as hexadecimal AND its length matches the
    // one expected (in case it's given), just go with it.
    if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && !against_hint(ck.len()) {
        return Some(Cow::Borrowed(ck));
    }
    // If hexadecimal digest fails for any reason, interpret the digest as base 64.
    BASE64
        .decode(ck.as_bytes()) // Decode the string as encoded base64
        .map(hex::encode) // Encode it back as hexadecimal
        .map(Cow::<str>::Owned)
        .ok()
        .and_then(|s| {
            // Check the digest length
            if !against_hint(s.len()) {
                Some(s)
            } else {
                None
            }
        })
 }
 /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
@ -548,17 +637,15 @@ fn get_input_file(filename: &OsStr) -> UResult<Box<dyn Read>> {
    }
 }
-/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched.
+/// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched.
 fn identify_algo_name_and_length(
-    caps: &Captures,
+    line_info: &LineInfo,
    algo_name_input: Option<&str>,
 ) -> Option<(String, Option<usize>)> {
-    // When the algo-based format is matched, extract details from regex captures
+    let algorithm = line_info
-    let algorithm = caps
+        .algo_name
-        .name("algo")
+        .clone()
-        .map_or(String::new(), |m| {
+        .unwrap_or_default()
            String::from_utf8(m.as_bytes().into()).unwrap()
        })
        .to_lowercase();
    // check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file
@ -573,19 +660,119 @@ fn identify_algo_name_and_length(
        return None;
    }
-    let bits = caps.name("bits").map_or(Some(None), |m| {
+    let bytes = if let Some(bitlen) = line_info.algo_bit_len {
-        let bits_value = String::from_utf8(m.as_bytes().into())
+        if bitlen % 8 != 0 {
-            .unwrap()
+            // The given length is wrong
-            .parse::<usize>()
+            return None;
            .unwrap();
        if bits_value % 8 == 0 {
            Some(Some(bits_value / 8))
        } else {
            None // Return None to signal a divisibility issue
        }
-    })?;
+        Some(bitlen / 8)
    } else if algorithm == ALGORITHM_OPTIONS_BLAKE2B {
        // Default length with BLAKE2b,
        Some(64)
    } else {
        None
    };
-    Some((algorithm, bits))
+    Some((algorithm, bytes))
 }
 /// Computes the digest of the file named by `filename` with `algo`, compares
 /// it with `expected_checksum` and prints the per-file report on stdout.
 ///
 /// Returns `Ok(())` when both digests are equal and
 /// `Err(LineCheckError::DigestMismatch)` when they differ. Errors from
 /// decoding the filename or opening the file are propagated.
 fn compute_and_check_digest_from_file(
    filename: &[u8],
    expected_checksum: &str,
    mut algo: HashAlgorithm,
    opts: ChecksumOptions,
 ) -> Result<(), LineCheckError> {
    // The filename found on the line may be escaped; the prefix is kept for the report.
    let (unescaped_name, prefix) = unescape_filename(filename);
    let target_path = os_str_from_bytes(&unescaped_name)?;
    // Open the input file
    let mut reader = BufReader::new(get_file_to_check(&target_path, opts)?);
    // Read the file and calculate the checksum
    let make_digest = &mut algo.create_fn;
    let mut hasher = make_digest();
    // NOTE(review): a failure reported by `digest_reader` panics here — confirm this is intended.
    let (actual_checksum, _) =
        digest_reader(&mut hasher, &mut reader, opts.binary, algo.bits).unwrap();
    // Do the checksum validation; the report is printed whatever the outcome.
    let is_match = expected_checksum == actual_checksum;
    print_file_report(
        std::io::stdout(),
        filename,
        FileChecksumResult::from_bool(is_match),
        prefix,
        opts,
    );
    if !is_match {
        return Err(LineCheckError::DigestMismatch);
    }
    Ok(())
 }
 /// Check a digest checksum with non-algo based pre-treatment.
 fn process_algo_based_line(
    line_info: &LineInfo,
    cli_algo_name: Option<&str>,
    opts: ChecksumOptions,
 ) -> Result<(), LineCheckError> {
    let filename_to_check = line_info.filename.as_slice();
    let (algo_name, algo_byte_len) = identify_algo_name_and_length(line_info, cli_algo_name)
        .ok_or(LineCheckError::ImproperlyFormatted)?;
    // If the digest bitlen is known, we can check the format of the expected
    // checksum with it.
    let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) {
        (ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2),
        _ => None,
    };
    let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
        .ok_or(LineCheckError::ImproperlyFormatted)?;
    let algo = detect_algo(&algo_name, algo_byte_len)?;
    compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
 }
 /// Check a digest checksum with non-algo based pre-treatment.
 ///
 /// The line carries no algorithm name, so the algorithm is the one given on
 /// the command line (`cli_algo_name`, with `cli_algo_length`). `line_number`
 /// is the zero-based index of the line in the checksum file.
 fn process_non_algo_based_line(
    line_number: usize,
    line_info: &LineInfo,
    cli_algo_name: &str,
    cli_algo_length: Option<usize>,
    opts: ChecksumOptions,
 ) -> Result<(), LineCheckError> {
    let raw_filename = line_info.filename.as_slice();
    // Remove the leading asterisk if present - only for the first line
    // and only with the single-space format.
    let may_strip_asterisk = line_number == 0 && line_info.format == LineFormat::SingleSpace;
    let filename_to_check = match raw_filename {
        [b'*', rest @ ..] if may_strip_asterisk => rest,
        _ => raw_filename,
    };
    let Some(expected_checksum) = get_expected_digest_as_hex_string(line_info, None) else {
        return Err(LineCheckError::ImproperlyFormatted);
    };
    // When a specific algorithm name is input, use it and use the provided bits
    // except when dealing with blake2b, where we will detect the length
    let (algo_name, algo_byte_len) = match cli_algo_name {
        ALGORITHM_OPTIONS_BLAKE2B => {
            // Each byte is represented by two hexadecimal characters, hence
            // the division by 2 to get the Blake2b digest length in bytes.
            let byte_len = expected_checksum.len() / 2;
            (ALGORITHM_OPTIONS_BLAKE2B.to_string(), Some(byte_len))
        }
        other => (other.to_lowercase(), cli_algo_length),
    };
    let algo = detect_algo(&algo_name, algo_byte_len)?;
    compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
 }
 /// Parses a checksum line, detect the algorithm to use, read the file and produce
@ -594,88 +781,36 @@ fn identify_algo_name_and_length(
 /// Returns `Ok(())` if the comparison happened and the digest matched the
 /// expected one.
 /// Otherwise, returns a `LineCheckError` (e.g. `DigestMismatch` when the digests differ).
 #[allow(clippy::too_many_arguments)]
 fn process_checksum_line(
    filename_input: &OsStr,
    line: &OsStr,
    i: usize,
    chosen_regex: &Regex,
    is_algo_based_format: bool,
    cli_algo_name: Option<&str>,
    cli_algo_length: Option<usize>,
    opts: ChecksumOptions,
    cached_regex: &mut Option<LineFormat>,
 ) -> Result<(), LineCheckError> {
    let line_bytes = os_str_as_bytes(line)?;
    if let Some(caps) = chosen_regex.captures(line_bytes) {
        let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
-        if filename_to_check.starts_with(b"*")
+    // Early return on empty or commented lines.
-            && i == 0
+    if line.is_empty() || line_bytes.starts_with(b"#") {
-            && chosen_regex.as_str() == SINGLE_SPACE_REGEX
+        return Err(LineCheckError::Skipped);
-        {
+    }
            // Remove the leading asterisk if present - only for the first line
            filename_to_check = &filename_to_check[1..];
        }
-        let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex)
+    // Use `LineInfo` to extract the data of a line.
-            .ok_or(LineCheckError::ImproperlyFormatted)?;
+    // Then, depending on its format, apply a different pre-treatment.
-
+    if let Some(line_info) = LineInfo::parse(line, cached_regex) {
-        // If the algo_name is provided, we use it, otherwise we try to detect it
+        if line_info.format == LineFormat::AlgoBased {
-        let (algo_name, length) = if is_algo_based_format {
+            process_algo_based_line(&line_info, cli_algo_name, opts)
-            identify_algo_name_and_length(&caps, cli_algo_name)
+        } else if let Some(cli_algo) = cli_algo_name {
-                .ok_or(LineCheckError::ImproperlyFormatted)?
+            // If we match a non-algo based regex, we expect a cli argument
-        } else if let Some(a) = cli_algo_name {
+            // to give us the algorithm to use
-            // When a specific algorithm name is input, use it and use the provided bits
+            process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts)
            // except when dealing with blake2b, where we will detect the length
            if cli_algo_name == Some(ALGORITHM_OPTIONS_BLAKE2B) {
                // division by 2 converts the length of the Blake2b checksum from hexadecimal
                // characters to bytes, as each byte is represented by two hexadecimal characters.
                let length = Some(expected_checksum.len() / 2);
                (ALGORITHM_OPTIONS_BLAKE2B.to_string(), length)
            } else {
                (a.to_lowercase(), cli_algo_length)
            }
        } else {
-            // Default case if no algorithm is specified and non-algo based format is matched
+            // We have no clue of what algorithm to use
            return Err(LineCheckError::ImproperlyFormatted);
        };
        let mut algo = detect_algo(&algo_name, length)?;
        let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check);
        let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?;
        // manage the input file
        let file_to_check = get_file_to_check(&real_filename_to_check, opts)?;
        let mut file_reader = BufReader::new(file_to_check);
        // Read the file and calculate the checksum
        let create_fn = &mut algo.create_fn;
        let mut digest = create_fn();
        let (calculated_checksum, _) =
            digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap();
        // Do the checksum validation
        let checksum_correct = expected_checksum == calculated_checksum;
        print_file_report(
            std::io::stdout(),
            filename_to_check,
            FileChecksumResult::from_bool(checksum_correct),
            prefix,
            opts,
        );
        if checksum_correct {
            Ok(())
        } else {
            Err(LineCheckError::DigestMismatch)
        }
    } else {
        if line.is_empty() || line_bytes.starts_with(b"#") {
            // Don't show any warning for empty or commented lines.
            return Err(LineCheckError::Skipped);
        }
        if opts.warn {
            let algo = if let Some(algo_name_input) = cli_algo_name {
                algo_name_input.to_uppercase()
@ -723,22 +858,19 @@ fn process_checksum_file(
    let reader = BufReader::new(file);
    let lines = read_os_string_lines(reader).collect::<Vec<_>>();
-    let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
+    // cached_regex is used to ensure that several non algo-based checksum line
-        log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
+    // will use the same regex.
-        set_exit_code(1);
+    let mut cached_regex = None;
        return Err(FileCheckError::AlgoDetectionError);
    };
    for (i, line) in lines.iter().enumerate() {
        let line_result = process_checksum_line(
            filename_input,
            line,
            i,
            &chosen_regex,
            is_algo_based_format,
            cli_algo_name,
            cli_algo_length,
            opts,
            &mut cached_regex,
        );
        // Match a first time to elude critical UErrors, and increment the total
@ -816,8 +948,7 @@ where
        use FileCheckError::*;
        match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
            Err(UError(e)) => return Err(e),
-            Err(ImproperlyFormatted) => break,
+            Err(CantOpenChecksumFile | ImproperlyFormatted) | Ok(_) => continue,
            Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue,
        }
    }
@ -926,6 +1057,7 @@ pub fn escape_filename(filename: &Path) -> (String, &'static str) {
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::ffi::OsString;
    #[test]
    fn test_unescape_filename() {
@ -1159,79 +1291,71 @@ mod tests {
    }
    #[test]
-    fn test_determine_regex() {
+    fn test_line_info() {
        let mut cached_regex = None;
        // Test algo-based regex
-        let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
+        let line_algo_based =
-            .iter()
+            OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
-            .map(|s| OsString::from(s.to_string()))
+        let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap();
-            .collect::<Vec<_>>();
+        assert_eq!(line_info.algo_name.as_deref(), Some("MD5"));
-        let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
+        assert!(line_info.algo_bit_len.is_none());
-        assert!(algo_based);
+        assert_eq!(line_info.filename, b"example.txt");
-        assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap()));
+        assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
        assert_eq!(line_info.format, LineFormat::AlgoBased);
        assert!(cached_regex.is_none());
        // Test double-space regex
-        let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e  example.txt"]
+        let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e  example.txt");
-            .iter()
+        let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap();
-            .map(|s| OsString::from(s.to_string()))
+        assert!(line_info.algo_name.is_none());
-            .collect::<Vec<_>>();
+        assert!(line_info.algo_bit_len.is_none());
-        let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
+        assert_eq!(line_info.filename, b"example.txt");
-        assert!(!algo_based);
+        assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
-        assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
+        assert_eq!(line_info.format, LineFormat::DoubleSpace);
        assert!(cached_regex.is_some());
        cached_regex = None;
        // Test single-space regex
-        let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
+        let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
-            .iter()
+        let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap();
-            .map(|s| OsString::from(s.to_string()))
+        assert!(line_info.algo_name.is_none());
-            .collect::<Vec<_>>();
+        assert!(line_info.algo_bit_len.is_none());
-        let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
+        assert_eq!(line_info.filename, b"example.txt");
-        assert!(!algo_based);
+        assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
-        assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap()));
+        assert_eq!(line_info.format, LineFormat::SingleSpace);
        assert!(cached_regex.is_some());
-        // Test double-space regex start with invalid
+        cached_regex = None;
        let lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e  example.txt"]
            .iter()
            .map(|s| OsString::from(s.to_string()))
            .collect::<Vec<_>>();
        let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
        assert!(!algo_based);
        assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
        assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap()));
        // Test invalid checksum line
-        let lines_invalid = ["invalid checksum line"]
+        let line_invalid = OsString::from("invalid checksum line");
-            .iter()
+        assert!(LineInfo::parse(&line_invalid, &mut cached_regex).is_none());
-            .map(|s| OsString::from(s.to_string()))
+        assert!(cached_regex.is_none());
            .collect::<Vec<_>>();
        assert!(determine_regex(&lines_invalid).is_none());
        // Test leading space before checksum line
-        let lines_algo_based_leading_space =
+        let line_algo_based_leading_space =
-            ["   MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
+            OsString::from("   MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
-                .iter()
+        let line_info = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex).unwrap();
-                .map(|s| OsString::from(s.to_string()))
+        assert_eq!(line_info.format, LineFormat::AlgoBased);
-                .collect::<Vec<_>>();
+        assert!(cached_regex.is_none());
        let res = determine_regex(&lines_algo_based_leading_space);
        assert!(res.is_some());
        assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
        // Test trailing space after checksum line (should fail)
-        let lines_algo_based_leading_space =
+        let line_algo_based_leading_space =
-            ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
+            OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
-                .iter()
+        let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex);
                .map(|s| OsString::from(s.to_string()))
                .collect::<Vec<_>>();
        let res = determine_regex(&lines_algo_based_leading_space);
        assert!(res.is_none());
        assert!(cached_regex.is_none());
    }
    #[test]
    fn test_get_expected_digest() {
-        let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
+        let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
-        let caps = re
+        let mut cached_regex = None;
-            .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
+        let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
            .unwrap();
-        let result = get_expected_digest_as_hex_string(&caps, &re);
+        let result = get_expected_digest_as_hex_string(&line_info, None);
        assert_eq!(
            result.unwrap(),
@ -1241,12 +1365,12 @@ mod tests {
    #[test]
    fn test_get_expected_checksum_invalid() {
-        let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
+        // The line misses a '=' at the end to be valid base64
-        let caps = re
+        let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU");
-            .captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU")
+        let mut cached_regex = None;
-            .unwrap();
+        let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
-        let result = get_expected_digest_as_hex_string(&caps, &re);
+        let result = get_expected_digest_as_hex_string(&line_info, None);
        assert!(result.is_none());
    }
--- a/tests/by-util/test_cksum.rs
+++ b/tests/by-util/test_cksum.rs
@ -1443,7 +1443,7 @@ mod check_utf8 {
        let scene = TestScenario::new(util_name!());
        let at = &scene.fixtures;
        let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec());
-        at.touch(&filename);
+        at.touch(filename);
        // Checksum match
        at.write_bytes("check",
@ -1480,7 +1480,6 @@ mod check_utf8 {
    }
 }
 #[ignore = "not yet implemented"]
 #[test]
 fn test_check_blake_length_guess() {
    let correct_lines = [
@ -1523,7 +1522,6 @@ fn test_check_blake_length_guess() {
        .stderr_contains("foo.sums: no properly formatted checksum lines found");
 }
 #[ignore = "not yet implemented"]
 #[test]
 fn test_check_confusing_base64() {
    let cksum = "BLAKE2b-48 (foo.dat) = fc1f97C4";
@ -1544,7 +1542,6 @@ fn test_check_confusing_base64() {
 /// This test checks that when a file contains several checksum lines
 /// with different encoding, the decoding still works.
 #[ignore = "not yet implemented"]
 #[test]
 fn test_check_mix_hex_base64() {
    let b64 = "BLAKE2b-128 (foo1.dat) = BBNuJPhdRwRlw9tm5Y7VbA==";
@ -1769,3 +1766,81 @@ mod gnu_cksum_base64 {
        }
    }
 }
 /// The tests in this module check the behavior of cksum when given different
 /// checksum formats and algorithms in the same file, while specifying an
 /// algorithm on CLI or not.
 mod format_mix {
    use super::*;
    // Both inputs hold the same two checksum lines (one tagged `BLAKE2b (bar) = ...`,
    // one untagged `<digest>  foo`) with the same digest; only their order differs.
    // First line is algo-based, second one is not
    const INPUT_ALGO_NON_ALGO: &str = "\
        BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce\n\
        786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce  foo";
    // First line is non algo-based, second one is
    const INPUT_NON_ALGO_ALGO: &str = "\
        786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce  foo\n\
        BLAKE2b (bar) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce";
    /// Make a simple scene with foo and bar empty files
    fn make_scene() -> TestScenario {
        let scene = TestScenario::new(util_name!());
        let at = &scene.fixtures;
        at.touch("foo");
        at.touch("bar");
        scene
    }
    // With `--algo=blake2b` and the algo-based line first: both lines are
    // expected to be reported OK, in file order, with nothing on stderr.
    #[test]
    fn test_check_cli_algo_non_algo() {
        let scene = make_scene();
        scene
            .ucmd()
            .arg("--check")
            .arg("--algo=blake2b")
            .pipe_in(INPUT_ALGO_NON_ALGO)
            .succeeds()
            .stdout_contains("bar: OK\nfoo: OK")
            .no_stderr();
    }
    // With `--algo=blake2b` and the untagged line first: both lines are
    // expected to be reported OK, in file order, with nothing on stderr.
    #[test]
    fn test_check_cli_non_algo_algo() {
        let scene = make_scene();
        scene
            .ucmd()
            .arg("--check")
            .arg("--algo=blake2b")
            .pipe_in(INPUT_NON_ALGO_ALGO)
            .succeeds()
            .stdout_contains("foo: OK\nbar: OK")
            .no_stderr();
    }
    // Without `--algo`, algo-based line first: the command is still expected to
    // succeed, reporting `bar: OK` and warning about 1 improperly formatted line.
    #[test]
    fn test_check_algo_non_algo() {
        let scene = make_scene();
        scene
            .ucmd()
            .arg("--check")
            .pipe_in(INPUT_ALGO_NON_ALGO)
            .succeeds()
            .stdout_contains("bar: OK")
            .stderr_contains("cksum: WARNING: 1 line is improperly formatted");
    }
    // Without `--algo`, untagged line first: same expectation as above, the
    // order of the lines must not change the outcome.
    #[test]
    fn test_check_non_algo_algo() {
        let scene = make_scene();
        scene
            .ucmd()
            .arg("--check")
            .pipe_in(INPUT_NON_ALGO_ALGO)
            .succeeds()
            .stdout_contains("bar: OK")
            .stderr_contains("cksum: WARNING: 1 line is improperly formatted");
    }
 }