mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #7580 from GTimothy/cksum_no_regex
cksum: take out regex pattern matching
This commit is contained in:
commit
6675460d1d
2 changed files with 288 additions and 130 deletions
|
@ -89,7 +89,7 @@ default = []
|
||||||
# * non-default features
|
# * non-default features
|
||||||
backup-control = []
|
backup-control = []
|
||||||
colors = []
|
colors = []
|
||||||
checksum = ["data-encoding", "thiserror", "regex", "sum"]
|
checksum = ["data-encoding", "thiserror", "sum"]
|
||||||
encoding = ["data-encoding", "data-encoding-macro", "z85"]
|
encoding = ["data-encoding", "data-encoding-macro", "z85"]
|
||||||
entries = ["libc"]
|
entries = ["libc"]
|
||||||
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
|
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
|
||||||
|
|
|
@ -2,11 +2,10 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit
|
// spell-checker:ignore anotherfile invalidchecksum JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit rsplit
|
||||||
|
|
||||||
use data_encoding::BASE64;
|
use data_encoding::BASE64;
|
||||||
use os_display::Quotable;
|
use os_display::Quotable;
|
||||||
use regex::bytes::{Match, Regex};
|
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Cow,
|
borrow::Cow,
|
||||||
ffi::OsStr,
|
ffi::OsStr,
|
||||||
|
@ -15,7 +14,6 @@ use std::{
|
||||||
io::{self, BufReader, Read, Write, stdin},
|
io::{self, BufReader, Read, Write, stdin},
|
||||||
path::Path,
|
path::Path,
|
||||||
str,
|
str,
|
||||||
sync::LazyLock,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
@ -466,36 +464,180 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regexp to handle the three input formats:
|
|
||||||
// 1. <algo>[-<bits>] (<filename>) = <checksum>
|
|
||||||
// algo must be uppercase or b (for blake2b)
|
|
||||||
// 2. <checksum> [* ]<filename>
|
|
||||||
// 3. <checksum> [*]<filename> (only one space)
|
|
||||||
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
|
||||||
|
|
||||||
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
|
|
||||||
|
|
||||||
// In this case, we ignore the *
|
|
||||||
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?(?-u:.*))$";
|
|
||||||
|
|
||||||
static R_ALGO_BASED: LazyLock<Regex> = LazyLock::new(|| Regex::new(ALGO_BASED_REGEX).unwrap());
|
|
||||||
static R_DOUBLE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(DOUBLE_SPACE_REGEX).unwrap());
|
|
||||||
static R_SINGLE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(SINGLE_SPACE_REGEX).unwrap());
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
enum LineFormat {
|
enum LineFormat {
|
||||||
AlgoBased,
|
AlgoBased,
|
||||||
SingleSpace,
|
SingleSpace,
|
||||||
DoubleSpace,
|
Untagged,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LineFormat {
|
impl LineFormat {
|
||||||
fn to_regex(self) -> &'static Regex {
|
/// parse [tagged output format]
|
||||||
match self {
|
/// Normally the format is simply space separated but openssl does not
|
||||||
LineFormat::AlgoBased => &R_ALGO_BASED,
|
/// respect the gnu definition.
|
||||||
LineFormat::SingleSpace => &R_SINGLE_SPACE,
|
///
|
||||||
LineFormat::DoubleSpace => &R_DOUBLE_SPACE,
|
/// [tagged output format]: https://www.gnu.org/software/coreutils/manual/html_node/cksum-output-modes.html#cksum-output-modes-1
|
||||||
|
fn parse_algo_based(line: &[u8]) -> Option<LineInfo> {
|
||||||
|
// r"\MD5 (a\\ b) = abc123",
|
||||||
|
// BLAKE2b(44)= a45a4c4883cce4b50d844fab460414cc2080ca83690e74d850a9253e757384366382625b218c8585daee80f34dc9eb2f2fde5fb959db81cd48837f9216e7b0fa
|
||||||
|
let trimmed = line.trim_ascii_start();
|
||||||
|
let algo_start = if trimmed.starts_with(b"\\") { 1 } else { 0 };
|
||||||
|
let rest = &trimmed[algo_start..];
|
||||||
|
|
||||||
|
enum SubCase {
|
||||||
|
Posix,
|
||||||
|
OpenSSL,
|
||||||
}
|
}
|
||||||
|
// find the next parenthesis using byte search (not next whitespace) because openssl's
|
||||||
|
// tagged format does not put a space before (filename)
|
||||||
|
|
||||||
|
let par_idx = rest.iter().position(|&b| b == b'(')?;
|
||||||
|
let sub_case = if rest[par_idx - 1] == b' ' {
|
||||||
|
SubCase::Posix
|
||||||
|
} else {
|
||||||
|
SubCase::OpenSSL
|
||||||
|
};
|
||||||
|
|
||||||
|
let algo_substring = match sub_case {
|
||||||
|
SubCase::Posix => &rest[..par_idx - 1],
|
||||||
|
SubCase::OpenSSL => &rest[..par_idx],
|
||||||
|
};
|
||||||
|
let mut algo_parts = algo_substring.splitn(2, |&b| b == b'-');
|
||||||
|
let algo = algo_parts.next()?;
|
||||||
|
|
||||||
|
// Parse algo_bits if present
|
||||||
|
let algo_bits = algo_parts
|
||||||
|
.next()
|
||||||
|
.and_then(|s| std::str::from_utf8(s).ok()?.parse::<usize>().ok());
|
||||||
|
|
||||||
|
// Check algo format: uppercase ASCII or digits or "BLAKE2b"
|
||||||
|
let is_valid_algo = algo == b"BLAKE2b"
|
||||||
|
|| algo
|
||||||
|
.iter()
|
||||||
|
.all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit());
|
||||||
|
if !is_valid_algo {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// SAFETY: we just validated the contents of algo, we can unsafely make a
|
||||||
|
// String from it
|
||||||
|
let algo_utf8 = unsafe { String::from_utf8_unchecked(algo.to_vec()) };
|
||||||
|
// stripping '(' not ' (' since we matched on ( not whitespace because of openssl.
|
||||||
|
let after_paren = rest.get(par_idx + 1..)?;
|
||||||
|
let (filename, checksum) = match sub_case {
|
||||||
|
SubCase::Posix => ByteSliceExt::rsplit_once(after_paren, b") = ")?,
|
||||||
|
SubCase::OpenSSL => ByteSliceExt::rsplit_once(after_paren, b")= ")?,
|
||||||
|
};
|
||||||
|
|
||||||
|
fn is_valid_checksum(checksum: &[u8]) -> bool {
|
||||||
|
if checksum.is_empty() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut parts = checksum.splitn(2, |&b| b == b'=');
|
||||||
|
let main = parts.next().unwrap(); // Always exists since checksum isn't empty
|
||||||
|
let padding = parts.next().unwrap_or(&b""[..]); // Empty if no '='
|
||||||
|
|
||||||
|
main.iter()
|
||||||
|
.all(|&b| b.is_ascii_alphanumeric() || b == b'+' || b == b'/')
|
||||||
|
&& !main.is_empty()
|
||||||
|
&& padding.len() <= 2
|
||||||
|
&& padding.iter().all(|&b| b == b'=')
|
||||||
|
}
|
||||||
|
if !is_valid_checksum(checksum) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// SAFETY: we just validated the contents of checksum, we can unsafely make a
|
||||||
|
// String from it
|
||||||
|
let checksum_utf8 = unsafe { String::from_utf8_unchecked(checksum.to_vec()) };
|
||||||
|
|
||||||
|
Some(LineInfo {
|
||||||
|
algo_name: Some(algo_utf8),
|
||||||
|
algo_bit_len: algo_bits,
|
||||||
|
checksum: checksum_utf8,
|
||||||
|
filename: filename.to_vec(),
|
||||||
|
format: LineFormat::AlgoBased,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(rustdoc::invalid_html_tags)]
|
||||||
|
/// parse [untagged output format]
|
||||||
|
/// The format is simple, either "<checksum> <filename>" or
|
||||||
|
/// "<checksum> *<filename>"
|
||||||
|
///
|
||||||
|
/// [untagged output format]: https://www.gnu.org/software/coreutils/manual/html_node/cksum-output-modes.html#cksum-output-modes-1
|
||||||
|
fn parse_untagged(line: &[u8]) -> Option<LineInfo> {
|
||||||
|
let space_idx = line.iter().position(|&b| b == b' ')?;
|
||||||
|
let checksum = &line[..space_idx];
|
||||||
|
if !checksum.iter().all(|&b| b.is_ascii_hexdigit()) || checksum.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// SAFETY: we just validated the contents of checksum, we can unsafely make a
|
||||||
|
// String from it
|
||||||
|
let checksum_utf8 = unsafe { String::from_utf8_unchecked(checksum.to_vec()) };
|
||||||
|
|
||||||
|
let rest = &line[space_idx..];
|
||||||
|
let filename = rest
|
||||||
|
.strip_prefix(b" ")
|
||||||
|
.or_else(|| rest.strip_prefix(b" *"))?;
|
||||||
|
|
||||||
|
Some(LineInfo {
|
||||||
|
algo_name: None,
|
||||||
|
algo_bit_len: None,
|
||||||
|
checksum: checksum_utf8,
|
||||||
|
filename: filename.to_vec(),
|
||||||
|
format: LineFormat::Untagged,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(rustdoc::invalid_html_tags)]
|
||||||
|
/// parse [untagged output format]
|
||||||
|
/// Normally the format is simple, either "<checksum> <filename>" or
|
||||||
|
/// "<checksum> *<filename>"
|
||||||
|
/// But the bsd tests expect special single space behavior where
|
||||||
|
/// checksum and filename are separated only by a space, meaning the second
|
||||||
|
/// space or asterisk is part of the file name.
|
||||||
|
/// This parser accounts for this variation
|
||||||
|
///
|
||||||
|
/// [untagged output format]: https://www.gnu.org/software/coreutils/manual/html_node/cksum-output-modes.html#cksum-output-modes-1
|
||||||
|
fn parse_single_space(line: &[u8]) -> Option<LineInfo> {
|
||||||
|
// Find first space
|
||||||
|
let space_idx = line.iter().position(|&b| b == b' ')?;
|
||||||
|
let checksum = &line[..space_idx];
|
||||||
|
if !checksum.iter().all(|&b| b.is_ascii_hexdigit()) || checksum.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// SAFETY: we just validated the contents of checksum, we can unsafely make a
|
||||||
|
// String from it
|
||||||
|
let checksum_utf8 = unsafe { String::from_utf8_unchecked(checksum.to_vec()) };
|
||||||
|
|
||||||
|
let filename = line.get(space_idx + 1..)?; // Skip single space
|
||||||
|
|
||||||
|
Some(LineInfo {
|
||||||
|
algo_name: None,
|
||||||
|
algo_bit_len: None,
|
||||||
|
checksum: checksum_utf8,
|
||||||
|
filename: filename.to_vec(),
|
||||||
|
format: LineFormat::SingleSpace,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper trait for byte slice operations
|
||||||
|
trait ByteSliceExt {
|
||||||
|
/// Look for a pattern from right to left, return surrounding parts if found.
|
||||||
|
fn rsplit_once(&self, pattern: &[u8]) -> Option<(&Self, &Self)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ByteSliceExt for [u8] {
|
||||||
|
fn rsplit_once(&self, pattern: &[u8]) -> Option<(&Self, &Self)> {
|
||||||
|
let pos = self
|
||||||
|
.windows(pattern.len())
|
||||||
|
.rev()
|
||||||
|
.position(|w| w == pattern)?;
|
||||||
|
Some((
|
||||||
|
&self[..self.len() - pattern.len() - pos],
|
||||||
|
&self[self.len() - pos..],
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -505,63 +647,40 @@ struct LineInfo {
|
||||||
algo_bit_len: Option<usize>,
|
algo_bit_len: Option<usize>,
|
||||||
checksum: String,
|
checksum: String,
|
||||||
filename: Vec<u8>,
|
filename: Vec<u8>,
|
||||||
|
|
||||||
format: LineFormat,
|
format: LineFormat,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LineInfo {
|
impl LineInfo {
|
||||||
/// Returns a `LineInfo` parsed from a checksum line.
|
/// Returns a `LineInfo` parsed from a checksum line.
|
||||||
/// The function will run 3 regexes against the line and select the first one that matches
|
/// The function will run 3 parsers against the line and select the first one that matches
|
||||||
/// to populate the fields of the struct.
|
/// to populate the fields of the struct.
|
||||||
/// However, there is a catch to handle regarding the handling of `cached_regex`.
|
/// However, there is a catch to handle regarding the handling of `cached_line_format`.
|
||||||
/// In case of non-algo-based regex, if `cached_regex` is Some, it must take the priority
|
/// In case of non-algo-based format, if `cached_line_format` is Some, it must take the priority
|
||||||
/// over the detected regex. Otherwise, we must set it the the detected regex.
|
/// over the detected format. Otherwise, we must set it the the detected format.
|
||||||
/// This specific behavior is emphasized by the test
|
/// This specific behavior is emphasized by the test
|
||||||
/// `test_hashsum::test_check_md5sum_only_one_space`.
|
/// `test_hashsum::test_check_md5sum_only_one_space`.
|
||||||
fn parse(s: impl AsRef<OsStr>, cached_regex: &mut Option<LineFormat>) -> Option<Self> {
|
fn parse(s: impl AsRef<OsStr>, cached_line_format: &mut Option<LineFormat>) -> Option<Self> {
|
||||||
let regexes: &[(&'static Regex, LineFormat)] = &[
|
let line_bytes = os_str_as_bytes(s.as_ref()).ok()?;
|
||||||
(&R_ALGO_BASED, LineFormat::AlgoBased),
|
|
||||||
(&R_DOUBLE_SPACE, LineFormat::DoubleSpace),
|
|
||||||
(&R_SINGLE_SPACE, LineFormat::SingleSpace),
|
|
||||||
];
|
|
||||||
|
|
||||||
let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed");
|
if let Some(info) = LineFormat::parse_algo_based(line_bytes) {
|
||||||
|
return Some(info);
|
||||||
for (regex, format) in regexes {
|
|
||||||
if !regex.is_match(line_bytes) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
if let Some(cached_format) = cached_line_format {
|
||||||
let mut r = *regex;
|
match cached_format {
|
||||||
if *format != LineFormat::AlgoBased {
|
LineFormat::Untagged => LineFormat::parse_untagged(line_bytes),
|
||||||
// The cached regex ensures that when processing non-algo based regexes,
|
LineFormat::SingleSpace => LineFormat::parse_single_space(line_bytes),
|
||||||
// it cannot be changed (can't have single and double space regexes
|
_ => unreachable!("we never catch the algo based format"),
|
||||||
// used in the same file).
|
}
|
||||||
if cached_regex.is_some() {
|
} else if let Some(info) = LineFormat::parse_untagged(line_bytes) {
|
||||||
r = cached_regex.unwrap().to_regex();
|
*cached_line_format = Some(LineFormat::Untagged);
|
||||||
|
Some(info)
|
||||||
|
} else if let Some(info) = LineFormat::parse_single_space(line_bytes) {
|
||||||
|
*cached_line_format = Some(LineFormat::SingleSpace);
|
||||||
|
Some(info)
|
||||||
} else {
|
} else {
|
||||||
*cached_regex = Some(*format);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(caps) = r.captures(line_bytes) {
|
|
||||||
// These unwraps are safe thanks to the regex
|
|
||||||
let match_to_string = |m: Match| String::from_utf8(m.as_bytes().into()).unwrap();
|
|
||||||
|
|
||||||
return Some(Self {
|
|
||||||
algo_name: caps.name("algo").map(match_to_string),
|
|
||||||
algo_bit_len: caps
|
|
||||||
.name("bits")
|
|
||||||
.map(|m| match_to_string(m).parse::<usize>().unwrap()),
|
|
||||||
checksum: caps.name("checksum").map(match_to_string).unwrap(),
|
|
||||||
filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(),
|
|
||||||
format: *format,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||||
|
@ -835,7 +954,7 @@ fn process_checksum_line(
|
||||||
cli_algo_name: Option<&str>,
|
cli_algo_name: Option<&str>,
|
||||||
cli_algo_length: Option<usize>,
|
cli_algo_length: Option<usize>,
|
||||||
opts: ChecksumOptions,
|
opts: ChecksumOptions,
|
||||||
cached_regex: &mut Option<LineFormat>,
|
cached_line_format: &mut Option<LineFormat>,
|
||||||
last_algo: &mut Option<String>,
|
last_algo: &mut Option<String>,
|
||||||
) -> Result<(), LineCheckError> {
|
) -> Result<(), LineCheckError> {
|
||||||
let line_bytes = os_str_as_bytes(line)?;
|
let line_bytes = os_str_as_bytes(line)?;
|
||||||
|
@ -847,14 +966,14 @@ fn process_checksum_line(
|
||||||
|
|
||||||
// Use `LineInfo` to extract the data of a line.
|
// Use `LineInfo` to extract the data of a line.
|
||||||
// Then, depending on its format, apply a different pre-treatment.
|
// Then, depending on its format, apply a different pre-treatment.
|
||||||
let Some(line_info) = LineInfo::parse(line, cached_regex) else {
|
let Some(line_info) = LineInfo::parse(line, cached_line_format) else {
|
||||||
return Err(LineCheckError::ImproperlyFormatted);
|
return Err(LineCheckError::ImproperlyFormatted);
|
||||||
};
|
};
|
||||||
|
|
||||||
if line_info.format == LineFormat::AlgoBased {
|
if line_info.format == LineFormat::AlgoBased {
|
||||||
process_algo_based_line(&line_info, cli_algo_name, opts, last_algo)
|
process_algo_based_line(&line_info, cli_algo_name, opts, last_algo)
|
||||||
} else if let Some(cli_algo) = cli_algo_name {
|
} else if let Some(cli_algo) = cli_algo_name {
|
||||||
// If we match a non-algo based regex, we expect a cli argument
|
// If we match a non-algo based parser, we expect a cli argument
|
||||||
// to give us the algorithm to use
|
// to give us the algorithm to use
|
||||||
process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts)
|
process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts)
|
||||||
} else {
|
} else {
|
||||||
|
@ -890,9 +1009,9 @@ fn process_checksum_file(
|
||||||
let reader = BufReader::new(file);
|
let reader = BufReader::new(file);
|
||||||
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
||||||
|
|
||||||
// cached_regex is used to ensure that several non algo-based checksum line
|
// cached_line_format is used to ensure that several non algo-based checksum line
|
||||||
// will use the same regex.
|
// will use the same parser.
|
||||||
let mut cached_regex = None;
|
let mut cached_line_format = None;
|
||||||
// last_algo caches the algorithm used in the last line to print a warning
|
// last_algo caches the algorithm used in the last line to print a warning
|
||||||
// message for the current line if improperly formatted.
|
// message for the current line if improperly formatted.
|
||||||
// Behavior tested in gnu_cksum_c::test_warn
|
// Behavior tested in gnu_cksum_c::test_warn
|
||||||
|
@ -905,7 +1024,7 @@ fn process_checksum_file(
|
||||||
cli_algo_name,
|
cli_algo_name,
|
||||||
cli_algo_length,
|
cli_algo_length,
|
||||||
opts,
|
opts,
|
||||||
&mut cached_regex,
|
&mut cached_line_format,
|
||||||
&mut last_algo,
|
&mut last_algo,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1245,39 +1364,78 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_algo_based_regex() {
|
fn test_algo_based_parser() {
|
||||||
let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
let test_cases: &[(&[u8], Option<(&[u8], Option<&[u8]>, &[u8], &[u8])>)] = &[
|
let test_cases: &[(&[u8], Option<(&[u8], Option<&[u8]>, &[u8], &[u8])>)] = &[
|
||||||
(b"SHA256 (example.txt) = d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", Some((b"SHA256", None, b"example.txt", b"d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2"))),
|
(b"SHA256 (example.txt) = d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", Some((b"SHA256", None, b"example.txt", b"d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2"))),
|
||||||
// cspell:disable-next-line
|
// cspell:disable
|
||||||
(b"BLAKE2b-512 (file) = abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef", Some((b"BLAKE2b", Some(b"512"), b"file", b"abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef"))),
|
(b"BLAKE2b-512 (file) = abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef", Some((b"BLAKE2b", Some(b"512"), b"file", b"abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef"))),
|
||||||
(b" MD5 (test) = 9e107d9d372bb6826bd81d3542a419d6", Some((b"MD5", None, b"test", b"9e107d9d372bb6826bd81d3542a419d6"))),
|
(b" MD5 (test) = 9e107d9d372bb6826bd81d3542a419d6", Some((b"MD5", None, b"test", b"9e107d9d372bb6826bd81d3542a419d6"))),
|
||||||
(b"SHA-1 (anotherfile) = a9993e364706816aba3e25717850c26c9cd0d89d", Some((b"SHA", Some(b"1"), b"anotherfile", b"a9993e364706816aba3e25717850c26c9cd0d89d"))),
|
(b"SHA-1 (anotherfile) = a9993e364706816aba3e25717850c26c9cd0d89d", Some((b"SHA", Some(b"1"), b"anotherfile", b"a9993e364706816aba3e25717850c26c9cd0d89d"))),
|
||||||
|
(b" MD5 (anothertest) = fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"anothertest", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
|
||||||
|
(b" MD5(anothertest2) = fds65dsf46as5df4d6f54asds5d7f7g9", None),
|
||||||
|
(b" MD5(weirdfilename0)= stillfilename)= fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename0)= stillfilename", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
|
||||||
|
(b" MD5(weirdfilename1)= )= fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename1)= ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
|
||||||
|
(b" MD5(weirdfilename2) = )= fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename2) = ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
|
||||||
|
(b" MD5 (weirdfilename3)= ) = fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename3)= ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
|
||||||
|
(b" MD5 (weirdfilename4) = ) = fds65dsf46as5df4d6f54asds5d7f7g9", Some((b"MD5", None, b"weirdfilename4) = ", b"fds65dsf46as5df4d6f54asds5d7f7g9"))),
|
||||||
|
(b" MD5(weirdfilename5)= ) = fds65dsf46as5df4d6f54asds5d7f7g9", None),
|
||||||
|
(b" MD5(weirdfilename6) = ) = fds65dsf46as5df4d6f54asds5d7f7g9", None),
|
||||||
|
(b" MD5 (weirdfilename7)= )= fds65dsf46as5df4d6f54asds5d7f7g9", None),
|
||||||
|
(b" MD5 (weirdfilename8) = )= fds65dsf46as5df4d6f54asds5d7f7g9", None),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// cspell:enable
|
||||||
for (input, expected) in test_cases {
|
for (input, expected) in test_cases {
|
||||||
let captures = algo_based_regex.captures(input);
|
let line_info = LineFormat::parse_algo_based(input);
|
||||||
match expected {
|
match expected {
|
||||||
Some((algo, bits, filename, checksum)) => {
|
Some((algo, bits, filename, checksum)) => {
|
||||||
assert!(captures.is_some());
|
assert!(
|
||||||
let captures = captures.unwrap();
|
line_info.is_some(),
|
||||||
assert_eq!(&captures.name("algo").unwrap().as_bytes(), algo);
|
"expected Some, got None for {}",
|
||||||
assert_eq!(&captures.name("bits").map(|m| m.as_bytes()), bits);
|
String::from_utf8_lossy(filename)
|
||||||
assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename);
|
);
|
||||||
assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum);
|
let line_info = line_info.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
&line_info.algo_name.unwrap().as_bytes(),
|
||||||
|
algo,
|
||||||
|
"failed for {}",
|
||||||
|
String::from_utf8_lossy(filename)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_info
|
||||||
|
.algo_bit_len
|
||||||
|
.map(|m| m.to_string().as_bytes().to_owned()),
|
||||||
|
bits.map(|b| b.to_owned()),
|
||||||
|
"failed for {}",
|
||||||
|
String::from_utf8_lossy(filename)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&line_info.filename,
|
||||||
|
filename,
|
||||||
|
"failed for {}",
|
||||||
|
String::from_utf8_lossy(filename)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&line_info.checksum.as_bytes(),
|
||||||
|
checksum,
|
||||||
|
"failed for {}",
|
||||||
|
String::from_utf8_lossy(filename)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
assert!(captures.is_none());
|
assert!(
|
||||||
|
line_info.is_none(),
|
||||||
|
"failed for {}",
|
||||||
|
String::from_utf8_lossy(input)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_double_space_regex() {
|
fn test_double_space_parser() {
|
||||||
let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
|
||||||
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[
|
let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[
|
||||||
(
|
(
|
||||||
|
@ -1304,24 +1462,23 @@ mod tests {
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_cases {
|
for (input, expected) in test_cases {
|
||||||
let captures = double_space_regex.captures(input);
|
let line_info = LineFormat::parse_untagged(input);
|
||||||
match expected {
|
match expected {
|
||||||
Some((checksum, filename)) => {
|
Some((checksum, filename)) => {
|
||||||
assert!(captures.is_some());
|
assert!(line_info.is_some());
|
||||||
let captures = captures.unwrap();
|
let line_info = line_info.unwrap();
|
||||||
assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum);
|
assert_eq!(&line_info.filename, filename);
|
||||||
assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename);
|
assert_eq!(&line_info.checksum.as_bytes(), checksum);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
assert!(captures.is_none());
|
assert!(line_info.is_none());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_single_space_regex() {
|
fn test_single_space_parser() {
|
||||||
let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[
|
let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[
|
||||||
(
|
(
|
||||||
|
@ -1344,16 +1501,16 @@ mod tests {
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_cases {
|
for (input, expected) in test_cases {
|
||||||
let captures = single_space_regex.captures(input);
|
let line_info = LineFormat::parse_single_space(input);
|
||||||
match expected {
|
match expected {
|
||||||
Some((checksum, filename)) => {
|
Some((checksum, filename)) => {
|
||||||
assert!(captures.is_some());
|
assert!(line_info.is_some());
|
||||||
let captures = captures.unwrap();
|
let line_info = line_info.unwrap();
|
||||||
assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum);
|
assert_eq!(&line_info.filename, filename);
|
||||||
assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename);
|
assert_eq!(&line_info.checksum.as_bytes(), checksum);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
assert!(captures.is_none());
|
assert!(line_info.is_none());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1361,68 +1518,69 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_line_info() {
|
fn test_line_info() {
|
||||||
let mut cached_regex = None;
|
let mut cached_line_format = None;
|
||||||
|
|
||||||
// Test algo-based regex
|
// Test algo-based parser
|
||||||
let line_algo_based =
|
let line_algo_based =
|
||||||
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||||
let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap();
|
let line_info = LineInfo::parse(&line_algo_based, &mut cached_line_format).unwrap();
|
||||||
assert_eq!(line_info.algo_name.as_deref(), Some("MD5"));
|
assert_eq!(line_info.algo_name.as_deref(), Some("MD5"));
|
||||||
assert!(line_info.algo_bit_len.is_none());
|
assert!(line_info.algo_bit_len.is_none());
|
||||||
assert_eq!(line_info.filename, b"example.txt");
|
assert_eq!(line_info.filename, b"example.txt");
|
||||||
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||||
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
||||||
assert!(cached_regex.is_none());
|
assert!(cached_line_format.is_none());
|
||||||
|
|
||||||
// Test double-space regex
|
// Test double-space parser
|
||||||
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||||
let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap();
|
let line_info = LineInfo::parse(&line_double_space, &mut cached_line_format).unwrap();
|
||||||
assert!(line_info.algo_name.is_none());
|
assert!(line_info.algo_name.is_none());
|
||||||
assert!(line_info.algo_bit_len.is_none());
|
assert!(line_info.algo_bit_len.is_none());
|
||||||
assert_eq!(line_info.filename, b"example.txt");
|
assert_eq!(line_info.filename, b"example.txt");
|
||||||
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||||
assert_eq!(line_info.format, LineFormat::DoubleSpace);
|
assert_eq!(line_info.format, LineFormat::Untagged);
|
||||||
assert!(cached_regex.is_some());
|
assert!(cached_line_format.is_some());
|
||||||
|
|
||||||
cached_regex = None;
|
cached_line_format = None;
|
||||||
|
|
||||||
// Test single-space regex
|
// Test single-space parser
|
||||||
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
|
||||||
let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap();
|
let line_info = LineInfo::parse(&line_single_space, &mut cached_line_format).unwrap();
|
||||||
assert!(line_info.algo_name.is_none());
|
assert!(line_info.algo_name.is_none());
|
||||||
assert!(line_info.algo_bit_len.is_none());
|
assert!(line_info.algo_bit_len.is_none());
|
||||||
assert_eq!(line_info.filename, b"example.txt");
|
assert_eq!(line_info.filename, b"example.txt");
|
||||||
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
|
||||||
assert_eq!(line_info.format, LineFormat::SingleSpace);
|
assert_eq!(line_info.format, LineFormat::SingleSpace);
|
||||||
assert!(cached_regex.is_some());
|
assert!(cached_line_format.is_some());
|
||||||
|
|
||||||
cached_regex = None;
|
cached_line_format = None;
|
||||||
|
|
||||||
// Test invalid checksum line
|
// Test invalid checksum line
|
||||||
let line_invalid = OsString::from("invalid checksum line");
|
let line_invalid = OsString::from("invalid checksum line");
|
||||||
assert!(LineInfo::parse(&line_invalid, &mut cached_regex).is_none());
|
assert!(LineInfo::parse(&line_invalid, &mut cached_line_format).is_none());
|
||||||
assert!(cached_regex.is_none());
|
assert!(cached_line_format.is_none());
|
||||||
|
|
||||||
// Test leading space before checksum line
|
// Test leading space before checksum line
|
||||||
let line_algo_based_leading_space =
|
let line_algo_based_leading_space =
|
||||||
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
|
||||||
let line_info = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex).unwrap();
|
let line_info =
|
||||||
|
LineInfo::parse(&line_algo_based_leading_space, &mut cached_line_format).unwrap();
|
||||||
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
assert_eq!(line_info.format, LineFormat::AlgoBased);
|
||||||
assert!(cached_regex.is_none());
|
assert!(cached_line_format.is_none());
|
||||||
|
|
||||||
// Test trailing space after checksum line (should fail)
|
// Test trailing space after checksum line (should fail)
|
||||||
let line_algo_based_leading_space =
|
let line_algo_based_leading_space =
|
||||||
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
|
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e ");
|
||||||
let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex);
|
let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_line_format);
|
||||||
assert!(res.is_none());
|
assert!(res.is_none());
|
||||||
assert!(cached_regex.is_none());
|
assert!(cached_line_format.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_expected_digest() {
|
fn test_get_expected_digest() {
|
||||||
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
|
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
|
||||||
let mut cached_regex = None;
|
let mut cached_line_format = None;
|
||||||
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
|
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
|
||||||
|
|
||||||
let result = get_expected_digest_as_hex_string(&line_info, None);
|
let result = get_expected_digest_as_hex_string(&line_info, None);
|
||||||
|
|
||||||
|
@ -1436,8 +1594,8 @@ mod tests {
|
||||||
fn test_get_expected_checksum_invalid() {
|
fn test_get_expected_checksum_invalid() {
|
||||||
// The line misses a '=' at the end to be valid base64
|
// The line misses a '=' at the end to be valid base64
|
||||||
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU");
|
let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU");
|
||||||
let mut cached_regex = None;
|
let mut cached_line_format = None;
|
||||||
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
|
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
|
||||||
|
|
||||||
let result = get_expected_digest_as_hex_string(&line_info, None);
|
let result = get_expected_digest_as_hex_string(&line_info, None);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue