mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
Merge pull request #6793 from RenjiSann/checksum-utf8
cksum/hashsum: Support for non-UTF-8 input in checksum files
This commit is contained in:
commit
cb8711fdfb
3 changed files with 428 additions and 105 deletions
|
@ -2,30 +2,29 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG
|
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename
|
||||||
|
|
||||||
use data_encoding::BASE64;
|
use data_encoding::BASE64;
|
||||||
use os_display::Quotable;
|
use os_display::Quotable;
|
||||||
use regex::Regex;
|
use regex::bytes::{Captures, Regex};
|
||||||
use std::{
|
use std::{
|
||||||
ffi::OsStr,
|
ffi::{OsStr, OsString},
|
||||||
|
fmt::Display,
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{self, BufReader, Read},
|
io::{self, stdin, BufReader, Read, Write},
|
||||||
path::Path,
|
path::Path,
|
||||||
|
str,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::{set_exit_code, FromIo, UError, UResult, USimpleError},
|
error::{set_exit_code, FromIo, UError, UResult, USimpleError},
|
||||||
show, show_error, show_warning_caps,
|
os_str_as_bytes, os_str_from_bytes, read_os_string_lines, show, show_error, show_warning_caps,
|
||||||
sum::{
|
sum::{
|
||||||
Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224,
|
Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224,
|
||||||
Sha3_256, Sha3_384, Sha3_512, Sha512, Shake128, Shake256, Sm3, BSD, CRC, SYSV,
|
Sha3_256, Sha3_384, Sha3_512, Sha512, Shake128, Shake256, Sm3, BSD, CRC, SYSV,
|
||||||
},
|
},
|
||||||
util_name,
|
util_name,
|
||||||
};
|
};
|
||||||
use std::fmt::Write;
|
|
||||||
use std::io::stdin;
|
|
||||||
use std::io::BufRead;
|
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv";
|
pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv";
|
||||||
|
@ -175,6 +174,36 @@ fn cksum_output(res: &ChecksumResult, status: bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
enum FileChecksumResult {
|
||||||
|
Ok,
|
||||||
|
Failed,
|
||||||
|
CantOpen,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for FileChecksumResult {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
FileChecksumResult::Ok => write!(f, "OK"),
|
||||||
|
FileChecksumResult::Failed => write!(f, "FAILED"),
|
||||||
|
FileChecksumResult::CantOpen => write!(f, "FAILED open or read"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print to the given buffer the checksum validation status of a file whose
|
||||||
|
/// name might contain non-utf-8 characters.
|
||||||
|
fn print_file_report<W: Write>(
|
||||||
|
mut w: W,
|
||||||
|
filename: &[u8],
|
||||||
|
result: FileChecksumResult,
|
||||||
|
prefix: &str,
|
||||||
|
) {
|
||||||
|
let _ = write!(w, "{prefix}");
|
||||||
|
let _ = w.write_all(filename);
|
||||||
|
let _ = writeln!(w, ": {result}");
|
||||||
|
}
|
||||||
|
|
||||||
pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm> {
|
pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm> {
|
||||||
match algo {
|
match algo {
|
||||||
ALGORITHM_OPTIONS_SYSV => Ok(HashAlgorithm {
|
ALGORITHM_OPTIONS_SYSV => Ok(HashAlgorithm {
|
||||||
|
@ -279,13 +308,13 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
|
||||||
// algo must be uppercase or b (for blake2b)
|
// algo must be uppercase or b (for blake2b)
|
||||||
// 2. <checksum> [* ]<filename>
|
// 2. <checksum> [* ]<filename>
|
||||||
// 3. <checksum> [*]<filename> (only one space)
|
// 3. <checksum> [*]<filename> (only one space)
|
||||||
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>.*)\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$";
|
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$";
|
||||||
const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>.*)\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
|
||||||
|
|
||||||
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>.*)$";
|
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
|
||||||
|
|
||||||
// In this case, we ignore the *
|
// In this case, we ignore the *
|
||||||
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?.*)$";
|
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<filename>\*?(?-u:.*))$";
|
||||||
|
|
||||||
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||||
if input_is_stdin {
|
if input_is_stdin {
|
||||||
|
@ -298,7 +327,7 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determines the appropriate regular expression to use based on the provided lines.
|
/// Determines the appropriate regular expression to use based on the provided lines.
|
||||||
fn determine_regex(lines: &[String]) -> Option<(Regex, bool)> {
|
fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
|
||||||
let regexes = [
|
let regexes = [
|
||||||
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
(Regex::new(ALGO_BASED_REGEX).unwrap(), true),
|
||||||
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
(Regex::new(DOUBLE_SPACE_REGEX).unwrap(), false),
|
||||||
|
@ -307,9 +336,9 @@ fn determine_regex(lines: &[String]) -> Option<(Regex, bool)> {
|
||||||
];
|
];
|
||||||
|
|
||||||
for line in lines {
|
for line in lines {
|
||||||
let line_trim = line.trim();
|
let line_bytes = os_str_as_bytes(line).expect("UTF-8 decoding failed");
|
||||||
for (regex, is_algo_based) in &regexes {
|
for (regex, is_algo_based) in &regexes {
|
||||||
if regex.is_match(line_trim) {
|
if regex.is_match(line_bytes) {
|
||||||
return Some((regex.clone(), *is_algo_based));
|
return Some((regex.clone(), *is_algo_based));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -320,6 +349,7 @@ fn determine_regex(lines: &[String]) -> Option<(Regex, bool)> {
|
||||||
|
|
||||||
// Converts bytes to a hexadecimal string
|
// Converts bytes to a hexadecimal string
|
||||||
fn bytes_to_hex(bytes: &[u8]) -> String {
|
fn bytes_to_hex(bytes: &[u8]) -> String {
|
||||||
|
use std::fmt::Write;
|
||||||
bytes
|
bytes
|
||||||
.iter()
|
.iter()
|
||||||
.fold(String::with_capacity(bytes.len() * 2), |mut hex, byte| {
|
.fold(String::with_capacity(bytes.len() * 2), |mut hex, byte| {
|
||||||
|
@ -329,13 +359,14 @@ fn bytes_to_hex(bytes: &[u8]) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_expected_checksum(
|
fn get_expected_checksum(
|
||||||
filename: &str,
|
filename: &[u8],
|
||||||
caps: &regex::Captures,
|
caps: &Captures,
|
||||||
chosen_regex: &Regex,
|
chosen_regex: &Regex,
|
||||||
) -> UResult<String> {
|
) -> UResult<String> {
|
||||||
if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 {
|
if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 {
|
||||||
let ck = caps.name("checksum").unwrap().as_str();
|
// Unwrap is safe, ensured by regex
|
||||||
match BASE64.decode(ck.as_bytes()) {
|
let ck = caps.name("checksum").unwrap().as_bytes();
|
||||||
|
match BASE64.decode(ck) {
|
||||||
Ok(decoded_bytes) => {
|
Ok(decoded_bytes) => {
|
||||||
match std::str::from_utf8(&decoded_bytes) {
|
match std::str::from_utf8(&decoded_bytes) {
|
||||||
Ok(decoded_str) => Ok(decoded_str.to_string()),
|
Ok(decoded_str) => Ok(decoded_str.to_string()),
|
||||||
|
@ -344,32 +375,45 @@ fn get_expected_checksum(
|
||||||
}
|
}
|
||||||
Err(_) => Err(Box::new(
|
Err(_) => Err(Box::new(
|
||||||
ChecksumError::NoProperlyFormattedChecksumLinesFound {
|
ChecksumError::NoProperlyFormattedChecksumLinesFound {
|
||||||
filename: (&filename).to_string(),
|
filename: String::from_utf8_lossy(filename).to_string(),
|
||||||
},
|
},
|
||||||
)),
|
)),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Ok(caps.name("checksum").unwrap().as_str().to_string())
|
// Unwraps are safe, ensured by regex.
|
||||||
|
Ok(str::from_utf8(caps.name("checksum").unwrap().as_bytes())
|
||||||
|
.unwrap()
|
||||||
|
.to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a reader that reads from the specified file, or from stdin if `filename` is "-".
|
/// Returns a reader that reads from the specified file, or from stdin if `filename` is "-".
|
||||||
fn get_file_to_check(
|
fn get_file_to_check(
|
||||||
filename: &str,
|
filename: &OsStr,
|
||||||
ignore_missing: bool,
|
ignore_missing: bool,
|
||||||
res: &mut ChecksumResult,
|
res: &mut ChecksumResult,
|
||||||
) -> Option<Box<dyn Read>> {
|
) -> Option<Box<dyn Read>> {
|
||||||
|
let filename_bytes = os_str_as_bytes(filename).expect("UTF-8 error");
|
||||||
|
let filename_lossy = String::from_utf8_lossy(filename_bytes);
|
||||||
if filename == "-" {
|
if filename == "-" {
|
||||||
Some(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file
|
Some(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file
|
||||||
} else {
|
} else {
|
||||||
let mut failed_open = || {
|
let mut failed_open = || {
|
||||||
println!("{filename}: FAILED open or read");
|
print_file_report(
|
||||||
|
std::io::stdout(),
|
||||||
|
filename_bytes,
|
||||||
|
FileChecksumResult::CantOpen,
|
||||||
|
"",
|
||||||
|
);
|
||||||
res.failed_open_file += 1;
|
res.failed_open_file += 1;
|
||||||
};
|
};
|
||||||
match File::open(filename) {
|
match File::open(filename) {
|
||||||
Ok(f) => {
|
Ok(f) => {
|
||||||
if f.metadata().ok()?.is_dir() {
|
if f.metadata().ok()?.is_dir() {
|
||||||
show!(USimpleError::new(1, format!("{filename}: Is a directory")));
|
show!(USimpleError::new(
|
||||||
|
1,
|
||||||
|
format!("{filename_lossy}: Is a directory")
|
||||||
|
));
|
||||||
// also regarded as a failed open
|
// also regarded as a failed open
|
||||||
failed_open();
|
failed_open();
|
||||||
None
|
None
|
||||||
|
@ -380,7 +424,7 @@ fn get_file_to_check(
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if !ignore_missing {
|
if !ignore_missing {
|
||||||
// yes, we have both stderr and stdout here
|
// yes, we have both stderr and stdout here
|
||||||
show!(err.map_err_context(|| filename.to_string()));
|
show!(err.map_err_context(|| filename_lossy.to_string()));
|
||||||
failed_open();
|
failed_open();
|
||||||
}
|
}
|
||||||
// we could not open the file but we want to continue
|
// we could not open the file but we want to continue
|
||||||
|
@ -414,13 +458,18 @@ fn get_input_file(filename: &OsStr) -> UResult<Box<dyn Read>> {
|
||||||
|
|
||||||
/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched.
|
/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched.
|
||||||
fn identify_algo_name_and_length(
|
fn identify_algo_name_and_length(
|
||||||
caps: &regex::Captures,
|
caps: &Captures,
|
||||||
algo_name_input: Option<&str>,
|
algo_name_input: Option<&str>,
|
||||||
res: &mut ChecksumResult,
|
res: &mut ChecksumResult,
|
||||||
properly_formatted: &mut bool,
|
properly_formatted: &mut bool,
|
||||||
) -> Option<(String, Option<usize>)> {
|
) -> Option<(String, Option<usize>)> {
|
||||||
// When the algo-based format is matched, extract details from regex captures
|
// When the algo-based format is matched, extract details from regex captures
|
||||||
let algorithm = caps.name("algo").map_or("", |m| m.as_str()).to_lowercase();
|
let algorithm = caps
|
||||||
|
.name("algo")
|
||||||
|
.map_or(String::new(), |m| {
|
||||||
|
String::from_utf8(m.as_bytes().into()).unwrap()
|
||||||
|
})
|
||||||
|
.to_lowercase();
|
||||||
|
|
||||||
// check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file
|
// check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file
|
||||||
// (for example SHA1 (f) = d...)
|
// (for example SHA1 (f) = d...)
|
||||||
|
@ -438,7 +487,10 @@ fn identify_algo_name_and_length(
|
||||||
}
|
}
|
||||||
|
|
||||||
let bits = caps.name("bits").map_or(Some(None), |m| {
|
let bits = caps.name("bits").map_or(Some(None), |m| {
|
||||||
let bits_value = m.as_str().parse::<usize>().unwrap();
|
let bits_value = String::from_utf8(m.as_bytes().into())
|
||||||
|
.unwrap()
|
||||||
|
.parse::<usize>()
|
||||||
|
.unwrap();
|
||||||
if bits_value % 8 == 0 {
|
if bits_value % 8 == 0 {
|
||||||
Some(Some(bits_value / 8))
|
Some(Some(bits_value / 8))
|
||||||
} else {
|
} else {
|
||||||
|
@ -491,7 +543,8 @@ where
|
||||||
};
|
};
|
||||||
|
|
||||||
let reader = BufReader::new(file);
|
let reader = BufReader::new(file);
|
||||||
let lines: Vec<String> = reader.lines().collect::<Result<_, _>>()?;
|
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
|
||||||
|
|
||||||
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
|
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
|
||||||
let e = ChecksumError::NoProperlyFormattedChecksumLinesFound {
|
let e = ChecksumError::NoProperlyFormattedChecksumLinesFound {
|
||||||
filename: get_filename_for_output(filename_input, input_is_stdin),
|
filename: get_filename_for_output(filename_input, input_is_stdin),
|
||||||
|
@ -502,11 +555,13 @@ where
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i, line) in lines.iter().enumerate() {
|
for (i, line) in lines.iter().enumerate() {
|
||||||
if let Some(caps) = chosen_regex.captures(line) {
|
let line_bytes = os_str_as_bytes(line)?;
|
||||||
|
if let Some(caps) = chosen_regex.captures(line_bytes) {
|
||||||
properly_formatted = true;
|
properly_formatted = true;
|
||||||
|
|
||||||
let mut filename_to_check = caps.name("filename").unwrap().as_str();
|
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
|
||||||
if filename_to_check.starts_with('*')
|
|
||||||
|
if filename_to_check.starts_with(b"*")
|
||||||
&& i == 0
|
&& i == 0
|
||||||
&& chosen_regex.as_str() == SINGLE_SPACE_REGEX
|
&& chosen_regex.as_str() == SINGLE_SPACE_REGEX
|
||||||
{
|
{
|
||||||
|
@ -551,10 +606,11 @@ where
|
||||||
|
|
||||||
let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check);
|
let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check);
|
||||||
|
|
||||||
|
let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?;
|
||||||
|
|
||||||
// manage the input file
|
// manage the input file
|
||||||
let file_to_check =
|
let file_to_check =
|
||||||
match get_file_to_check(&filename_to_check_unescaped, ignore_missing, &mut res)
|
match get_file_to_check(&real_filename_to_check, ignore_missing, &mut res) {
|
||||||
{
|
|
||||||
Some(file) => file,
|
Some(file) => file,
|
||||||
None => continue,
|
None => continue,
|
||||||
};
|
};
|
||||||
|
@ -568,17 +624,27 @@ where
|
||||||
// Do the checksum validation
|
// Do the checksum validation
|
||||||
if expected_checksum == calculated_checksum {
|
if expected_checksum == calculated_checksum {
|
||||||
if !quiet && !status {
|
if !quiet && !status {
|
||||||
println!("{prefix}{filename_to_check}: OK");
|
print_file_report(
|
||||||
|
std::io::stdout(),
|
||||||
|
filename_to_check,
|
||||||
|
FileChecksumResult::Ok,
|
||||||
|
prefix,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
correct_format += 1;
|
correct_format += 1;
|
||||||
} else {
|
} else {
|
||||||
if !status {
|
if !status {
|
||||||
println!("{prefix}{filename_to_check}: FAILED");
|
print_file_report(
|
||||||
|
std::io::stdout(),
|
||||||
|
filename_to_check,
|
||||||
|
FileChecksumResult::Failed,
|
||||||
|
prefix,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
res.failed_cksum += 1;
|
res.failed_cksum += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if line.is_empty() || line.starts_with("#") {
|
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||||
// Don't show any warning for empty or commented lines.
|
// Don't show any warning for empty or commented lines.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -707,11 +773,28 @@ pub fn calculate_blake2b_length(length: usize) -> UResult<Option<usize>> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn unescape_filename(filename: &str) -> (String, &'static str) {
|
pub fn unescape_filename(filename: &[u8]) -> (Vec<u8>, &'static str) {
|
||||||
let unescaped = filename
|
let mut unescaped = Vec::with_capacity(filename.len());
|
||||||
.replace("\\\\", "\\")
|
let mut byte_iter = filename.iter().peekable();
|
||||||
.replace("\\n", "\n")
|
loop {
|
||||||
.replace("\\r", "\r");
|
let Some(byte) = byte_iter.next() else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
if *byte == b'\\' {
|
||||||
|
match byte_iter.next() {
|
||||||
|
Some(b'\\') => unescaped.push(b'\\'),
|
||||||
|
Some(b'n') => unescaped.push(b'\n'),
|
||||||
|
Some(b'r') => unescaped.push(b'\r'),
|
||||||
|
Some(x) => {
|
||||||
|
unescaped.push(b'\\');
|
||||||
|
unescaped.push(*x);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unescaped.push(*byte);
|
||||||
|
}
|
||||||
|
}
|
||||||
let prefix = if unescaped == filename { "" } else { "\\" };
|
let prefix = if unescaped == filename { "" } else { "\\" };
|
||||||
(unescaped, prefix)
|
(unescaped, prefix)
|
||||||
}
|
}
|
||||||
|
@ -732,19 +815,19 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_unescape_filename() {
|
fn test_unescape_filename() {
|
||||||
let (unescaped, prefix) = unescape_filename("test\\nfile.txt");
|
let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt");
|
||||||
assert_eq!(unescaped, "test\nfile.txt");
|
assert_eq!(unescaped, b"test\nfile.txt");
|
||||||
assert_eq!(prefix, "\\");
|
assert_eq!(prefix, "\\");
|
||||||
let (unescaped, prefix) = unescape_filename("test\\nfile.txt");
|
let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt");
|
||||||
assert_eq!(unescaped, "test\nfile.txt");
|
assert_eq!(unescaped, b"test\nfile.txt");
|
||||||
assert_eq!(prefix, "\\");
|
assert_eq!(prefix, "\\");
|
||||||
|
|
||||||
let (unescaped, prefix) = unescape_filename("test\\rfile.txt");
|
let (unescaped, prefix) = unescape_filename(b"test\\rfile.txt");
|
||||||
assert_eq!(unescaped, "test\rfile.txt");
|
assert_eq!(unescaped, b"test\rfile.txt");
|
||||||
assert_eq!(prefix, "\\");
|
assert_eq!(prefix, "\\");
|
||||||
|
|
||||||
let (unescaped, prefix) = unescape_filename("test\\\\file.txt");
|
let (unescaped, prefix) = unescape_filename(b"test\\\\file.txt");
|
||||||
assert_eq!(unescaped, "test\\file.txt");
|
assert_eq!(unescaped, b"test\\file.txt");
|
||||||
assert_eq!(prefix, "\\");
|
assert_eq!(prefix, "\\");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -849,24 +932,25 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_algo_based_regex() {
|
fn test_algo_based_regex() {
|
||||||
let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap();
|
||||||
let test_cases = vec![
|
#[allow(clippy::type_complexity)]
|
||||||
("SHA256 (example.txt) = d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", Some(("SHA256", None, "example.txt", "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2"))),
|
let test_cases: &[(&[u8], Option<(&[u8], Option<&[u8]>, &[u8], &[u8])>)] = &[
|
||||||
|
(b"SHA256 (example.txt) = d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", Some((b"SHA256", None, b"example.txt", b"d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2"))),
|
||||||
// cspell:disable-next-line
|
// cspell:disable-next-line
|
||||||
("BLAKE2b-512 (file) = abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef", Some(("BLAKE2b", Some("512"), "file", "abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef"))),
|
(b"BLAKE2b-512 (file) = abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef", Some((b"BLAKE2b", Some(b"512"), b"file", b"abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef"))),
|
||||||
(" MD5 (test) = 9e107d9d372bb6826bd81d3542a419d6", Some(("MD5", None, "test", "9e107d9d372bb6826bd81d3542a419d6"))),
|
(b" MD5 (test) = 9e107d9d372bb6826bd81d3542a419d6", Some((b"MD5", None, b"test", b"9e107d9d372bb6826bd81d3542a419d6"))),
|
||||||
("SHA-1 (anotherfile) = a9993e364706816aba3e25717850c26c9cd0d89d", Some(("SHA", Some("1"), "anotherfile", "a9993e364706816aba3e25717850c26c9cd0d89d"))),
|
(b"SHA-1 (anotherfile) = a9993e364706816aba3e25717850c26c9cd0d89d", Some((b"SHA", Some(b"1"), b"anotherfile", b"a9993e364706816aba3e25717850c26c9cd0d89d"))),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_cases {
|
for (input, expected) in test_cases {
|
||||||
let captures = algo_based_regex.captures(input);
|
let captures = algo_based_regex.captures(*input);
|
||||||
match expected {
|
match expected {
|
||||||
Some((algo, bits, filename, checksum)) => {
|
Some((algo, bits, filename, checksum)) => {
|
||||||
assert!(captures.is_some());
|
assert!(captures.is_some());
|
||||||
let captures = captures.unwrap();
|
let captures = captures.unwrap();
|
||||||
assert_eq!(captures.name("algo").unwrap().as_str(), algo);
|
assert_eq!(&captures.name("algo").unwrap().as_bytes(), algo);
|
||||||
assert_eq!(captures.name("bits").map(|m| m.as_str()), bits);
|
assert_eq!(&captures.name("bits").map(|m| m.as_bytes()), bits);
|
||||||
assert_eq!(captures.name("filename").unwrap().as_str(), filename);
|
assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename);
|
||||||
assert_eq!(captures.name("checksum").unwrap().as_str(), checksum);
|
assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
assert!(captures.is_none());
|
assert!(captures.is_none());
|
||||||
|
@ -879,28 +963,29 @@ mod tests {
|
||||||
fn test_double_space_regex() {
|
fn test_double_space_regex() {
|
||||||
let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
|
||||||
|
|
||||||
let test_cases = vec![
|
#[allow(clippy::type_complexity)]
|
||||||
|
let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[
|
||||||
(
|
(
|
||||||
"60b725f10c9c85c70d97880dfe8191b3 a",
|
b"60b725f10c9c85c70d97880dfe8191b3 a",
|
||||||
Some(("60b725f10c9c85c70d97880dfe8191b3", "a")),
|
Some((b"60b725f10c9c85c70d97880dfe8191b3", b"a")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"bf35d7536c785cf06730d5a40301eba2 b",
|
b"bf35d7536c785cf06730d5a40301eba2 b",
|
||||||
Some(("bf35d7536c785cf06730d5a40301eba2", " b")),
|
Some((b"bf35d7536c785cf06730d5a40301eba2", b" b")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"f5b61709718c1ecf8db1aea8547d4698 *c",
|
b"f5b61709718c1ecf8db1aea8547d4698 *c",
|
||||||
Some(("f5b61709718c1ecf8db1aea8547d4698", "*c")),
|
Some((b"f5b61709718c1ecf8db1aea8547d4698", b"*c")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"b064a020db8018f18ff5ae367d01b212 dd",
|
b"b064a020db8018f18ff5ae367d01b212 dd",
|
||||||
Some(("b064a020db8018f18ff5ae367d01b212", "dd")),
|
Some((b"b064a020db8018f18ff5ae367d01b212", b"dd")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"b064a020db8018f18ff5ae367d01b212 ",
|
b"b064a020db8018f18ff5ae367d01b212 ",
|
||||||
Some(("b064a020db8018f18ff5ae367d01b212", " ")),
|
Some((b"b064a020db8018f18ff5ae367d01b212", b" ")),
|
||||||
),
|
),
|
||||||
("invalidchecksum test", None),
|
(b"invalidchecksum test", None),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_cases {
|
for (input, expected) in test_cases {
|
||||||
|
@ -909,8 +994,8 @@ mod tests {
|
||||||
Some((checksum, filename)) => {
|
Some((checksum, filename)) => {
|
||||||
assert!(captures.is_some());
|
assert!(captures.is_some());
|
||||||
let captures = captures.unwrap();
|
let captures = captures.unwrap();
|
||||||
assert_eq!(captures.name("checksum").unwrap().as_str(), checksum);
|
assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum);
|
||||||
assert_eq!(captures.name("filename").unwrap().as_str(), filename);
|
assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
assert!(captures.is_none());
|
assert!(captures.is_none());
|
||||||
|
@ -922,24 +1007,25 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_single_space_regex() {
|
fn test_single_space_regex() {
|
||||||
let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
|
||||||
let test_cases = vec![
|
#[allow(clippy::type_complexity)]
|
||||||
|
let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[
|
||||||
(
|
(
|
||||||
"60b725f10c9c85c70d97880dfe8191b3 a",
|
b"60b725f10c9c85c70d97880dfe8191b3 a",
|
||||||
Some(("60b725f10c9c85c70d97880dfe8191b3", "a")),
|
Some((b"60b725f10c9c85c70d97880dfe8191b3", b"a")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"bf35d7536c785cf06730d5a40301eba2 b",
|
b"bf35d7536c785cf06730d5a40301eba2 b",
|
||||||
Some(("bf35d7536c785cf06730d5a40301eba2", "b")),
|
Some((b"bf35d7536c785cf06730d5a40301eba2", b"b")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"f5b61709718c1ecf8db1aea8547d4698 *c",
|
b"f5b61709718c1ecf8db1aea8547d4698 *c",
|
||||||
Some(("f5b61709718c1ecf8db1aea8547d4698", "*c")),
|
Some((b"f5b61709718c1ecf8db1aea8547d4698", b"*c")),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"b064a020db8018f18ff5ae367d01b212 dd",
|
b"b064a020db8018f18ff5ae367d01b212 dd",
|
||||||
Some(("b064a020db8018f18ff5ae367d01b212", "dd")),
|
Some((b"b064a020db8018f18ff5ae367d01b212", b"dd")),
|
||||||
),
|
),
|
||||||
("invalidchecksum test", None),
|
(b"invalidchecksum test", None),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_cases {
|
for (input, expected) in test_cases {
|
||||||
|
@ -948,8 +1034,8 @@ mod tests {
|
||||||
Some((checksum, filename)) => {
|
Some((checksum, filename)) => {
|
||||||
assert!(captures.is_some());
|
assert!(captures.is_some());
|
||||||
let captures = captures.unwrap();
|
let captures = captures.unwrap();
|
||||||
assert_eq!(captures.name("checksum").unwrap().as_str(), checksum);
|
assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum);
|
||||||
assert_eq!(captures.name("filename").unwrap().as_str(), filename);
|
assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
assert!(captures.is_none());
|
assert!(captures.is_none());
|
||||||
|
@ -961,47 +1047,77 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_determine_regex() {
|
fn test_determine_regex() {
|
||||||
// Test algo-based regex
|
// Test algo-based regex
|
||||||
let lines_algo_based =
|
let lines_algo_based = ["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
||||||
vec!["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e".to_string()];
|
.iter()
|
||||||
|
.map(|s| OsString::from(s.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
|
let (regex, algo_based) = determine_regex(&lines_algo_based).unwrap();
|
||||||
assert!(algo_based);
|
assert!(algo_based);
|
||||||
assert!(regex.is_match(&lines_algo_based[0]));
|
assert!(regex.is_match(os_str_as_bytes(&lines_algo_based[0]).unwrap()));
|
||||||
|
|
||||||
// Test double-space regex
|
// Test double-space regex
|
||||||
let lines_double_space = vec!["d41d8cd98f00b204e9800998ecf8427e example.txt".to_string()];
|
let lines_double_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| OsString::from(s.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
||||||
assert!(!algo_based);
|
assert!(!algo_based);
|
||||||
assert!(regex.is_match(&lines_double_space[0]));
|
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
||||||
|
|
||||||
// Test single-space regex
|
// Test single-space regex
|
||||||
let lines_single_space = vec!["d41d8cd98f00b204e9800998ecf8427e example.txt".to_string()];
|
let lines_single_space = ["d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| OsString::from(s.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
|
let (regex, algo_based) = determine_regex(&lines_single_space).unwrap();
|
||||||
assert!(!algo_based);
|
assert!(!algo_based);
|
||||||
assert!(regex.is_match(&lines_single_space[0]));
|
assert!(regex.is_match(os_str_as_bytes(&lines_single_space[0]).unwrap()));
|
||||||
|
|
||||||
// Test double-space regex start with invalid
|
// Test double-space regex start with invalid
|
||||||
let lines_double_space = vec![
|
let lines_double_space = ["ERR", "d41d8cd98f00b204e9800998ecf8427e example.txt"]
|
||||||
"ERR".to_string(),
|
.iter()
|
||||||
"d41d8cd98f00b204e9800998ecf8427e example.txt".to_string(),
|
.map(|s| OsString::from(s.to_string()))
|
||||||
];
|
.collect::<Vec<_>>();
|
||||||
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
let (regex, algo_based) = determine_regex(&lines_double_space).unwrap();
|
||||||
assert!(!algo_based);
|
assert!(!algo_based);
|
||||||
assert!(!regex.is_match(&lines_double_space[0]));
|
assert!(!regex.is_match(os_str_as_bytes(&lines_double_space[0]).unwrap()));
|
||||||
assert!(regex.is_match(&lines_double_space[1]));
|
assert!(regex.is_match(os_str_as_bytes(&lines_double_space[1]).unwrap()));
|
||||||
|
|
||||||
// Test invalid checksum line
|
// Test invalid checksum line
|
||||||
let lines_invalid = vec!["invalid checksum line".to_string()];
|
let lines_invalid = ["invalid checksum line"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| OsString::from(s.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
assert!(determine_regex(&lines_invalid).is_none());
|
assert!(determine_regex(&lines_invalid).is_none());
|
||||||
|
|
||||||
|
// Test leading space before checksum line
|
||||||
|
let lines_algo_based_leading_space =
|
||||||
|
vec![" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| OsString::from(s.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let res = determine_regex(&lines_algo_based_leading_space);
|
||||||
|
assert!(res.is_some());
|
||||||
|
assert_eq!(res.unwrap().0.as_str(), ALGO_BASED_REGEX);
|
||||||
|
|
||||||
|
// Test trailing space after checksum line (should fail)
|
||||||
|
let lines_algo_based_leading_space =
|
||||||
|
vec!["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
|
||||||
|
.iter()
|
||||||
|
.map(|s| OsString::from(s.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let res = determine_regex(&lines_algo_based_leading_space);
|
||||||
|
assert!(res.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_expected_checksum() {
|
fn test_get_expected_checksum() {
|
||||||
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
||||||
let caps = re
|
let caps = re
|
||||||
.captures("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
|
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let result = get_expected_checksum("filename", &caps, &re);
|
let result = get_expected_checksum(b"filename", &caps, &re);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.unwrap(),
|
result.unwrap(),
|
||||||
|
@ -1013,11 +1129,48 @@ mod tests {
|
||||||
fn test_get_expected_checksum_invalid() {
|
fn test_get_expected_checksum_invalid() {
|
||||||
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
|
||||||
let caps = re
|
let caps = re
|
||||||
.captures("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU")
|
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let result = get_expected_checksum("filename", &caps, &re);
|
let result = get_expected_checksum(b"filename", &caps, &re);
|
||||||
|
|
||||||
assert!(result.is_err());
|
assert!(result.is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_print_file_report() {
    // Each case is (filename, result, prefix, expected bytes written).
    // The last case checks that a non-UTF-8 filename is written back
    // byte-for-byte.
    let cases: &[(&[u8], FileChecksumResult, &str, &[u8])] = &[
        (b"filename", FileChecksumResult::Ok, "", b"filename: OK\n"),
        (
            b"filename",
            FileChecksumResult::Failed,
            "",
            b"filename: FAILED\n",
        ),
        (
            b"filename",
            FileChecksumResult::CantOpen,
            "",
            b"filename: FAILED open or read\n",
        ),
        (
            b"filename",
            FileChecksumResult::Ok,
            "prefix",
            b"prefixfilename: OK\n",
        ),
        (
            b"funky\xffname",
            FileChecksumResult::Ok,
            "",
            b"funky\xffname: OK\n",
        ),
    ];

    for &(filename, result, prefix, expected) in cases {
        // A fresh in-memory sink per case so outputs never accumulate.
        let mut output = Vec::<u8>::new();
        print_file_report(&mut output, filename, result, prefix);
        assert_eq!(output, expected);
    }
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -100,10 +100,14 @@ pub use crate::features::fsxattr;
|
||||||
|
|
||||||
//## core functions
|
//## core functions
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
|
use std::io::{BufRead, BufReader};
|
||||||
|
use std::iter;
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
use std::os::unix::ffi::OsStrExt;
|
use std::os::unix::ffi::{OsStrExt, OsStringExt};
|
||||||
|
use std::str;
|
||||||
use std::sync::atomic::Ordering;
|
use std::sync::atomic::Ordering;
|
||||||
|
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
@ -240,6 +244,72 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper function for converting a slice of bytes into an &OsStr
|
||||||
|
/// or OsString in non-unix targets.
|
||||||
|
///
|
||||||
|
/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only.
|
||||||
|
/// On non-unix (i.e. Windows), the conversion goes through the String type
|
||||||
|
/// and thus undergo UTF-8 validation, making it fail if the stream contains
|
||||||
|
/// non-UTF-8 characters.
|
||||||
|
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
||||||
|
#[cfg(unix)]
|
||||||
|
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| {
|
||||||
|
mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr")
|
||||||
|
})?));
|
||||||
|
|
||||||
|
Ok(os_str)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper function for making an `OsString` from a byte field
|
||||||
|
/// It converts `Vec<u8>` to `OsString` for unix targets only.
|
||||||
|
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
|
||||||
|
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
|
||||||
|
#[cfg(unix)]
|
||||||
|
let s = OsString::from_vec(vec);
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
let s = OsString::from(String::from_utf8(vec).map_err(|_| {
|
||||||
|
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
|
||||||
|
})?);
|
||||||
|
|
||||||
|
Ok(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits the content of `buf_reader` into lines, each yielded as raw bytes.
///
/// This is the byte-oriented counterpart of `std::io::BufRead::lines`: lines
/// are never decoded, so non UTF-8 input is passed through untouched.
/// The trailing `\n` (or `\r\n`) is removed from every line; a last line
/// that has no terminator is yielded unchanged.
///
/// The iterator stops at end of input. It also stops, silently, on the first
/// read error, because the error value is discarded.
pub fn read_byte_lines<R: std::io::Read>(
    mut buf_reader: BufReader<R>,
) -> impl Iterator<Item = Vec<u8>> {
    iter::from_fn(move || {
        let mut line = Vec::with_capacity(256);

        // Zero bytes read means end of input; a failed read ends the
        // iteration the same way.
        match buf_reader.read_until(b'\n', &mut line) {
            Ok(0) | Err(_) => return None,
            Ok(_) => {}
        }

        // Strip the terminator: `\n`, optionally preceded by `\r`.
        if line.last() == Some(&b'\n') {
            line.pop();
            if line.last() == Some(&b'\r') {
                line.pop();
            }
        }

        Some(line)
    })
}
|
||||||
|
|
||||||
|
/// Equivalent to `std::BufRead::lines` which outputs each line as an `OsString`
|
||||||
|
/// This won't panic on non UTF-8 characters on Unix,
|
||||||
|
/// but it still will on Windows.
|
||||||
|
pub fn read_os_string_lines<R: std::io::Read>(
|
||||||
|
buf_reader: BufReader<R>,
|
||||||
|
) -> impl Iterator<Item = OsString> {
|
||||||
|
read_byte_lines(buf_reader).map(|byte_line| os_string_from_vec(byte_line).expect("UTF-8 error"))
|
||||||
|
}
|
||||||
|
|
||||||
/// Prompt the user with a formatted string and returns `true` if they reply `'y'` or `'Y'`
|
/// Prompt the user with a formatted string and returns `true` if they reply `'y'` or `'Y'`
|
||||||
///
|
///
|
||||||
/// This macro functions accepts the same syntax as `format!`. The prompt is written to
|
/// This macro functions accepts the same syntax as `format!`. The prompt is written to
|
||||||
|
|
|
@ -1402,3 +1402,103 @@ fn test_zero_single_file() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is_fixture("zero_single_file.expected");
|
.stdout_is_fixture("zero_single_file.expected");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_check_trailing_space_fails() {
    // A checksum line with trailing whitespace after the digest is not a
    // properly formatted line, so the check must fail.
    let ts = TestScenario::new(util_name!());
    let fixtures = &ts.fixtures;

    fixtures.write("foo", "foo-content\n");
    // Note the space between the digest and the newline.
    fixtures.write(
        "CHECKSUM",
        "SHA1 (foo) = 058ab38dd3603703b3a7063cf95dc51a4286b6fe \n",
    );

    ts.ucmd()
        .arg("--check")
        .arg("CHECKSUM")
        .fails()
        .no_stdout()
        .stderr_contains("CHECKSUM: no properly formatted checksum lines found");
}
|
||||||
|
|
||||||
|
/// Tests covering non-UTF-8 content inside checksum files.
///
/// The whole module is compiled out on Windows, which does not provide any
/// safe conversion between `OsString` and byte sequences for non-UTF-8
/// strings.
#[cfg(not(windows))]
mod check_utf8 {
    use super::*;

    /// A comment line holding a non-UTF-8 byte must not disturb the checking
    /// of the valid lines around it.
    #[test]
    fn test_check_non_utf8_comment() {
        let checksum_file_content =
            b"MD5 (empty) = 1B2M2Y8AsgTpgAmY7PhCfg==\n\
              # Comment with a non utf8 char: >>\xff<<\n\
              SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=\n\
              BLAKE2b (empty) = eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg==\n";

        let ts = TestScenario::new(util_name!());
        let fixtures = &ts.fixtures;

        fixtures.touch("empty");
        fixtures.write_bytes("check", checksum_file_content);

        ts.ucmd()
            .arg("--check")
            .arg(fixtures.subdir.join("check"))
            .succeeds()
            .stdout_is("empty: OK\nempty: OK\nempty: OK\n")
            .no_stderr();
    }

    /// A filename holding a non-UTF-8 byte must be looked up and reported
    /// byte-for-byte, whether the check succeeds, fails, or cannot find
    /// the file.
    #[cfg(target_os = "linux")]
    #[test]
    fn test_check_non_utf8_filename() {
        use std::{ffi::OsString, os::unix::ffi::OsStringExt};

        let ts = TestScenario::new(util_name!());
        let fixtures = &ts.fixtures;
        let funky_name: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec());
        fixtures.touch(&funky_name);

        // The checksum file is rewritten before each of the three runs.
        let check_path = fixtures.subdir.join("check");

        // Checksum match
        fixtures.write_bytes(
            "check",
            b"SHA256 (funky\xffname) = e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n",
        );
        ts.ucmd()
            .arg("--check")
            .arg(&check_path)
            .succeeds()
            .stdout_is_bytes(b"funky\xffname: OK\n")
            .no_stderr();

        // Checksum mismatch
        fixtures.write_bytes(
            "check",
            b"SHA256 (funky\xffname) = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff\n",
        );
        ts.ucmd()
            .arg("--check")
            .arg(&check_path)
            .fails()
            .stdout_is_bytes(b"funky\xffname: FAILED\n")
            .stderr_contains("1 computed checksum did NOT match");

        // file not found
        fixtures.write_bytes(
            "check",
            b"SHA256 (flakey\xffname) = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff\n",
        );
        ts.ucmd()
            .arg("--check")
            .arg(&check_path)
            .fails()
            .stdout_is_bytes(b"flakey\xffname: FAILED open or read\n")
            .stderr_contains("1 listed file could not be read");
    }
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue