mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 03:57:44 +00:00
Merge pull request #6575 from BenWiederhake/dev-cksum-nonutf8-filename
cksum: accept non-UTF-8 filenames
This commit is contained in:
commit
9fbfb2925f
3 changed files with 105 additions and 63 deletions
|
@ -4,8 +4,9 @@
|
|||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (ToDO) fname, algo
|
||||
use clap::builder::ValueParser;
|
||||
use clap::{crate_version, value_parser, Arg, ArgAction, Command};
|
||||
use std::ffi::OsStr;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs::File;
|
||||
use std::io::{self, stdin, stdout, BufReader, Read, Write};
|
||||
use std::iter;
|
||||
|
@ -18,7 +19,7 @@ use uucore::checksum::{
|
|||
use uucore::{
|
||||
encoding,
|
||||
error::{FromIo, UResult, USimpleError},
|
||||
format_usage, help_about, help_section, help_usage, show,
|
||||
format_usage, help_about, help_section, help_usage, os_str_as_bytes, show,
|
||||
sum::{div_ceil, Digest},
|
||||
};
|
||||
|
||||
|
@ -116,52 +117,64 @@ where
|
|||
};
|
||||
// The BSD checksum output is 5 digit integer
|
||||
let bsd_width = 5;
|
||||
match (options.algo_name, not_file) {
|
||||
(ALGORITHM_OPTIONS_SYSV, true) => println!(
|
||||
"{} {}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
div_ceil(sz, options.output_bits)
|
||||
let (before_filename, should_print_filename, after_filename) = match options.algo_name {
|
||||
ALGORITHM_OPTIONS_SYSV => (
|
||||
format!(
|
||||
"{} {}{}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
div_ceil(sz, options.output_bits),
|
||||
if not_file { "" } else { " " }
|
||||
),
|
||||
!not_file,
|
||||
String::new(),
|
||||
),
|
||||
(ALGORITHM_OPTIONS_SYSV, false) => println!(
|
||||
"{} {} {}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
div_ceil(sz, options.output_bits),
|
||||
filename.display()
|
||||
ALGORITHM_OPTIONS_BSD => (
|
||||
format!(
|
||||
"{:0bsd_width$} {:bsd_width$}{}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
div_ceil(sz, options.output_bits),
|
||||
if not_file { "" } else { " " }
|
||||
),
|
||||
!not_file,
|
||||
String::new(),
|
||||
),
|
||||
(ALGORITHM_OPTIONS_BSD, true) => println!(
|
||||
"{:0bsd_width$} {:bsd_width$}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
div_ceil(sz, options.output_bits)
|
||||
ALGORITHM_OPTIONS_CRC => (
|
||||
format!("{sum} {sz}{}", if not_file { "" } else { " " }),
|
||||
!not_file,
|
||||
String::new(),
|
||||
),
|
||||
(ALGORITHM_OPTIONS_BSD, false) => println!(
|
||||
"{:0bsd_width$} {:bsd_width$} {}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
div_ceil(sz, options.output_bits),
|
||||
filename.display()
|
||||
),
|
||||
(ALGORITHM_OPTIONS_CRC, true) => println!("{sum} {sz}"),
|
||||
(ALGORITHM_OPTIONS_CRC, false) => println!("{sum} {sz} {}", filename.display()),
|
||||
(ALGORITHM_OPTIONS_BLAKE2B, _) if options.tag => {
|
||||
if let Some(length) = options.length {
|
||||
// Multiply by 8 here, as we want to print the length in bits.
|
||||
println!("BLAKE2b-{} ({}) = {sum}", length * 8, filename.display());
|
||||
} else {
|
||||
println!("BLAKE2b ({}) = {sum}", filename.display());
|
||||
}
|
||||
ALGORITHM_OPTIONS_BLAKE2B if options.tag => {
|
||||
(
|
||||
if let Some(length) = options.length {
|
||||
// Multiply by 8 here, as we want to print the length in bits.
|
||||
format!("BLAKE2b-{} (", length * 8)
|
||||
} else {
|
||||
"BLAKE2b (".to_owned()
|
||||
},
|
||||
true,
|
||||
format!(") = {sum}"),
|
||||
)
|
||||
}
|
||||
_ => {
|
||||
if options.tag {
|
||||
println!(
|
||||
"{} ({}) = {sum}",
|
||||
options.algo_name.to_ascii_uppercase(),
|
||||
filename.display()
|
||||
);
|
||||
(
|
||||
format!("{} (", options.algo_name.to_ascii_uppercase()),
|
||||
true,
|
||||
format!(") = {sum}"),
|
||||
)
|
||||
} else {
|
||||
let prefix = if options.asterisk { "*" } else { " " };
|
||||
println!("{sum} {prefix}{}", filename.display());
|
||||
(format!("{sum} {prefix}"), true, String::new())
|
||||
}
|
||||
}
|
||||
};
|
||||
print!("{}", before_filename);
|
||||
if should_print_filename {
|
||||
// The filename might not be valid UTF-8, and filename.display() would mangle the names.
|
||||
// Therefore, emit the bytes directly to stdout, without any attempt at encoding them.
|
||||
let _dropped_result = stdout().write_all(os_str_as_bytes(filename.as_os_str())?);
|
||||
}
|
||||
println!("{}", after_filename);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -209,7 +222,7 @@ fn prompt_asterisk(tag: bool, binary: bool, had_reset: bool) -> bool {
|
|||
* Don't do it with clap because if it struggling with the --overrides_with
|
||||
* marking the value as set even if not present
|
||||
*/
|
||||
fn had_reset(args: &[String]) -> bool {
|
||||
fn had_reset(args: &[OsString]) -> bool {
|
||||
// Indices where "--binary" or "-b", "--tag", and "--untagged" are found
|
||||
let binary_index = args.iter().position(|x| x == "--binary" || x == "-b");
|
||||
let tag_index = args.iter().position(|x| x == "--tag");
|
||||
|
@ -234,7 +247,7 @@ fn handle_tag_text_binary_flags(matches: &clap::ArgMatches) -> UResult<(bool, bo
|
|||
|
||||
let binary_flag: bool = matches.get_flag(options::BINARY);
|
||||
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
let args: Vec<OsString> = std::env::args_os().collect();
|
||||
let had_reset = had_reset(&args);
|
||||
|
||||
let asterisk: bool = prompt_asterisk(tag, binary_flag, had_reset);
|
||||
|
@ -298,7 +311,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
|
||||
// Execute the checksum validation based on the presence of files or the use of stdin
|
||||
|
||||
let files = matches.get_many::<String>(options::FILE).map_or_else(
|
||||
let files = matches.get_many::<OsString>(options::FILE).map_or_else(
|
||||
|| iter::once(OsStr::new("-")).collect::<Vec<_>>(),
|
||||
|files| files.map(OsStr::new).collect::<Vec<_>>(),
|
||||
);
|
||||
|
@ -337,7 +350,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
asterisk,
|
||||
};
|
||||
|
||||
match matches.get_many::<String>(options::FILE) {
|
||||
match matches.get_many::<OsString>(options::FILE) {
|
||||
Some(files) => cksum(opts, files.map(OsStr::new))?,
|
||||
None => cksum(opts, iter::once(OsStr::new("-")))?,
|
||||
};
|
||||
|
@ -356,6 +369,7 @@ pub fn uu_app() -> Command {
|
|||
Arg::new(options::FILE)
|
||||
.hide(true)
|
||||
.action(clap::ArgAction::Append)
|
||||
.value_parser(ValueParser::os_string())
|
||||
.value_hint(clap::ValueHint::FilePath),
|
||||
)
|
||||
.arg(
|
||||
|
@ -469,61 +483,62 @@ mod tests {
|
|||
use super::had_reset;
|
||||
use crate::calculate_blake2b_length;
|
||||
use crate::prompt_asterisk;
|
||||
use std::ffi::OsString;
|
||||
|
||||
#[test]
|
||||
fn test_had_reset() {
|
||||
let args = ["--binary", "--tag", "--untagged"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(had_reset(&args));
|
||||
|
||||
let args = ["-b", "--tag", "--untagged"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(had_reset(&args));
|
||||
|
||||
let args = ["-b", "--binary", "--tag", "--untagged"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(had_reset(&args));
|
||||
|
||||
let args = ["--untagged", "--tag", "--binary"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(!had_reset(&args));
|
||||
|
||||
let args = ["--untagged", "--tag", "-b"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(!had_reset(&args));
|
||||
|
||||
let args = ["--binary", "--tag"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(!had_reset(&args));
|
||||
|
||||
let args = ["--tag", "--untagged"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(!had_reset(&args));
|
||||
|
||||
let args = ["--text", "--untagged"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(!had_reset(&args));
|
||||
|
||||
let args = ["--binary", "--untagged"]
|
||||
.iter()
|
||||
.map(|&s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
.map(|&s| s.into())
|
||||
.collect::<Vec<OsString>>();
|
||||
assert!(!had_reset(&args));
|
||||
}
|
||||
|
||||
|
|
|
@ -222,9 +222,9 @@ pub fn read_yes() -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
// Helper function for processing delimiter values (which could be non UTF-8)
|
||||
// It converts OsString to &[u8] for unix targets only
|
||||
// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
|
||||
/// Helper function for processing delimiter values (which could be non UTF-8)
|
||||
/// It converts OsString to &[u8] for unix targets only
|
||||
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
|
||||
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
||||
#[cfg(unix)]
|
||||
let bytes = os_string.as_bytes();
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
// spell-checker:ignore (words) asdf algo algos mgmt
|
||||
// spell-checker:ignore (words) asdf algo algos asha mgmt xffname
|
||||
|
||||
use crate::common::util::TestScenario;
|
||||
|
||||
|
@ -1250,3 +1250,30 @@ fn test_several_files_error_mgmt() {
|
|||
.stderr_contains("empty: no properly ")
|
||||
.stderr_contains("incorrect: no properly ");
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[test]
|
||||
fn test_non_utf8_filename() {
|
||||
use std::ffi::OsString;
|
||||
use std::os::unix::ffi::OsStringExt;
|
||||
|
||||
let scene = TestScenario::new(util_name!());
|
||||
let at = &scene.fixtures;
|
||||
let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec());
|
||||
|
||||
at.touch(&filename);
|
||||
|
||||
scene
|
||||
.ucmd()
|
||||
.arg(&filename)
|
||||
.succeeds()
|
||||
.stdout_is_bytes(b"4294967295 0 funky\xffname\n")
|
||||
.no_stderr();
|
||||
scene
|
||||
.ucmd()
|
||||
.arg("-asha256")
|
||||
.arg(filename)
|
||||
.succeeds()
|
||||
.stdout_is_bytes(b"SHA256 (funky\xffname) = e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n")
|
||||
.no_stderr();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue