1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

hashsum: don't copy input buffer on Windows

Remove a copy operation of the input buffer being read for digest when
reading in text mode on Windows. Previously, the code copied the
buffer into a completely new `Vec`, replacing "\r\n" with "\n". Instead,
the code now scans for the indices at which each "\r\n" occurs in the
input buffer and feeds the digest only the bytes before each "\r" and
the bytes from the following "\n" onward, so the "\r" is skipped.
This commit is contained in:
Jeffrey Finkelstein 2021-08-23 22:41:53 -04:00 committed by Michael Debertol
parent e5d6c6970b
commit 52cfd4c6cb
3 changed files with 20 additions and 24 deletions

1
Cargo.lock generated
View file

@ -2392,6 +2392,7 @@ dependencies = [
"hex", "hex",
"libc", "libc",
"md5", "md5",
"memchr 2.4.0",
"regex", "regex",
"regex-syntax", "regex-syntax",
"sha1", "sha1",

View file

@ -19,6 +19,7 @@ digest = "0.6.2"
clap = { version = "2.33", features = ["wrap_help"] } clap = { version = "2.33", features = ["wrap_help"] }
hex = "0.2.0" hex = "0.2.0"
libc = "0.2.42" libc = "0.2.42"
memchr = "2"
md5 = "0.3.5" md5 = "0.3.5"
regex = "1.0.1" regex = "1.0.1"
regex-syntax = "0.6.7" regex-syntax = "0.6.7"

View file

@ -7,7 +7,7 @@
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
// spell-checker:ignore (ToDO) algo, algoname, regexes, nread // spell-checker:ignore (ToDO) algo, algoname, regexes, nread memmem
#[macro_use] #[macro_use]
extern crate clap; extern crate clap;
@ -22,6 +22,7 @@ use self::digest::Digest;
use clap::{App, Arg, ArgMatches}; use clap::{App, Arg, ArgMatches};
use hex::ToHex; use hex::ToHex;
use md5::Context as Md5; use md5::Context as Md5;
use memchr::memmem;
use regex::Regex; use regex::Regex;
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha224, Sha256, Sha384, Sha512}; use sha2::{Sha224, Sha256, Sha384, Sha512};
@ -586,8 +587,6 @@ fn digest_reader<'a, T: Read>(
// Digest file, do not hold too much in memory at any given moment // Digest file, do not hold too much in memory at any given moment
let windows = cfg!(windows); let windows = cfg!(windows);
let mut buffer = Vec::with_capacity(524_288); let mut buffer = Vec::with_capacity(524_288);
let mut vec = Vec::with_capacity(524_288);
let mut looking_for_newline = false;
loop { loop {
match reader.read_to_end(&mut buffer) { match reader.read_to_end(&mut buffer) {
Ok(0) => { Ok(0) => {
@ -595,24 +594,23 @@ fn digest_reader<'a, T: Read>(
} }
Ok(nread) => { Ok(nread) => {
if windows && !binary { if windows && !binary {
// Windows text mode returns '\n' when reading '\r\n' // In Windows text mode, replace each occurrence of
for &b in buffer.iter().take(nread) { // "\r\n" with "\n".
if looking_for_newline { //
if b != b'\n' { // Find all occurrences of "\r\n", inputting the
vec.push(b'\r'); // slice just before the "\n" in the previous
} // instance of "\r\n" and the beginning of this
if b != b'\r' { // "\r\n".
vec.push(b); //
looking_for_newline = false; // FIXME This fails if one call to `read()` ends
} // with the "\r" and the next call to `read()`
} else if b != b'\r' { // begins with the "\n".
vec.push(b); let mut i_prev = 0;
} else { for i in memmem::find_iter(&buffer[0..nread], b"\r\n") {
looking_for_newline = true; digest.input(&buffer[i_prev..i]);
} i_prev = i + 1;
} }
digest.input(&vec); digest.input(&buffer[i_prev..nread]);
vec.clear();
} else { } else {
digest.input(&buffer[..nread]); digest.input(&buffer[..nread]);
} }
@ -620,10 +618,6 @@ fn digest_reader<'a, T: Read>(
Err(e) => return Err(e), Err(e) => return Err(e),
} }
} }
if windows && looking_for_newline {
vec.push(b'\r');
digest.input(&vec);
}
if digest.output_bits() > 0 { if digest.output_bits() > 0 {
Ok(digest.result_str()) Ok(digest.result_str())