From 52cfd4c6cb184473448ca3137f29d1d3986b6901 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 23 Aug 2021 22:41:53 -0400 Subject: [PATCH] hashsum: don't copy input buffer on Windows Remove a copy operation of the input buffer being read for digest when reading in text mode on Windows. Previously, the code was copying the buffer to a completely new `Vec`, replacing "\r\n" with "\n". Instead, the code now scans for the indices at which each "\r\n" occurs in the input buffer and inputs into the digest only the characters before the "\r" and after it. --- Cargo.lock | 1 + src/uu/hashsum/Cargo.toml | 1 + src/uu/hashsum/src/hashsum.rs | 42 +++++++++++++++-------------------- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0d3ceee6..adc373f85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2392,6 +2392,7 @@ dependencies = [ "hex", "libc", "md5", + "memchr 2.4.0", "regex", "regex-syntax", "sha1", diff --git a/src/uu/hashsum/Cargo.toml b/src/uu/hashsum/Cargo.toml index 43d78119b..b4da17b71 100644 --- a/src/uu/hashsum/Cargo.toml +++ b/src/uu/hashsum/Cargo.toml @@ -19,6 +19,7 @@ digest = "0.6.2" clap = { version = "2.33", features = ["wrap_help"] } hex = "0.2.0" libc = "0.2.42" +memchr = "2" md5 = "0.3.5" regex = "1.0.1" regex-syntax = "0.6.7" diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 1e677358e..384ef1388 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -7,7 +7,7 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. 
-// spell-checker:ignore (ToDO) algo, algoname, regexes, nread +// spell-checker:ignore (ToDO) algo, algoname, regexes, nread memmem #[macro_use] extern crate clap; @@ -22,6 +22,7 @@ use self::digest::Digest; use clap::{App, Arg, ArgMatches}; use hex::ToHex; use md5::Context as Md5; +use memchr::memmem; use regex::Regex; use sha1::Sha1; use sha2::{Sha224, Sha256, Sha384, Sha512}; @@ -586,8 +587,6 @@ fn digest_reader<'a, T: Read>( // Digest file, do not hold too much in memory at any given moment let windows = cfg!(windows); let mut buffer = Vec::with_capacity(524_288); - let mut vec = Vec::with_capacity(524_288); - let mut looking_for_newline = false; loop { match reader.read_to_end(&mut buffer) { Ok(0) => { @@ -595,24 +594,23 @@ fn digest_reader<'a, T: Read>( } Ok(nread) => { if windows && !binary { - // Windows text mode returns '\n' when reading '\r\n' - for &b in buffer.iter().take(nread) { - if looking_for_newline { - if b != b'\n' { - vec.push(b'\r'); - } - if b != b'\r' { - vec.push(b); - looking_for_newline = false; - } - } else if b != b'\r' { - vec.push(b); - } else { - looking_for_newline = true; - } + // In Windows text mode, replace each occurrence of + // "\r\n" with "\n". + // + // Find all occurrences of "\r\n", inputting the + // slice that starts at the "\n" of the previous + // instance of "\r\n" and ends just before the + // "\r" of this one. + // + // FIXME This fails if one call to `read()` ends + // with the "\r" and the next call to `read()` + // begins with the "\n". + let mut i_prev = 0; + for i in memmem::find_iter(&buffer[0..nread], b"\r\n") { + digest.input(&buffer[i_prev..i]); + i_prev = i + 1; } - digest.input(&vec); - vec.clear(); + digest.input(&buffer[i_prev..nread]); } else { digest.input(&buffer[..nread]); } @@ -620,10 +618,6 @@ fn digest_reader<'a, T: Read>( Err(e) => return Err(e), } } - if windows && looking_for_newline { - vec.push(b'\r'); - digest.input(&vec); - } if digest.output_bits() > 0 { Ok(digest.result_str())