From 7fea771f32c0ab68e63a75de8dd4245188908e91 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 24 Aug 2021 21:34:30 -0400 Subject: [PATCH] hashsum: use std::io::copy() to simplify digest Create a `DigestWriter` struct that implements `Write` by passing bytes directly to `Digest::input()`, so that `hashsum` can use `std::io::copy()`. Using `std::io::copy()` eliminates some boilerplate code around reading and writing bytes. And defining `DigestWriter` makes it easier to add a `#[cfg(windows)]` guard around the Windows-specific replacement of "\r\n" with "\n". --- src/uu/hashsum/src/digest.rs | 85 +++++++++++++++++++++++++++++++++++ src/uu/hashsum/src/hashsum.rs | 53 ++++++---------------- 2 files changed, 98 insertions(+), 40 deletions(-) diff --git a/src/uu/hashsum/src/digest.rs b/src/uu/hashsum/src/digest.rs index 9093d94a7..531dc7e4f 100644 --- a/src/uu/hashsum/src/digest.rs +++ b/src/uu/hashsum/src/digest.rs @@ -1,10 +1,22 @@ +// spell-checker:ignore memmem +//! Implementations of digest functions, like md5 and sha1. +//! +//! The [`Digest`] trait represents the interface for providing inputs +//! to these digest functions and accessing the resulting hash. The +//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that +//! implements the [`Write`] trait, for use in situations where calling +//! [`write`] would be useful. extern crate digest; extern crate md5; extern crate sha1; extern crate sha2; extern crate sha3; +use std::io::Write; + use hex::ToHex; +#[cfg(windows)] +use memchr::memmem; use crate::digest::digest::{ExtendableOutput, Input, XofReader}; @@ -158,3 +170,76 @@ impl_digest_sha!(sha3::Sha3_384, 384); impl_digest_sha!(sha3::Sha3_512, 512); impl_digest_shake!(sha3::Shake128); impl_digest_shake!(sha3::Shake256); + +/// A struct that writes to a digest. +/// +/// This struct wraps a [`Digest`] and provides a [`Write`] +/// implementation that passes input bytes directly to the +/// [`Digest::input`]. 
+/// +/// On Windows, if `binary` is `false`, then the [`write`] +/// implementation replaces instances of "\r\n" with "\n" before passing +/// the input bytes to the [`digest`]. +pub struct DigestWriter<'a> { + digest: &'a mut Box<dyn Digest>, + + /// Whether to write to the digest in binary mode or text mode on Windows. + /// + /// If this is `false`, then instances of "\r\n" are replaced with + /// "\n" before passing input bytes to the [`digest`]. + #[allow(dead_code)] + binary: bool, + // TODO This is dead code only on non-Windows operating systems. It + // might be better to use a `#[cfg(windows)]` guard here. +} + +impl<'a> DigestWriter<'a> { + pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> DigestWriter { + DigestWriter { digest, binary } + } +} + +impl<'a> Write for DigestWriter<'a> { + #[cfg(not(windows))] + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + self.digest.input(buf); + Ok(buf.len()) + } + + #[cfg(windows)] + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + if self.binary { + self.digest.input(buf); + return Ok(buf.len()); + } + + // In Windows text mode, replace each occurrence of "\r\n" + // with "\n". + // + // Find all occurrences of "\r\n", inputting the slice that + // starts at the "\n" in the previous instance of "\r\n" and + // ends just before this "\r\n". + // + // FIXME This fails if one call to `write()` ends with the + // "\r" and the next call to `write()` begins with the "\n". + let n = buf.len(); + let mut i_prev = 0; + for i in memmem::find_iter(buf, b"\r\n") { + self.digest.input(&buf[i_prev..i]); + i_prev = i + 1; + } + self.digest.input(&buf[i_prev..n]); + + // Even though we dropped a "\r" for each "\r\n" we found, we + // still report the number of bytes written as `n`.
This is + // because the meaning of the returned number is supposed to be + // the number of bytes consumed by the writer, so that if the + // calling code were calling `write()` in a loop, it would know + // where the next contiguous slice of the buffer starts. + Ok(n) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index f820b083e..4186043f5 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -7,7 +7,7 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDO) algo, algoname, regexes, nread memmem +// spell-checker:ignore (ToDO) algo, algoname, regexes, nread #[macro_use] extern crate clap; @@ -18,11 +18,11 @@ extern crate uucore; mod digest; use self::digest::Digest; +use self::digest::DigestWriter; use clap::{App, Arg, ArgMatches}; use hex::ToHex; use md5::Context as Md5; -use memchr::memmem; use regex::Regex; use sha1::Sha1; use sha2::{Sha224, Sha256, Sha384, Sha512}; @@ -540,7 +540,7 @@ where let real_sum = crash_if_err!( 1, digest_reader( - &mut *options.digest, + &mut options.digest, &mut ckf, binary_check, options.output_bits @@ -571,7 +571,7 @@ where let sum = crash_if_err!( 1, digest_reader( - &mut *options.digest, + &mut options.digest, &mut file, options.binary, options.output_bits @@ -598,48 +598,21 @@ where Ok(()) } -fn digest_reader<'a, T: Read>( - digest: &mut (dyn Digest + 'a), +fn digest_reader<T: Read>( + digest: &mut Box<dyn Digest>, reader: &mut BufReader<T>, binary: bool, output_bits: usize, ) -> io::Result<String> { digest.reset(); - // Digest file, do not hold too much in memory at any given moment - let windows = cfg!(windows); - let mut buffer = Vec::with_capacity(524_288); - loop { - match reader.read_to_end(&mut buffer) { - Ok(0) => { - break; - } - Ok(nread) => { - if windows && !binary { - // In Windows text mode, replace each occurrence
of - // "\r\n" with "\n". - // - // Find all occurrences of "\r\n", inputting the - // slice just before the "\n" in the previous - // instance of "\r\n" and the beginning of this - // "\r\n". - // - // FIXME This fails if one call to `read()` ends - // with the "\r" and the next call to `read()` - // begins with the "\n". - let mut i_prev = 0; - for i in memmem::find_iter(&buffer[0..nread], b"\r\n") { - digest.input(&buffer[i_prev..i]); - i_prev = i + 1; - } - digest.input(&buffer[i_prev..nread]); - } else { - digest.input(&buffer[..nread]); - } - } - Err(e) => return Err(e), - } - } + // Read bytes from `reader` and write those bytes to `digest`. + // + // If `binary` is `false` and the operating system is Windows, then + // `DigestWriter` replaces "\r\n" with "\n" before it writes the + // bytes into `digest`. Otherwise, it just inserts the bytes as-is. + let mut digest_writer = DigestWriter::new(digest, binary); + std::io::copy(reader, &mut digest_writer)?; if digest.output_bits() > 0 { Ok(digest.result_str())