1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-09-15 19:36:16 +00:00

hashsum: use std::io::copy() to simplify digest

Create a `DigestWriter` struct that implements `Write` by passing bytes
directly to `Digest::input()`, so that `hashsum` can use
`std::io::copy()`. Using `std::io::copy()` eliminates some boilerplate
code around reading and writing bytes. And defining `DigestWriter` makes
it easier to add a `#[cfg(windows)]` guard around the Windows-specific
replacement of "\r\n" with "\n".
This commit is contained in:
Jeffrey Finkelstein 2021-08-24 21:34:30 -04:00
parent 7fb1fcfac5
commit 7fea771f32
2 changed files with 98 additions and 40 deletions

View file

@ -1,10 +1,22 @@
// spell-checker:ignore memmem
//! Implementations of digest functions, like md5 and sha1.
//!
//! The [`Digest`] trait represents the interface for providing inputs
//! to these digest functions and accessing the resulting hash. The
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
//! implements the [`Write`] trait, for use with APIs that consume a
//! writer, such as [`std::io::copy`].
extern crate digest; extern crate digest;
extern crate md5; extern crate md5;
extern crate sha1; extern crate sha1;
extern crate sha2; extern crate sha2;
extern crate sha3; extern crate sha3;
use std::io::Write;
use hex::ToHex; use hex::ToHex;
#[cfg(windows)]
use memchr::memmem;
use crate::digest::digest::{ExtendableOutput, Input, XofReader}; use crate::digest::digest::{ExtendableOutput, Input, XofReader};
@ -158,3 +170,76 @@ impl_digest_sha!(sha3::Sha3_384, 384);
impl_digest_sha!(sha3::Sha3_512, 512); impl_digest_sha!(sha3::Sha3_512, 512);
impl_digest_shake!(sha3::Shake128); impl_digest_shake!(sha3::Shake128);
impl_digest_shake!(sha3::Shake256); impl_digest_shake!(sha3::Shake256);
/// A [`Write`] adapter that feeds every written byte into a [`Digest`].
///
/// This struct mutably borrows a boxed [`Digest`] for the lifetime `'a`
/// and provides a [`Write`] implementation that passes input bytes
/// directly to [`Digest::input`].
///
/// On Windows, if `binary` is `false`, then the [`Write::write`]
/// implementation replaces instances of "\r\n" with "\n" before passing
/// the input bytes to the wrapped digest. On other operating systems the
/// flag is stored but never read.
pub struct DigestWriter<'a> {
    /// The digest that receives the written bytes.
    digest: &'a mut Box<dyn Digest>,

    /// Whether to write to the digest in binary mode or text mode on Windows.
    ///
    /// If this is `false`, then instances of "\r\n" are replaced with
    /// "\n" before passing input bytes to the wrapped digest.
    //
    // Only the Windows implementation of `write` reads this field, so the
    // dead-code lint is silenced on the other platforms only. That way the
    // lint still fires on Windows if the field ever becomes unused there.
    #[cfg_attr(not(windows), allow(dead_code))]
    binary: bool,
}
impl<'a> DigestWriter<'a> {
    /// Creates a writer that forwards every written byte to `digest`.
    ///
    /// `digest` stays mutably borrowed for as long as the returned writer
    /// is in use. `binary` selects binary mode (`true`) or text mode
    /// (`false`); the flag is only consulted by the Windows implementation
    /// of `write`, which strips the "\r" from each "\r\n" in text mode.
    //
    // Returning `Self` spells out that the writer carries the `'a` borrow,
    // instead of hiding the lifetime behind a bare `DigestWriter`.
    pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> Self {
        Self { digest, binary }
    }
}
impl<'a> Write for DigestWriter<'a> {
    /// Feeds all of `buf` to the wrapped digest unchanged and reports the
    /// whole buffer as consumed.
    #[cfg(not(windows))]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();
        self.digest.input(buf);
        Ok(consumed)
    }

    /// Feeds `buf` to the wrapped digest, dropping the "\r" of every
    /// "\r\n" pair when the writer is in text mode.
    ///
    /// The returned count is always `buf.len()`, even when carriage
    /// returns were dropped: it is the number of bytes consumed from the
    /// caller's buffer, so a caller looping over `write()` knows where the
    /// next contiguous slice starts.
    #[cfg(windows)]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();

        if self.binary {
            // Binary mode: the bytes go to the digest exactly as given.
            self.digest.input(buf);
        } else {
            // Text mode: for each "\r\n", input everything from the end of
            // the previous match up to (but excluding) the "\r", then
            // resume at the "\n" so that it is kept.
            //
            // FIXME This fails if one call to `write()` ends with the
            // "\r" and the next call to `write()` begins with the "\n".
            let mut start = 0;
            for carriage_return in memmem::find_iter(buf, b"\r\n") {
                self.digest.input(&buf[start..carriage_return]);
                start = carriage_return + 1;
            }
            // Whatever follows the last match (or the whole buffer when
            // there was no match at all).
            self.digest.input(&buf[start..]);
        }

        Ok(consumed)
    }

    /// Does nothing: `write` hands every byte to the digest immediately,
    /// so there is never anything buffered to flush.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

View file

@ -7,7 +7,7 @@
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
// spell-checker:ignore (ToDO) algo, algoname, regexes, nread memmem // spell-checker:ignore (ToDO) algo, algoname, regexes, nread
#[macro_use] #[macro_use]
extern crate clap; extern crate clap;
@ -18,11 +18,11 @@ extern crate uucore;
mod digest; mod digest;
use self::digest::Digest; use self::digest::Digest;
use self::digest::DigestWriter;
use clap::{App, Arg, ArgMatches}; use clap::{App, Arg, ArgMatches};
use hex::ToHex; use hex::ToHex;
use md5::Context as Md5; use md5::Context as Md5;
use memchr::memmem;
use regex::Regex; use regex::Regex;
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha224, Sha256, Sha384, Sha512}; use sha2::{Sha224, Sha256, Sha384, Sha512};
@ -540,7 +540,7 @@ where
let real_sum = crash_if_err!( let real_sum = crash_if_err!(
1, 1,
digest_reader( digest_reader(
&mut *options.digest, &mut options.digest,
&mut ckf, &mut ckf,
binary_check, binary_check,
options.output_bits options.output_bits
@ -571,7 +571,7 @@ where
let sum = crash_if_err!( let sum = crash_if_err!(
1, 1,
digest_reader( digest_reader(
&mut *options.digest, &mut options.digest,
&mut file, &mut file,
options.binary, options.binary,
options.output_bits options.output_bits
@ -598,48 +598,21 @@ where
Ok(()) Ok(())
} }
fn digest_reader<'a, T: Read>( fn digest_reader<T: Read>(
digest: &mut (dyn Digest + 'a), digest: &mut Box<dyn Digest>,
reader: &mut BufReader<T>, reader: &mut BufReader<T>,
binary: bool, binary: bool,
output_bits: usize, output_bits: usize,
) -> io::Result<String> { ) -> io::Result<String> {
digest.reset(); digest.reset();
// Digest file, do not hold too much in memory at any given moment // Read bytes from `reader` and write those bytes to `digest`.
let windows = cfg!(windows);
let mut buffer = Vec::with_capacity(524_288);
loop {
match reader.read_to_end(&mut buffer) {
Ok(0) => {
break;
}
Ok(nread) => {
if windows && !binary {
// In Windows text mode, replace each occurrence of
// "\r\n" with "\n".
// //
// Find all occurrences of "\r\n", inputting the // If `binary` is `false` and the operating system is Windows, then
// slice just before the "\n" in the previous // `DigestWriter` replaces "\r\n" with "\n" before it writes the
// instance of "\r\n" and the beginning of this // bytes into `digest`. Otherwise, it just inserts the bytes as-is.
// "\r\n". let mut digest_writer = DigestWriter::new(digest, binary);
// std::io::copy(reader, &mut digest_writer)?;
// FIXME This fails if one call to `read()` ends
// with the "\r" and the next call to `read()`
// begins with the "\n".
let mut i_prev = 0;
for i in memmem::find_iter(&buffer[0..nread], b"\r\n") {
digest.input(&buffer[i_prev..i]);
i_prev = i + 1;
}
digest.input(&buffer[i_prev..nread]);
} else {
digest.input(&buffer[..nread]);
}
}
Err(e) => return Err(e),
}
}
if digest.output_bits() > 0 { if digest.output_bits() > 0 {
Ok(digest.result_str()) Ok(digest.result_str())