1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-09-15 11:36:16 +00:00

Merge pull request #2663 from jfinkels/hashsum-std-io-copy

hashsum: use std::io::copy() to simplify digest
This commit is contained in:
Sylvestre Ledru 2021-09-18 17:19:16 +02:00 committed by GitHub
commit b59bbddcbf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 98 additions and 40 deletions

View file

@ -1,10 +1,22 @@
// spell-checker:ignore memmem
//! Implementations of digest functions, like md5 and sha1.
//!
//! The [`Digest`] trait represents the interface for providing inputs
//! to these digest functions and accessing the resulting hash. The
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
//! implements the [`Write`] trait, for use in situations where calling
//! [`Write::write`] would be useful, such as passing a digest as the
//! destination of [`std::io::copy`].
extern crate digest;
extern crate md5;
extern crate sha1;
extern crate sha2;
extern crate sha3;
use std::io::Write;
use hex::ToHex;
#[cfg(windows)]
use memchr::memmem;
use crate::digest::digest::{ExtendableOutput, Input, XofReader};
@ -158,3 +170,76 @@ impl_digest_sha!(sha3::Sha3_384, 384);
// Last of the SHA-3 family registrations.
// NOTE(review): `impl_digest_sha!` and `impl_digest_shake!` are defined
// outside this view; presumably each invocation implements `Digest` for the
// named type and the numeric argument is the output size in bits -- confirm
// against the macro definitions.
impl_digest_sha!(sha3::Sha3_512, 512);
impl_digest_shake!(sha3::Shake128);
impl_digest_shake!(sha3::Shake256);
/// An adapter that lets a [`Digest`] be used as a [`Write`] sink.
///
/// Every buffer handed to [`Write::write`] is forwarded to
/// [`Digest::input`] on the wrapped digest.
///
/// On Windows, if `binary` is `false` (text mode), the [`Write::write`]
/// implementation replaces each "\r\n" with "\n" before passing the
/// bytes to the wrapped digest. On every other platform the bytes are
/// forwarded unchanged regardless of `binary`.
///
/// Known limitation (Windows text mode only): a "\r\n" pair that is
/// split across two consecutive calls to [`Write::write`] is not
/// recognized, so the "\r" is passed through to the digest.
pub struct DigestWriter<'a> {
    /// The digest that receives the bytes written to this writer. It is
    /// borrowed mutably for `'a`.
    digest: &'a mut Box<dyn Digest>,
    /// Whether to write to the digest in binary mode or text mode on Windows.
    ///
    /// If this is `false`, then instances of "\r\n" are replaced with
    /// "\n" before passing input bytes to the wrapped digest.
    #[allow(dead_code)]
    binary: bool,
    // TODO `binary` is dead code only on non-Windows operating systems,
    // which is why it carries `#[allow(dead_code)]`. It might be better
    // to use a `#[cfg(windows)]` guard here.
}
impl<'a> DigestWriter<'a> {
    /// Creates a writer that forwards everything written to it into `digest`.
    ///
    /// `binary` selects binary mode (`true`) or text mode (`false`). The
    /// flag is stored as given; it only affects how bytes are written on
    /// Windows.
    pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> Self {
        Self { digest, binary }
    }
}
impl<'a> Write for DigestWriter<'a> {
    /// Feeds `buf` to the wrapped digest unchanged and reports the whole
    /// buffer as consumed.
    ///
    /// This variant is compiled on non-Windows targets, where the
    /// `binary` flag is not consulted.
    #[cfg(not(windows))]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();
        self.digest.input(buf);
        Ok(consumed)
    }

    /// Feeds `buf` to the wrapped digest and reports the whole buffer as
    /// consumed.
    ///
    /// In binary mode the bytes are forwarded unchanged. In text mode
    /// each "\r\n" found in `buf` is fed to the digest as "\n".
    #[cfg(windows)]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();
        if self.binary {
            self.digest.input(buf);
        } else {
            // Windows text mode: drop the "\r" of every "\r\n".
            //
            // Walk over the positions of each "\r\n". For every match,
            // input the bytes from just after the previous dropped "\r"
            // up to (not including) this "\r", then continue from the
            // "\n" that follows it.
            //
            // FIXME This fails if one call to `write()` ends with the
            // "\r" and the next call to `write()` begins with the "\n".
            let tail_start = memmem::find_iter(buf, b"\r\n").fold(0, |segment_start, cr| {
                self.digest.input(&buf[segment_start..cr]);
                cr + 1
            });
            // Whatever follows the last dropped "\r" (or the entire
            // buffer when there was no match at all).
            self.digest.input(&buf[tail_start..]);
        }
        // Even when "\r" bytes were dropped, the full input length is
        // reported: the return value is the number of bytes consumed
        // from `buf`, which is what a caller looping over `write()`
        // needs to locate the next contiguous slice.
        Ok(consumed)
    }

    /// Does nothing: `write` hands every byte to the digest immediately,
    /// so there is never anything pending.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

View file

@ -7,7 +7,7 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (ToDO) algo, algoname, regexes, nread memmem
// spell-checker:ignore (ToDO) algo, algoname, regexes, nread
#[macro_use]
extern crate clap;
@ -18,11 +18,11 @@ extern crate uucore;
mod digest;
use self::digest::Digest;
use self::digest::DigestWriter;
use clap::{App, Arg, ArgMatches};
use hex::ToHex;
use md5::Context as Md5;
use memchr::memmem;
use regex::Regex;
use sha1::Sha1;
use sha2::{Sha224, Sha256, Sha384, Sha512};
@ -540,7 +540,7 @@ where
let real_sum = crash_if_err!(
1,
digest_reader(
&mut *options.digest,
&mut options.digest,
&mut ckf,
binary_check,
options.output_bits
@ -571,7 +571,7 @@ where
let sum = crash_if_err!(
1,
digest_reader(
&mut *options.digest,
&mut options.digest,
&mut file,
options.binary,
options.output_bits
@ -598,48 +598,21 @@ where
Ok(())
}
fn digest_reader<'a, T: Read>(
digest: &mut (dyn Digest + 'a),
fn digest_reader<T: Read>(
digest: &mut Box<dyn Digest>,
reader: &mut BufReader<T>,
binary: bool,
output_bits: usize,
) -> io::Result<String> {
digest.reset();
// Digest file, do not hold too much in memory at any given moment
let windows = cfg!(windows);
let mut buffer = Vec::with_capacity(524_288);
loop {
match reader.read_to_end(&mut buffer) {
Ok(0) => {
break;
}
Ok(nread) => {
if windows && !binary {
// In Windows text mode, replace each occurrence of
// "\r\n" with "\n".
//
// Find all occurrences of "\r\n", inputting the
// slice just before the "\n" in the previous
// instance of "\r\n" and the beginning of this
// "\r\n".
//
// FIXME This fails if one call to `read()` ends
// with the "\r" and the next call to `read()`
// begins with the "\n".
let mut i_prev = 0;
for i in memmem::find_iter(&buffer[0..nread], b"\r\n") {
digest.input(&buffer[i_prev..i]);
i_prev = i + 1;
}
digest.input(&buffer[i_prev..nread]);
} else {
digest.input(&buffer[..nread]);
}
}
Err(e) => return Err(e),
}
}
// Read bytes from `reader` and write those bytes to `digest`.
//
// If `binary` is `false` and the operating system is Windows, then
// `DigestWriter` replaces "\r\n" with "\n" before it writes the
// bytes into `digest`. Otherwise, it just inserts the bytes as-is.
let mut digest_writer = DigestWriter::new(digest, binary);
std::io::copy(reader, &mut digest_writer)?;
if digest.output_bits() > 0 {
Ok(digest.result_str())