mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #2711 from jfinkels/hashsum-crlf-windows-across-blocks
hashsum: fix handling of \r\n in Windows text mode
This commit is contained in:
commit
ede77c6099
2 changed files with 89 additions and 12 deletions
|
@ -189,13 +189,31 @@ pub struct DigestWriter<'a> {
|
||||||
/// "\n" before passing input bytes to the [`digest`].
|
/// "\n" before passing input bytes to the [`digest`].
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
binary: bool,
|
binary: bool,
|
||||||
// TODO This is dead code only on non-Windows operating systems. It
|
|
||||||
// might be better to use a `#[cfg(windows)]` guard here.
|
/// Whether the previous call to `write()` ended with a "\r" that is still buffered.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
was_last_character_carriage_return: bool,
|
||||||
|
// TODO These are dead code only on non-Windows operating systems.
|
||||||
|
// It might be better to use a `#[cfg(windows)]` guard here.
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> DigestWriter<'a> {
|
impl<'a> DigestWriter<'a> {
|
||||||
pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> DigestWriter {
|
pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> DigestWriter {
|
||||||
DigestWriter { digest, binary }
|
let was_last_character_carriage_return = false;
|
||||||
|
DigestWriter {
|
||||||
|
digest,
|
||||||
|
binary,
|
||||||
|
was_last_character_carriage_return,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(&mut self) -> bool {
|
||||||
|
if self.was_last_character_carriage_return {
|
||||||
|
self.digest.input(&[b'\r']);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,22 +231,40 @@ impl<'a> Write for DigestWriter<'a> {
|
||||||
return Ok(buf.len());
|
return Ok(buf.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
// In Windows text mode, replace each occurrence of "\r\n"
|
// The remaining code handles Windows text mode, where we must
|
||||||
// with "\n".
|
// replace each occurrence of "\r\n" with "\n".
|
||||||
//
|
//
|
||||||
// Find all occurrences of "\r\n", inputting the slice just
|
// First, if the last character written was "\r" and the first
|
||||||
// before the "\n" in the previous instance of "\r\n" and
|
// character in the current buffer to write is not "\n", then we
|
||||||
// the beginning of this "\r\n".
|
// need to write the "\r" that we buffered from the previous
|
||||||
//
|
// call to `write()`.
|
||||||
// FIXME This fails if one call to `write()` ends with the
|
|
||||||
// "\r" and the next call to `write()` begins with the "\n".
|
|
||||||
let n = buf.len();
|
let n = buf.len();
|
||||||
|
if self.was_last_character_carriage_return && n > 0 && buf[0] != b'\n' {
|
||||||
|
self.digest.input(&[b'\r']);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next, find all occurrences of "\r\n", inputting the slice
|
||||||
|
// just before the "\n" in the previous instance of "\r\n" and
|
||||||
|
// the beginning of this "\r\n".
|
||||||
let mut i_prev = 0;
|
let mut i_prev = 0;
|
||||||
for i in memmem::find_iter(buf, b"\r\n") {
|
for i in memmem::find_iter(buf, b"\r\n") {
|
||||||
self.digest.input(&buf[i_prev..i]);
|
self.digest.input(&buf[i_prev..i]);
|
||||||
i_prev = i + 1;
|
i_prev = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finally, check whether the last character is "\r". If so,
|
||||||
|
// buffer it until we know that the next character is not "\n",
|
||||||
|
// which can only be known on the next call to `write()`.
|
||||||
|
//
|
||||||
|
// This all assumes that `write()` will be called on adjacent
|
||||||
|
// blocks of the input.
|
||||||
|
if n > 0 && buf[n - 1] == b'\r' {
|
||||||
|
self.was_last_character_carriage_return = true;
|
||||||
|
self.digest.input(&buf[i_prev..n - 1]);
|
||||||
|
} else {
|
||||||
|
self.was_last_character_carriage_return = false;
|
||||||
self.digest.input(&buf[i_prev..n]);
|
self.digest.input(&buf[i_prev..n]);
|
||||||
|
}
|
||||||
|
|
||||||
// Even though we dropped a "\r" for each "\r\n" we found, we
|
// Even though we dropped a "\r" for each "\r\n" we found, we
|
||||||
// still report the number of bytes written as `n`. This is
|
// still report the number of bytes written as `n`. This is
|
||||||
|
@ -243,3 +279,36 @@ impl<'a> Write for DigestWriter<'a> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
|
||||||
|
/// Test for replacing a "\r\n" sequence with "\n" when the "\r" is
|
||||||
|
/// at the end of one block and the "\n" is at the beginning of the
|
||||||
|
/// next block, when reading in blocks.
|
||||||
|
#[cfg(windows)]
|
||||||
|
#[test]
|
||||||
|
fn test_crlf_across_blocks() {
|
||||||
|
use std::io::Write;
|
||||||
|
|
||||||
|
use crate::digest::Digest;
|
||||||
|
use crate::digest::DigestWriter;
|
||||||
|
|
||||||
|
// Writing "\r" in one call to `write()`, and then "\n" in another.
|
||||||
|
let mut digest = Box::new(md5::Context::new()) as Box<dyn Digest>;
|
||||||
|
let mut writer_crlf = DigestWriter::new(&mut digest, false);
|
||||||
|
writer_crlf.write_all(&[b'\r']).unwrap();
|
||||||
|
writer_crlf.write_all(&[b'\n']).unwrap();
|
||||||
|
writer_crlf.finalize();
|
||||||
|
let result_crlf = digest.result_str();
|
||||||
|
|
||||||
|
// We expect "\r\n" to be replaced with "\n" in text mode on Windows.
|
||||||
|
let mut digest = Box::new(md5::Context::new()) as Box<dyn Digest>;
|
||||||
|
let mut writer_lf = DigestWriter::new(&mut digest, false);
|
||||||
|
writer_lf.write_all(&[b'\n']).unwrap();
|
||||||
|
writer_lf.finalize();
|
||||||
|
let result_lf = digest.result_str();
|
||||||
|
|
||||||
|
assert_eq!(result_crlf, result_lf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -611,8 +611,16 @@ fn digest_reader<T: Read>(
|
||||||
// If `binary` is `false` and the operating system is Windows, then
|
// If `binary` is `false` and the operating system is Windows, then
|
||||||
// `DigestWriter` replaces "\r\n" with "\n" before it writes the
|
// `DigestWriter` replaces "\r\n" with "\n" before it writes the
|
||||||
// bytes into `digest`. Otherwise, it just inserts the bytes as-is.
|
// bytes into `digest`. Otherwise, it just inserts the bytes as-is.
|
||||||
|
//
|
||||||
|
// In order to support replacing "\r\n", we must call `finalize()`
|
||||||
|
// to handle the possibility that the last character read
|
||||||
|
// from the reader was "\r". (This character gets buffered by
|
||||||
|
// `DigestWriter` and only written if the following character is not
|
||||||
|
// "\n". But when "\r" is the last character read, we need to force
|
||||||
|
// it to be written.)
|
||||||
let mut digest_writer = DigestWriter::new(digest, binary);
|
let mut digest_writer = DigestWriter::new(digest, binary);
|
||||||
std::io::copy(reader, &mut digest_writer)?;
|
std::io::copy(reader, &mut digest_writer)?;
|
||||||
|
digest_writer.finalize();
|
||||||
|
|
||||||
if digest.output_bits() > 0 {
|
if digest.output_bits() > 0 {
|
||||||
Ok(digest.result_str())
|
Ok(digest.result_str())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue