diff --git a/src/od/od.rs b/src/od/od.rs index 2b91a0e29..7563c751d 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -18,6 +18,7 @@ extern crate uucore; mod multifilereader; mod partialreader; +mod peekreader; mod byteorder_io; mod formatteriteminfo; mod prn_int; @@ -28,12 +29,12 @@ mod parse_nrofbytes; mod mockstream; use std::cmp; -use std::io::Read; use std::io::Write; use unindent::*; use byteorder_io::*; use multifilereader::*; use partialreader::*; +use peekreader::*; use prn_int::*; use prn_char::*; use prn_float::*; @@ -51,6 +52,7 @@ macro_rules! hashmap { static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; +const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes #[derive(Copy, Clone, Debug, Eq, PartialEq)] enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } @@ -254,12 +256,13 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, skip_bytes: usize, read_bytes: Option) -> i32 { let mf = MultifileReader::new(fnames); - let mut input = PartialReader::new(mf, skip_bytes, read_bytes); + let pr = PartialReader::new(mf, skip_bytes, read_bytes); + let mut input = PeekReader::new(pr); let mut addr = skip_bytes; let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); - let mut bytes: Vec = Vec::with_capacity(line_bytes); - unsafe { bytes.set_len(line_bytes); } // fast but uninitialized + let mut bytes: Vec = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE); + unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); let print_width_block = formats @@ -302,12 +305,12 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // print each line data (or multi-format raster of several lines describing the same data). // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line - match input.read(bytes.as_mut_slice()) { - Ok(0) => { + match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) { + Ok((0, _)) => { print_final_offset(input_offset_base, addr); break; } - Ok(n) => { + Ok((n, peekbytes)) => { // not enough byte for a whole element, this should only happen on the last line. if n != line_bytes { // set zero bytes in the part of the buffer that will be used, but is not filled. @@ -321,7 +324,10 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } } - if !output_duplicates && n == line_bytes && previous_bytes == bytes { + if !output_duplicates + && n == line_bytes + && !previous_bytes.is_empty() + && previous_bytes[..line_bytes] == bytes[..line_bytes] { if !duplicate_line { duplicate_line = true; println!("*"); @@ -334,7 +340,8 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, previous_bytes.clone_from(&bytes); } - print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr), + print_bytes(byte_order, &bytes, n, peekbytes, + &print_with_radix(input_offset_base, addr), &spaced_formatters, byte_size_block); } @@ -355,7 +362,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } } -fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, +fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str, formats: &[SpacedFormatterItemInfo], byte_size_block: usize) { let mut first = true; // First line of a multi-format raster. for f in formats { @@ -401,7 +408,7 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, output_text.push_str(&func(p)); } FormatWriter::MultibyteWriter(func) => { - output_text.push_str(&func(&bytes[b..length])); + output_text.push_str(&func(&bytes[b..length+peekbytes])); } } b = nextb; diff --git a/src/od/peekreader.rs b/src/od/peekreader.rs new file mode 100644 index 000000000..10e415273 --- /dev/null +++ b/src/od/peekreader.rs @@ -0,0 +1,213 @@ +//! Contains the trait `PeekRead` and type `PeekReader` implementing it. + +use std::io; +use std::io::{Read, Write}; +use multifilereader::HasError; + +/// A trait which supplies a function to peek into a stream without +/// actually reading it. +/// +/// Like `std::io::Read`, it allows to read data from a stream, with +/// the additional possibility to reserve a part of the returned data +/// with the data which will be read in subsequent calls. +/// +pub trait PeekRead { + /// Reads data into a buffer. + /// + /// Fills `out` with data. The last `peek_size` bytes of `out` are + /// used for data which keeps available on subsequent calls. + /// `peek_size` must be smaller or equal to the size of `out`. + /// + /// Returns a tuple where the first number is the number of bytes + /// read from the stream, and the second number is the number of + /// bytes additionally read. Any of the numbers might be zero. + /// It can also return an error. + /// + /// A type implementing this trait, will typically also implement + /// `std::io::Read`. + /// + /// # Panics + /// Might panic if `peek_size` is larger then the size of `out` + fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)>; +} + +/// Wrapper for `std::io::Read` allowing to peek into the data to be read. +pub struct PeekReader { + inner: R, + temp_buffer: Vec, +} + +impl PeekReader { + /// Create a new `PeekReader` wrapping `inner` + pub fn new(inner: R) -> Self { + PeekReader { + inner: inner, + temp_buffer: Vec::new(), + } + } +} + +impl PeekReader { + fn read_from_tempbuffer(&mut self, mut out: &mut [u8]) -> usize { + match out.write(self.temp_buffer.as_mut_slice()) { + Ok(n) => { + self.temp_buffer.drain(..n); + n + }, + Err(_) => 0, + } + } + + fn write_to_tempbuffer(&mut self, bytes: &[u8]) { + // if temp_buffer is not empty, data has to be inserted in front + let org_buffer: Vec<_> = self.temp_buffer.drain(..).collect(); + self.temp_buffer.write(bytes).unwrap(); + self.temp_buffer.extend(org_buffer); + } +} + +impl Read for PeekReader { + fn read(&mut self, out: &mut [u8]) -> io::Result { + let start_pos = self.read_from_tempbuffer(out); + match self.inner.read(&mut out[start_pos..]) { + Err(e) => Err(e), + Ok(n) => Ok(n + start_pos), + } + } +} + +impl PeekRead for PeekReader { + /// Reads data into a buffer. + /// + /// See `PeekRead::peek_read`. + /// + /// # Panics + /// If `peek_size` is larger then the size of `out` + fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)> { + assert!(out.len() >= peek_size); + match self.read(out) { + Err(e) => Err(e), + Ok(bytes_in_buffer) => { + let unused = out.len() - bytes_in_buffer; + if peek_size <= unused { + Ok((bytes_in_buffer, 0)) + } + else { + let actual_peek_size = peek_size - unused; + let real_size = bytes_in_buffer - actual_peek_size; + self.write_to_tempbuffer(&out[real_size..bytes_in_buffer]); + Ok((real_size, actual_peek_size)) + } + }, + } + } +} + +impl HasError for PeekReader { + fn has_error(&self) -> bool { + self.inner.has_error() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read}; + + #[test] + fn test_read_normal() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefgh"[..])); + + let mut v = [0; 10]; + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_peek_read_without_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefgh"[..])); + + let mut v = [0; 10]; + assert_eq!(sut.peek_read(v.as_mut(), 0).unwrap(), (8,0)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_peek_read_and_read() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 8]; + assert_eq!(sut.read(v2.as_mut()).unwrap(), 6); + assert_eq!(v2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + } + + #[test] + fn test_peek_read_multiple_times() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut s1 = [0; 8]; + assert_eq!(sut.peek_read(s1.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(s1, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut s2 = [0; 8]; + assert_eq!(sut.peek_read(s2.as_mut(), 4).unwrap(), (4, 2)); + assert_eq!(s2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + + let mut s3 = [0; 8]; + assert_eq!(sut.peek_read(s3.as_mut(), 4).unwrap(), (2, 0)); + assert_eq!(s3, [0x69, 0x6a, 0, 0, 0, 0, 0, 0]); + } + + #[test] + fn test_peek_read_and_read_with_small_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 2]; + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x67, 0x68]); + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x69, 0x6a]); + } + + #[test] + fn test_peek_read_with_smaller_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 2]; + assert_eq!(sut.peek_read(v2.as_mut(), 2).unwrap(), (0, 2)); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x67, 0x68]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x69, 0x6a]); + } + + #[test] + fn test_peek_read_peek_with_larger_peek_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 8]; + assert_eq!(sut.peek_read(v2.as_mut(), 8).unwrap(), (0, 6)); + assert_eq!(v2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + } +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 26f3d5b60..e18cd9501 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -246,15 +246,14 @@ fn test_f64(){ #[test] fn test_multibyte() { - // TODO: replace **** with \u{1B000} - let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen ****".as_bytes()); + let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen \u{1B000}".as_bytes()); assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, unindent(" 0000000 U n i v e r s i t ä ** t - 0000014 T ü ** b i n g e n * - 0000030 * * * + 0000014 T ü ** b i n g e n \u{1B000} + 0000030 ** ** ** 0000033 ")); } @@ -313,11 +312,27 @@ fn test_width_without_value(){ #[test] fn test_suppress_duplicates(){ - let input = [0u8 ; 41]; + let input: [u8; 41] = [ + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 1, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0]; let expected_output = unindent(" 0000000 00000000000 0000 0000 * + 0000020 00000000001 + 0001 0000 + 0000024 00000000000 + 0000 0000 + * 0000050 00000000000 0000 0000051