mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
od: finish multi-byte support
This commit is contained in:
parent
459db47c2b
commit
69bde1170d
3 changed files with 251 additions and 16 deletions
29
src/od/od.rs
29
src/od/od.rs
|
@ -18,6 +18,7 @@ extern crate uucore;
|
|||
|
||||
mod multifilereader;
|
||||
mod partialreader;
|
||||
mod peekreader;
|
||||
mod byteorder_io;
|
||||
mod formatteriteminfo;
|
||||
mod prn_int;
|
||||
|
@ -28,12 +29,12 @@ mod parse_nrofbytes;
|
|||
mod mockstream;
|
||||
|
||||
use std::cmp;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use unindent::*;
|
||||
use byteorder_io::*;
|
||||
use multifilereader::*;
|
||||
use partialreader::*;
|
||||
use peekreader::*;
|
||||
use prn_int::*;
|
||||
use prn_char::*;
|
||||
use prn_float::*;
|
||||
|
@ -51,6 +52,7 @@ macro_rules! hashmap {
|
|||
|
||||
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
const MAX_BYTES_PER_UNIT: usize = 8;
|
||||
const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes
|
||||
|
||||
/// Number base selector; `odfunc` receives one as `input_offset_base`
/// and hands it to the offset printing helpers.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Radix {
    Decimal,
    Hexadecimal,
    Octal,
    // NOTE(review): presumably means "print no offset at all" -- confirm against the option parser.
    NoPrefix,
}
|
||||
|
@ -254,12 +256,13 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
|
|||
skip_bytes: usize, read_bytes: Option<usize>) -> i32 {
|
||||
|
||||
let mf = MultifileReader::new(fnames);
|
||||
let mut input = PartialReader::new(mf, skip_bytes, read_bytes);
|
||||
let pr = PartialReader::new(mf, skip_bytes, read_bytes);
|
||||
let mut input = PeekReader::new(pr);
|
||||
let mut addr = skip_bytes;
|
||||
let mut duplicate_line = false;
|
||||
let mut previous_bytes: Vec<u8> = Vec::new();
|
||||
let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes);
|
||||
unsafe { bytes.set_len(line_bytes); } // fast but uninitialized
|
||||
let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE);
|
||||
unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized
|
||||
|
||||
let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size));
|
||||
let print_width_block = formats
|
||||
|
@ -302,12 +305,12 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
|
|||
// print each line data (or multi-format raster of several lines describing the same data).
|
||||
// TODO: we need to read more data in case a multi-byte sequence starts at the end of the line
|
||||
|
||||
match input.read(bytes.as_mut_slice()) {
|
||||
Ok(0) => {
|
||||
match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) {
|
||||
Ok((0, _)) => {
|
||||
print_final_offset(input_offset_base, addr);
|
||||
break;
|
||||
}
|
||||
Ok(n) => {
|
||||
Ok((n, peekbytes)) => {
|
||||
// not enough byte for a whole element, this should only happen on the last line.
|
||||
if n != line_bytes {
|
||||
// set zero bytes in the part of the buffer that will be used, but is not filled.
|
||||
|
@ -321,7 +324,10 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
|
|||
}
|
||||
}
|
||||
|
||||
if !output_duplicates && n == line_bytes && previous_bytes == bytes {
|
||||
if !output_duplicates
|
||||
&& n == line_bytes
|
||||
&& !previous_bytes.is_empty()
|
||||
&& previous_bytes[..line_bytes] == bytes[..line_bytes] {
|
||||
if !duplicate_line {
|
||||
duplicate_line = true;
|
||||
println!("*");
|
||||
|
@ -334,7 +340,8 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
|
|||
previous_bytes.clone_from(&bytes);
|
||||
}
|
||||
|
||||
print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr),
|
||||
print_bytes(byte_order, &bytes, n, peekbytes,
|
||||
&print_with_radix(input_offset_base, addr),
|
||||
&spaced_formatters, byte_size_block);
|
||||
}
|
||||
|
||||
|
@ -355,7 +362,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
|
|||
}
|
||||
}
|
||||
|
||||
fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str,
|
||||
fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str,
|
||||
formats: &[SpacedFormatterItemInfo], byte_size_block: usize) {
|
||||
let mut first = true; // First line of a multi-format raster.
|
||||
for f in formats {
|
||||
|
@ -401,7 +408,7 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str,
|
|||
output_text.push_str(&func(p));
|
||||
}
|
||||
FormatWriter::MultibyteWriter(func) => {
|
||||
output_text.push_str(&func(&bytes[b..length]));
|
||||
output_text.push_str(&func(&bytes[b..length+peekbytes]));
|
||||
}
|
||||
}
|
||||
b = nextb;
|
||||
|
|
213
src/od/peekreader.rs
Normal file
213
src/od/peekreader.rs
Normal file
|
@ -0,0 +1,213 @@
|
|||
//! Contains the trait `PeekRead` and type `PeekReader` implementing it.
|
||||
|
||||
use std::io;
|
||||
use std::io::{Read, Write};
|
||||
use multifilereader::HasError;
|
||||
|
||||
/// Reading from a stream with look-ahead.
///
/// Works like `std::io::Read`, with one addition: the tail of the
/// returned data can be marked as "peeked". Peeked bytes are handed to
/// the caller right away, but they are delivered again by the calls
/// that follow.
pub trait PeekRead {
    /// Fills `out` with data from the stream.
    ///
    /// The final `peek_size` bytes of `out` are reserved for look-ahead
    /// data, i.e. data which remains available to subsequent calls.
    /// `peek_size` must not exceed the size of `out`.
    ///
    /// On success a pair `(read, peeked)` is returned: `read` is the
    /// number of bytes read from the stream and `peeked` is the number
    /// of bytes read additionally as look-ahead. Either number may be
    /// zero.
    ///
    /// Types implementing this trait typically implement
    /// `std::io::Read` as well.
    ///
    /// # Errors
    /// An I/O error can be returned instead of the pair.
    ///
    /// # Panics
    /// Might panic if `peek_size` is larger than the size of `out`.
    fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize, usize)>;
}
|
||||
|
||||
/// Adapter around a `std::io::Read` implementation which makes it
/// possible to peek into data that has not been consumed yet.
pub struct PeekReader<R> {
    // The wrapped reader.
    inner: R,
    // Byte storage owned by the wrapper; created empty by `new`.
    temp_buffer: Vec<u8>,
}
|
||||
|
||||
impl<R> PeekReader<R> {
|
||||
/// Create a new `PeekReader` wrapping `inner`
|
||||
pub fn new(inner: R) -> Self {
|
||||
PeekReader {
|
||||
inner: inner,
|
||||
temp_buffer: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> PeekReader<R> {
|
||||
fn read_from_tempbuffer(&mut self, mut out: &mut [u8]) -> usize {
|
||||
match out.write(self.temp_buffer.as_mut_slice()) {
|
||||
Ok(n) => {
|
||||
self.temp_buffer.drain(..n);
|
||||
n
|
||||
},
|
||||
Err(_) => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn write_to_tempbuffer(&mut self, bytes: &[u8]) {
|
||||
// if temp_buffer is not empty, data has to be inserted in front
|
||||
let org_buffer: Vec<_> = self.temp_buffer.drain(..).collect();
|
||||
self.temp_buffer.write(bytes).unwrap();
|
||||
self.temp_buffer.extend(org_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Read for PeekReader<R> {
|
||||
fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
|
||||
let start_pos = self.read_from_tempbuffer(out);
|
||||
match self.inner.read(&mut out[start_pos..]) {
|
||||
Err(e) => Err(e),
|
||||
Ok(n) => Ok(n + start_pos),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> PeekRead for PeekReader<R> {
|
||||
/// Reads data into a buffer.
|
||||
///
|
||||
/// See `PeekRead::peek_read`.
|
||||
///
|
||||
/// # Panics
|
||||
/// If `peek_size` is larger then the size of `out`
|
||||
fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)> {
|
||||
assert!(out.len() >= peek_size);
|
||||
match self.read(out) {
|
||||
Err(e) => Err(e),
|
||||
Ok(bytes_in_buffer) => {
|
||||
let unused = out.len() - bytes_in_buffer;
|
||||
if peek_size <= unused {
|
||||
Ok((bytes_in_buffer, 0))
|
||||
}
|
||||
else {
|
||||
let actual_peek_size = peek_size - unused;
|
||||
let real_size = bytes_in_buffer - actual_peek_size;
|
||||
self.write_to_tempbuffer(&out[real_size..bytes_in_buffer]);
|
||||
Ok((real_size, actual_peek_size))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: HasError> HasError for PeekReader<R> {
    /// Reports the error state of the wrapped reader; the wrapper keeps
    /// no error state of its own.
    fn has_error(&self) -> bool {
        let wrapped = &self.inner;
        R::has_error(wrapped)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Read};

    /// Builds the system under test on top of an in-memory stream.
    fn reader(data: &'static [u8]) -> PeekReader<Cursor<&'static [u8]>> {
        PeekReader::new(Cursor::new(data))
    }

    // A plain `read` passes the stream through and leaves the rest of
    // the buffer untouched.
    #[test]
    fn test_read_normal() {
        let mut sut = reader(b"abcdefgh");

        let mut v = [0u8; 10];
        assert_eq!(sut.read(&mut v).unwrap(), 8);
        assert_eq!(v, *b"abcdefgh\0\0");
    }

    // With a peek size of zero `peek_read` acts like `read`.
    #[test]
    fn test_peek_read_without_buffer() {
        let mut sut = reader(b"abcdefgh");

        let mut v = [0u8; 10];
        assert_eq!(sut.peek_read(&mut v, 0).unwrap(), (8, 0));
        assert_eq!(v, *b"abcdefgh\0\0");
    }

    // Peeked bytes are delivered again by a following `read`.
    #[test]
    fn test_peek_read_and_read() {
        let mut sut = reader(b"abcdefghij");

        let mut v = [0u8; 8];
        assert_eq!(sut.peek_read(&mut v, 4).unwrap(), (4, 4));
        assert_eq!(v, *b"abcdefgh");

        let mut v2 = [0u8; 8];
        assert_eq!(sut.read(&mut v2).unwrap(), 6);
        assert_eq!(v2, *b"efghij\0\0");
    }

    // Consecutive peeking reads walk through the stream; the peeked
    // part shrinks once the end of the stream is reached.
    #[test]
    fn test_peek_read_multiple_times() {
        let mut sut = reader(b"abcdefghij");

        let mut s1 = [0u8; 8];
        assert_eq!(sut.peek_read(&mut s1, 4).unwrap(), (4, 4));
        assert_eq!(s1, *b"abcdefgh");

        let mut s2 = [0u8; 8];
        assert_eq!(sut.peek_read(&mut s2, 4).unwrap(), (4, 2));
        assert_eq!(s2, *b"efghij\0\0");

        let mut s3 = [0u8; 8];
        assert_eq!(sut.peek_read(&mut s3, 4).unwrap(), (2, 0));
        assert_eq!(s3, *b"ij\0\0\0\0\0\0");
    }

    // Reads into a buffer smaller than the peeked data drain the
    // peeked bytes first and then continue with the stream.
    #[test]
    fn test_peek_read_and_read_with_small_buffer() {
        let mut sut = reader(b"abcdefghij");

        let mut v = [0u8; 8];
        assert_eq!(sut.peek_read(&mut v, 4).unwrap(), (4, 4));
        assert_eq!(v, *b"abcdefgh");

        let mut v2 = [0u8; 2];
        assert_eq!(sut.read(&mut v2).unwrap(), 2);
        assert_eq!(v2, *b"ef");
        assert_eq!(sut.read(&mut v2).unwrap(), 2);
        assert_eq!(v2, *b"gh");
        assert_eq!(sut.read(&mut v2).unwrap(), 2);
        assert_eq!(v2, *b"ij");
    }

    // Peeking with a buffer smaller than the data peeked before.
    #[test]
    fn test_peek_read_with_smaller_buffer() {
        let mut sut = reader(b"abcdefghij");

        let mut v = [0u8; 8];
        assert_eq!(sut.peek_read(&mut v, 4).unwrap(), (4, 4));
        assert_eq!(v, *b"abcdefgh");

        let mut v2 = [0u8; 2];
        assert_eq!(sut.peek_read(&mut v2, 2).unwrap(), (0, 2));
        assert_eq!(v2, *b"ef");
        assert_eq!(sut.peek_read(&mut v2, 0).unwrap(), (2, 0));
        assert_eq!(v2, *b"ef");
        assert_eq!(sut.peek_read(&mut v2, 0).unwrap(), (2, 0));
        assert_eq!(v2, *b"gh");
        assert_eq!(sut.peek_read(&mut v2, 0).unwrap(), (2, 0));
        assert_eq!(v2, *b"ij");
    }

    // Peeking with a larger peek size than in the previous call.
    #[test]
    fn test_peek_read_peek_with_larger_peek_buffer() {
        let mut sut = reader(b"abcdefghij");

        let mut v = [0u8; 8];
        assert_eq!(sut.peek_read(&mut v, 4).unwrap(), (4, 4));
        assert_eq!(v, *b"abcdefgh");

        let mut v2 = [0u8; 8];
        assert_eq!(sut.peek_read(&mut v2, 8).unwrap(), (0, 6));
        assert_eq!(v2, *b"efghij\0\0");
    }
}
|
|
@ -246,15 +246,14 @@ fn test_f64(){
|
|||
#[test]
|
||||
fn test_multibyte() {
|
||||
|
||||
// TODO: replace **** with \u{1B000}
|
||||
let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen ****".as_bytes());
|
||||
let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen \u{1B000}".as_bytes());
|
||||
|
||||
assert_empty_stderr!(result);
|
||||
assert!(result.success);
|
||||
assert_eq!(result.stdout, unindent("
|
||||
0000000 U n i v e r s i t ä ** t
|
||||
0000014 T ü ** b i n g e n *
|
||||
0000030 * * *
|
||||
0000014 T ü ** b i n g e n \u{1B000}
|
||||
0000030 ** ** **
|
||||
0000033
|
||||
"));
|
||||
}
|
||||
|
@ -313,11 +312,27 @@ fn test_width_without_value(){
|
|||
#[test]
|
||||
fn test_suppress_duplicates(){
|
||||
|
||||
let input = [0u8 ; 41];
|
||||
let input: [u8; 41] = [
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
1, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0];
|
||||
let expected_output = unindent("
|
||||
0000000 00000000000
|
||||
0000 0000
|
||||
*
|
||||
0000020 00000000001
|
||||
0001 0000
|
||||
0000024 00000000000
|
||||
0000 0000
|
||||
*
|
||||
0000050 00000000000
|
||||
0000
|
||||
0000051
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue