diff --git a/.busybox-config b/.busybox-config index d1fb62794..e6921536f 100644 --- a/.busybox-config +++ b/.busybox-config @@ -1,2 +1,4 @@ CONFIG_FEATURE_FANCY_HEAD=y CONFIG_UNICODE_SUPPORT=y +CONFIG_DESKTOP=y +CONFIG_LONG_OPTS=y diff --git a/Cargo.lock b/Cargo.lock index 9de14011a..c06c977f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -90,6 +90,7 @@ dependencies = [ "tty 0.0.1", "uname 0.0.1", "unexpand 0.0.1", + "unindent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "uniq 0.0.1", "unlink 0.0.1", "uptime 0.0.1", @@ -175,6 +176,11 @@ name = "bitflags" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "byteorder" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cat" version = "0.0.1" @@ -386,6 +392,11 @@ dependencies = [ "uucore 0.0.1", ] +[[package]] +name = "half" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "hashsum" version = "0.0.1" @@ -657,8 +668,11 @@ dependencies = [ name = "od" version = "0.0.1" dependencies = [ + "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", + "half 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "uucore 0.0.1", ] [[package]] @@ -1129,6 +1143,11 @@ name = "unicode-width" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unindent" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "uniq" version = "0.0.1" diff --git a/Cargo.toml b/Cargo.toml index 95ea8df4c..0ed6157a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -203,6 +203,7 @@ libc = "*" regex="*" rand="*" tempdir="*" +unindent="*" [[bin]] name = "uutils" diff --git a/README.md b/README.md index 
7257f21dd..cdb4b1173 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ To do * [x] nohup * [x] nproc * [ ] numfmt -* [ ] od (in progress, needs lots of work) +* [ ] od (almost complete, `--strings` and 128-bit datatypes are missing) * [x] paste * [x] pathchk * [x] pinky diff --git a/src/od/Cargo.toml b/src/od/Cargo.toml index d6320dd65..a500caecc 100644 --- a/src/od/Cargo.toml +++ b/src/od/Cargo.toml @@ -10,6 +10,9 @@ path = "od.rs" [dependencies] getopts = "*" libc = "*" +byteorder = "*" +half = "*" +uucore = { path="../uucore" } [[bin]] name = "od" diff --git a/src/od/byteorder_io.rs b/src/od/byteorder_io.rs new file mode 100644 index 000000000..e72b4373b --- /dev/null +++ b/src/od/byteorder_io.rs @@ -0,0 +1,50 @@ +// workaround until https://github.com/BurntSushi/byteorder/issues/41 has been fixed +// based on: https://github.com/netvl/immeta/blob/4460ee/src/utils.rs#L76 + +use byteorder::{NativeEndian, LittleEndian, BigEndian}; +use byteorder::ByteOrder as ByteOrderTrait; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ByteOrder { + Little, + Big, + Native, +} + +macro_rules! gen_byte_order_ops { + ($($read_name:ident, $write_name:ident -> $tpe:ty),+) => { + impl ByteOrder { + $( + #[allow(dead_code)] + #[inline] + pub fn $read_name(self, source: &[u8]) -> $tpe { + match self { + ByteOrder::Little => LittleEndian::$read_name(source), + ByteOrder::Big => BigEndian::$read_name(source), + ByteOrder::Native => NativeEndian::$read_name(source), + } + } + + #[allow(dead_code)] + pub fn $write_name(self, target: &mut [u8], n: $tpe) { + match self { + ByteOrder::Little => LittleEndian::$write_name(target, n), + ByteOrder::Big => BigEndian::$write_name(target, n), + ByteOrder::Native => NativeEndian::$write_name(target, n), + } + } + )+ + } + } +} + +gen_byte_order_ops! 
{ + read_u16, write_u16 -> u16, + read_u32, write_u32 -> u32, + read_u64, write_u64 -> u64, + read_i16, write_i16 -> i16, + read_i32, write_i32 -> i32, + read_i64, write_i64 -> i64, + read_f32, write_f32 -> f32, + read_f64, write_f64 -> f64 +} diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs new file mode 100644 index 000000000..9a5c3e236 --- /dev/null +++ b/src/od/formatteriteminfo.rs @@ -0,0 +1,56 @@ +use std::fmt; + +#[derive(Copy)] +pub enum FormatWriter { + IntWriter(fn(u64) -> String), + FloatWriter(fn(f64) -> String), + MultibyteWriter(fn(&[u8]) -> String), +} + +impl Clone for FormatWriter { + #[inline] + fn clone(&self) -> Self { + *self + } +} + +impl PartialEq for FormatWriter { + fn eq(&self, other: &FormatWriter) -> bool { + use formatteriteminfo::FormatWriter::*; + + match (self, other) { + (&IntWriter(ref a), &IntWriter(ref b)) => a == b, + (&FloatWriter(ref a), &FloatWriter(ref b)) => a == b, + (&MultibyteWriter(ref a), &MultibyteWriter(ref b)) => *a as usize == *b as usize, + _ => false, + } + } +} + +impl Eq for FormatWriter {} + +impl fmt::Debug for FormatWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &FormatWriter::IntWriter(ref p) => { + try!(f.write_str("IntWriter:")); + fmt::Pointer::fmt(p, f) + }, + &FormatWriter::FloatWriter(ref p) => { + try!(f.write_str("FloatWriter:")); + fmt::Pointer::fmt(p, f) + }, + &FormatWriter::MultibyteWriter(ref p) => { + try!(f.write_str("MultibyteWriter:")); + fmt::Pointer::fmt(&(*p as *const ()), f) + }, + } + } + } + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct FormatterItemInfo { + pub byte_size: usize, + pub print_width: usize, // including a space in front of the text + pub formatter: FormatWriter, +} diff --git a/src/od/inputdecoder.rs b/src/od/inputdecoder.rs new file mode 100644 index 000000000..e58cf79f3 --- /dev/null +++ b/src/od/inputdecoder.rs @@ -0,0 +1,183 @@ +use std::io; +use byteorder_io::ByteOrder; +use 
multifilereader::HasError; +use peekreader::PeekRead; +use half::f16; + +/// Processes an input and provides access to the data read in various formats +/// +/// Currently only useful if the input implements `PeekRead`. +pub struct InputDecoder<'a, I> where I: 'a { + /// The input from which data is read + input: &'a mut I, + + /// A memory buffer, its size is set in `new`. + data: Vec, + /// The number of bytes in the buffer reserved for the peek data from `PeekRead`. + reserved_peek_length: usize, + + /// The number of (valid) bytes in the buffer. + used_normal_length: usize, + /// The number of peek bytes in the buffer. + used_peek_length: usize, + + /// Byte order used to read data from the buffer. + byte_order: ByteOrder, +} + +impl<'a, I> InputDecoder<'a, I> { + /// Creates a new `InputDecoder` with an allocated buffer of `normal_length` + `peek_length` bytes. + /// `byte_order` determines how to read multibyte formats from the buffer. + pub fn new(input: &mut I, normal_length: usize, peek_length: usize, byte_order: ByteOrder) -> InputDecoder { + let mut bytes: Vec = Vec::with_capacity(normal_length + peek_length); + unsafe { bytes.set_len(normal_length + peek_length); } // fast but uninitialized + + InputDecoder { + input: input, + data: bytes, + reserved_peek_length: peek_length, + used_normal_length: 0, + used_peek_length: 0, + byte_order: byte_order, + } + } +} + + +impl<'a, I> InputDecoder<'a, I> where I: PeekRead { + /// calls `peek_read` on the internal stream to (re)fill the buffer. Returns a + /// MemoryDecoder providing access to the result or returns an i/o error. 
+ pub fn peek_read(&mut self) -> io::Result { + match self.input.peek_read(self.data.as_mut_slice(), self.reserved_peek_length) { + Ok((n, p)) => { + self.used_normal_length = n; + self.used_peek_length = p; + Ok(MemoryDecoder { + data: &mut self.data, + used_normal_length: self.used_normal_length, + used_peek_length: self.used_peek_length, + byte_order: self.byte_order, + }) + }, + Err(e) => Err(e), + } + + } +} + +impl<'a, I> HasError for InputDecoder<'a, I> where I: HasError { + /// calls has_error on the internal stream. + fn has_error(&self) -> bool { + self.input.has_error() + } +} + +/// Provides access to the internal data in various formats +pub struct MemoryDecoder<'a> { + /// A reference to the parents' data + data: &'a mut Vec, + /// The number of (valid) bytes in the buffer. + used_normal_length: usize, + /// The number of peek bytes in the buffer. + used_peek_length: usize, + /// Byte order used to read data from the buffer. + byte_order: ByteOrder, +} + +impl<'a> MemoryDecoder<'a> { + /// Set a part of the internal buffer to zero. + /// access to the whole buffer is possible, not just to the valid data. + pub fn zero_out_buffer(&mut self, start:usize, end:usize) { + for i in start..end { + self.data[i] = 0; + } + } + + /// Returns the current length of the buffer. (ie. how much valid data it contains.) + pub fn length(&self) -> usize { + self.used_normal_length + } + + /// Creates a clone of the internal buffer. The clone only contain the valid data. + pub fn clone_buffer(&self, other: &mut Vec) { + other.clone_from(&self.data); + other.resize(self.used_normal_length, 0); + } + + /// Returns a slice to the internal buffer starting at `start`. + pub fn get_buffer(&self, start: usize) -> &[u8] { + &self.data[start..self.used_normal_length] + } + + /// Returns a slice to the internal buffer including the peek data starting at `start`. 
+ pub fn get_full_buffer(&self, start: usize) -> &[u8] { + &self.data[start..self.used_normal_length + self.used_peek_length] + } + + /// Returns a u8/u16/u32/u64 from the internal buffer at position `start`. + pub fn read_uint(&self, start: usize, byte_size: usize) -> u64 { + match byte_size { + 1 => self.data[start] as u64, + 2 => self.byte_order.read_u16(&self.data[start..start + 2]) as u64, + 4 => self.byte_order.read_u32(&self.data[start..start + 4]) as u64, + 8 => self.byte_order.read_u64(&self.data[start..start + 8]), + _ => panic!("Invalid byte_size: {}", byte_size), + } + } + + /// Returns a f32/f64 from the internal buffer at position `start`. + pub fn read_float(&self, start: usize, byte_size: usize) -> f64 { + match byte_size { + 2 => f64::from(f16::from_bits(self.byte_order.read_u16(&self.data[start..start + 2]))), + 4 => self.byte_order.read_f32(&self.data[start..start + 4]) as f64, + 8 => self.byte_order.read_f64(&self.data[start..start + 8]), + _ => panic!("Invalid byte_size: {}", byte_size), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + use peekreader::PeekReader; + use byteorder_io::ByteOrder; + + #[test] + fn smoke_test() { + let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0xff, 0xff]; + let mut input = PeekReader::new(Cursor::new(&data)); + let mut sut = InputDecoder::new(&mut input, 8, 2, ByteOrder::Little); + + match sut.peek_read() { + Ok(mut mem) => { + assert_eq!(8, mem.length()); + + assert_eq!(-2.0, mem.read_float(0, 8)); + assert_eq!(-2.0, mem.read_float(4, 4)); + assert_eq!(0xc000000000000000, mem.read_uint(0, 8)); + assert_eq!(0xc0000000, mem.read_uint(4, 4)); + assert_eq!(0xc000, mem.read_uint(6, 2)); + assert_eq!(0xc0, mem.read_uint(7, 1)); + assert_eq!(&[0, 0xc0], mem.get_buffer(6)); + assert_eq!(&[0, 0xc0, 0xff, 0xff], mem.get_full_buffer(6)); + + let mut copy: Vec = Vec::new(); + mem.clone_buffer(&mut copy); + assert_eq!(vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0], copy); + + 
mem.zero_out_buffer(7, 8); + assert_eq!(&[0, 0, 0xff, 0xff], mem.get_full_buffer(6)); + } + Err(e) => { assert!(false, e); } + } + + match sut.peek_read() { + Ok(mem) => { + assert_eq!(2, mem.length()); + assert_eq!(0xffff, mem.read_uint(0, 2)); + } + Err(e) => { assert!(false, e); } + } + } +} diff --git a/src/od/inputoffset.rs b/src/od/inputoffset.rs new file mode 100644 index 000000000..9b82a214d --- /dev/null +++ b/src/od/inputoffset.rs @@ -0,0 +1,110 @@ + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } + +/// provides the byte offset printed at the left margin +pub struct InputOffset { + /// The radix to print the byte offset. NoPrefix will not print a byte offset. + radix: Radix, + /// The current position. Initialize at `new`, increase using `increase_position`. + byte_pos: usize, + /// An optional label printed in parentheses, typically different from `byte_pos`, + /// but will increase with the same value if `byte_pos` is increased. + label: Option, +} + +impl InputOffset { + /// creates a new `InputOffset` using the provided values. + pub fn new(radix: Radix, byte_pos: usize, label: Option) -> InputOffset { + InputOffset { + radix: radix, + byte_pos: byte_pos, + label: label, + } + } + + /// Increase `byte_pos` and `label` if a label is used. 
+ pub fn increase_position(&mut self, n: usize) { + self.byte_pos += n; + if let Some(l) = self.label { + self.label = Some(l + n); + } + } + + #[cfg(test)] + fn set_radix(&mut self, radix: Radix) { + self.radix = radix; + } + + /// returns a string with the current byte offset + pub fn format_byte_offset(&self) -> String { + match (self.radix, self.label) { + (Radix::Decimal, None) => format!("{:07}", self.byte_pos), + (Radix::Decimal, Some(l)) => format!("{:07} ({:07})", self.byte_pos, l), + (Radix::Hexadecimal, None) => format!("{:06X}", self.byte_pos), + (Radix::Hexadecimal, Some(l)) => format!("{:06X} ({:06X})", self.byte_pos, l), + (Radix::Octal, None) => format!("{:07o}", self.byte_pos), + (Radix::Octal, Some(l)) => format!("{:07o} ({:07o})", self.byte_pos, l), + (Radix::NoPrefix, None) => String::from(""), + (Radix::NoPrefix, Some(l)) => format!("({:07o})", l), + } + } + + /// Prints the byte offset followed by a newline, or nothing at all if + /// both `Radix::NoPrefix` was set and no label (--traditional) is used. 
+ pub fn print_final_offset(&self) { + if self.radix != Radix::NoPrefix || self.label.is_some() { + print!("{}\n", self.format_byte_offset()); + } + } +} + +#[test] +fn test_input_offset() { + let mut sut = InputOffset::new(Radix::Hexadecimal, 10, None); + assert_eq!("00000A", &sut.format_byte_offset()); + sut.increase_position(10); + assert_eq!("000014", &sut.format_byte_offset()); + + // note normally the radix will not change after initialisation + sut.set_radix(Radix::Decimal); + assert_eq!("0000020", &sut.format_byte_offset()); + + sut.set_radix(Radix::Hexadecimal); + assert_eq!("000014", &sut.format_byte_offset()); + + sut.set_radix(Radix::Octal); + assert_eq!("0000024", &sut.format_byte_offset()); + + sut.set_radix(Radix::NoPrefix); + assert_eq!("", &sut.format_byte_offset()); + + sut.increase_position(10); + sut.set_radix(Radix::Octal); + assert_eq!("0000036", &sut.format_byte_offset()); +} + +#[test] +fn test_input_offset_with_label() { + let mut sut = InputOffset::new(Radix::Hexadecimal, 10, Some(20)); + assert_eq!("00000A (000014)", &sut.format_byte_offset()); + sut.increase_position(10); + assert_eq!("000014 (00001E)", &sut.format_byte_offset()); + + // note normally the radix will not change after initialisation + sut.set_radix(Radix::Decimal); + assert_eq!("0000020 (0000030)", &sut.format_byte_offset()); + + sut.set_radix(Radix::Hexadecimal); + assert_eq!("000014 (00001E)", &sut.format_byte_offset()); + + sut.set_radix(Radix::Octal); + assert_eq!("0000024 (0000036)", &sut.format_byte_offset()); + + sut.set_radix(Radix::NoPrefix); + assert_eq!("(0000036)", &sut.format_byte_offset()); + + sut.increase_position(10); + sut.set_radix(Radix::Octal); + assert_eq!("0000036 (0000050)", &sut.format_byte_offset()); +} diff --git a/src/od/mockstream.rs b/src/od/mockstream.rs new file mode 100644 index 000000000..5adfce2ae --- /dev/null +++ b/src/od/mockstream.rs @@ -0,0 +1,101 @@ +// https://github.com/lazy-bitfield/rust-mockstream/pull/2 + +use std::io::{Cursor, 
Read, Result, Error, ErrorKind}; +use std::error::Error as errorError; + +/// `FailingMockStream` mocks a stream which will fail upon read or write +/// +/// # Examples +/// +/// ``` +/// use std::io::{Cursor, Read}; +/// +/// struct CountIo {} +/// +/// impl CountIo { +/// fn read_data(&self, r: &mut Read) -> usize { +/// let mut count: usize = 0; +/// let mut retries = 3; +/// +/// loop { +/// let mut buffer = [0; 5]; +/// match r.read(&mut buffer) { +/// Err(_) => { +/// if retries == 0 { break; } +/// retries -= 1; +/// }, +/// Ok(0) => break, +/// Ok(n) => count += n, +/// } +/// } +/// count +/// } +/// } +/// +/// #[test] +/// fn test_io_retries() { +/// let mut c = Cursor::new(&b"1234"[..]) +/// .chain(FailingMockStream::new(ErrorKind::Other, "Failing", 3)) +/// .chain(Cursor::new(&b"5678"[..])); +/// +/// let sut = CountIo {}; +/// // this will fail unless read_data performs at least 3 retries on I/O errors +/// assert_eq!(8, sut.read_data(&mut c)); +/// } +/// ``` +#[derive(Clone)] +pub struct FailingMockStream { + kind: ErrorKind, + message: &'static str, + repeat_count: i32, +} + +impl FailingMockStream { + /// Creates a FailingMockStream + /// + /// When `read` or `write` is called, it will return an error `repeat_count` times. + /// `kind` and `message` can be specified to define the exact error. 
+ pub fn new(kind: ErrorKind, message: &'static str, repeat_count: i32) -> FailingMockStream { + FailingMockStream { kind: kind, message: message, repeat_count: repeat_count, } + } + + fn error(&mut self) -> Result { + if self.repeat_count == 0 { + return Ok(0) + } else { + if self.repeat_count > 0 { + self.repeat_count -= 1; + } + Err(Error::new(self.kind, self.message)) + } + } +} + +impl Read for FailingMockStream { + fn read(&mut self, _: &mut [u8]) -> Result { + self.error() + } +} + +#[test] +fn test_failing_mock_stream_read() { + let mut s = FailingMockStream::new(ErrorKind::BrokenPipe, "The dog ate the ethernet cable", 1); + let mut v = [0; 4]; + let error = s.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::BrokenPipe); + assert_eq!(error.description(), "The dog ate the ethernet cable"); + // after a single error, it will return Ok(0) + assert_eq!(s.read(v.as_mut()).unwrap(), 0); +} + +#[test] +fn test_failing_mock_stream_chain_interrupted() { + let mut c = Cursor::new(&b"abcd"[..]) + .chain(FailingMockStream::new(ErrorKind::Interrupted, "Interrupted", 5)) + .chain(Cursor::new(&b"ABCD"[..])); + + let mut v = [0; 8]; + c.read_exact(v.as_mut()).unwrap(); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41, 0x42, 0x43, 0x44]); + assert_eq!(c.read(v.as_mut()).unwrap(), 0); +} diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs new file mode 100644 index 000000000..127b8b123 --- /dev/null +++ b/src/od/multifilereader.rs @@ -0,0 +1,194 @@ +use std; +use std::io; +use std::io::BufReader; +use std::fs::File; +use std::io::Write; +use std::vec::Vec; + +pub enum InputSource<'a> { + FileName(&'a str), + Stdin, + #[allow(dead_code)] + Stream(Box), +} + +// MultifileReader - concatenate all our input, file or stdin. 
+pub struct MultifileReader<'a> { + ni: Vec>, + curr_file: Option>, + any_err: bool, +} + +pub trait HasError { + fn has_error(&self) -> bool; +} + +impl<'b> MultifileReader<'b> { + pub fn new<'a>(fnames: Vec>) -> MultifileReader<'a> { + let mut mf = MultifileReader { + ni: fnames, + curr_file: None, // normally this means done; call next_file() + any_err: false, + }; + mf.next_file(); + mf + } + + fn next_file(&mut self) { + // loop retries with subsequent files if err - normally 'loops' once + loop { + if self.ni.len() == 0 { + self.curr_file = None; + break; + } + match self.ni.remove(0) { + InputSource::Stdin => { + self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); + break; + } + InputSource::FileName(fname) => { + match File::open(fname) { + Ok(f) => { + self.curr_file = Some(Box::new(BufReader::new(f))); + break; + } + Err(e) => { + // If any file can't be opened, + // print an error at the time that the file is needed, + // then move on to the next file. + // This matches the behavior of the original `od` + eprintln!("{}: '{}': {}", + executable!().split("::").next().unwrap(), // remove module + fname, e); + self.any_err = true + } + } + } + InputSource::Stream(s) => { + self.curr_file = Some(s); + break; + } + } + } + } +} + +impl<'b> io::Read for MultifileReader<'b> { + // Fill buf with bytes read from the list of files + // Returns Ok() + // Handles io errors itself, thus always returns OK + // Fills the provided buffer completely, unless it has run out of input. + // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut xfrd = 0; + // while buffer we are filling is not full.. May go thru several files. + 'fillloop: while xfrd < buf.len() { + match self.curr_file { + None => break, + Some(ref mut curr_file) => { + loop { + // stdin may return on 'return' (enter), even though the buffer isn't full. 
+ xfrd += match curr_file.read(&mut buf[xfrd..]) { + Ok(0) => break, + Ok(n) => n, + Err(e) => { + eprintln!("{}: I/O: {}", + executable!().split("::").next().unwrap(), // remove module + e); + self.any_err = true; + break; + }, + }; + if xfrd == buf.len() { + // transferred all that was asked for. + break 'fillloop; + } + } + } + } + self.next_file(); + } + Ok(xfrd) + } +} + +impl<'b> HasError for MultifileReader<'b> { + fn has_error(&self) -> bool { + self.any_err + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read, ErrorKind}; + use mockstream::*; + + #[test] + fn test_multi_file_reader_one_read() { + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"abcd"[..])))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + let mut v = [0; 10]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41, 0x42, 0x43, 0x44, 0, 0]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_multi_file_reader_two_reads() { + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"abcd"[..])))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + let mut v = [0; 5]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 5); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x42, 0x43, 0x44, 0x64, 0x41]); // last two bytes are not overwritten + } + + #[test] + fn test_multi_file_reader_read_error() { + let c = Cursor::new(&b"1234"[..]) + .chain(FailingMockStream::new(ErrorKind::Other, "Failing", 1)) + .chain(Cursor::new(&b"5678"[..])); + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(c))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + let mut v = [0; 5]; + + let mut sut = 
MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 5); + assert_eq!(v, [49, 50, 51, 52, 65]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [66, 67, 68, 52, 65]); // last two bytes are not overwritten + + // note: no retry on i/o error, so 5678 is missing + } + + #[test] + fn test_multi_file_reader_read_error_at_start() { + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(FailingMockStream::new(ErrorKind::Other, "Failing", 1)))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"abcd"[..])))); + inputs.push(InputSource::Stream(Box::new(FailingMockStream::new(ErrorKind::Other, "Failing", 1)))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + inputs.push(InputSource::Stream(Box::new(FailingMockStream::new(ErrorKind::Other, "Failing", 1)))); + let mut v = [0; 5]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 5); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x42, 0x43, 0x44, 0x64, 0x41]); // last two bytes are not overwritten + } +} diff --git a/src/od/od.rs b/src/od/od.rs index 62693a1a3..4228fe032 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -10,34 +10,92 @@ */ extern crate getopts; +extern crate byteorder; +extern crate half; -use std::fs::File; -use std::io::Read; -use std::mem; -use std::io::BufReader; +#[macro_use] +extern crate uucore; + +mod multifilereader; +mod partialreader; +mod peekreader; +mod byteorder_io; +mod formatteriteminfo; +mod prn_int; +mod prn_char; +mod prn_float; +mod parse_nrofbytes; +mod parse_formats; +mod parse_inputs; +mod inputoffset; +mod inputdecoder; +mod output_info; +#[cfg(test)] +mod mockstream; + +use std::cmp; use std::io::Write; -use std::io; +use byteorder_io::*; +use multifilereader::*; +use partialreader::*; +use peekreader::*; +use formatteriteminfo::*; +use parse_nrofbytes::parse_number_of_bytes; +use 
parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; +use prn_char::format_ascii_dump; +use parse_inputs::{parse_inputs, CommandLineInputs}; +use inputoffset::{InputOffset, Radix}; +use inputdecoder::{InputDecoder,MemoryDecoder}; +use output_info::OutputInfo; -//This is available in some versions of std, but not all that we target. -macro_rules! hashmap { - ($( $key: expr => $val: expr ),*) => {{ - let mut map = ::std::collections::HashMap::new(); - $( map.insert($key, $val); )* - map - }} -} +static VERSION: &'static str = env!("CARGO_PKG_VERSION"); +const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes +static USAGE: &'static str = +r#"Usage: + od [OPTION]... [--] [FILENAME]... + od [-abcdDefFhHiIlLoOsxX] [FILENAME] [[+][0x]OFFSET[.][b]] + od --traditional [OPTION]... [FILENAME] [[+][0x]OFFSET[.][b] [[+][0x]LABEL[.][b]]] -#[derive(Debug)] -enum Radix { Decimal, Hexadecimal, Octal, Binary } +Displays data in various human-readable formats. If multiple formats are +specified, the output will contain all formats in the order they appear on the +commandline. Each format will be printed on a new line. Only the line +containing the first format will be prefixed with the offset. -#[derive(Debug)] -enum InputSource<'a> { - FileName(&'a str ), - Stdin -} +If no filename is specified, or it is "-", stdin will be used. After a "--", no +more options will be recognised. This allows for filenames starting with a "-". -pub fn uumain(args: Vec) -> i32 { +If a filename is a valid number which can be used as an offset in the second +form, you can force it to be recognised as a filename if you include an option +like "-j0", which is only valid in the first form. + +RADIX is one of o,d,x,n for octal, decimal, hexadecimal or none. + +BYTES is decimal by default, octal if prefixed with a "0", or hexadecimal if +prefixed with "0x". The suffixes b, KB, K, MB, M, GB, G, will multiply the +number with 512, 1000, 1024, 1000^2, 1024^2, 1000^3, 1024^3, 1000^2, 1024^2. 
+ +OFFSET and LABEL are octal by default, hexadecimal if prefixed with "0x" or +decimal if a "." suffix is added. The "b" suffix will multiply with 512. + +TYPE contains one or more format specifications consisting of: + a for printable 7-bits ASCII + c for utf-8 characters or octal for undefined characters + d[SIZE] for signed decimal + f[SIZE] for floating point + o[SIZE] for octal + u[SIZE] for unsigned decimal + x[SIZE] for hexadecimal +SIZE is the number of bytes which can be the number 1, 2, 4, 8 or 16, + or C, I, S, L for 1, 2, 4, 8 bytes for integer types, + or F, D, L for 4, 8, 16 bytes for floating point. +Any type specification can have a "z" suffic, which will add a ASCII dump at + the end of the line. + +If an error occurred, a diagnostic message will be printed to stderr, and the +exitcode will be non-zero."#; + +fn create_getopts_options() -> getopts::Options { let mut opts = getopts::Options::new(); opts.optopt("A", "address-radix", @@ -46,446 +104,343 @@ pub fn uumain(args: Vec) -> i32 { "Skip bytes input bytes before formatting and writing.", "BYTES"); opts.optopt("N", "read-bytes", "limit dump to BYTES input bytes", "BYTES"); + opts.optopt("", "endian", "byte order to use for multi-byte formats", "big|little"); opts.optopt("S", "strings", ("output strings of at least BYTES graphic chars. 
3 is assumed when \ BYTES is not specified."), "BYTES"); - opts.optflag("a", "", "named characters, ignoring high-order bit"); - opts.optflag("b", "", "octal bytes"); - opts.optflag("c", "", "ASCII characters or backslash escapes"); - opts.optflag("d", "", "unsigned decimal 2-byte units"); - opts.optflag("o", "", "unsigned decimal 2-byte units"); + opts.optflagmulti("a", "", "named characters, ignoring high-order bit"); + opts.optflagmulti("b", "", "octal bytes"); + opts.optflagmulti("c", "", "ASCII characters or backslash escapes"); + opts.optflagmulti("d", "", "unsigned decimal 2-byte units"); + opts.optflagmulti("D", "", "unsigned decimal 4-byte units"); + opts.optflagmulti("o", "", "octal 2-byte units"); - opts.optflag("I", "", "decimal 2-byte units"); - opts.optflag("L", "", "decimal 2-byte units"); - opts.optflag("i", "", "decimal 2-byte units"); + opts.optflagmulti("I", "", "decimal 8-byte units"); + opts.optflagmulti("L", "", "decimal 8-byte units"); + opts.optflagmulti("i", "", "decimal 4-byte units"); + opts.optflagmulti("l", "", "decimal 8-byte units"); + opts.optflagmulti("x", "", "hexadecimal 2-byte units"); + opts.optflagmulti("h", "", "hexadecimal 2-byte units"); - opts.optflag("O", "", "octal 4-byte units"); - opts.optflag("s", "", "decimal 4-byte units"); + opts.optflagmulti("O", "", "octal 4-byte units"); + opts.optflagmulti("s", "", "decimal 2-byte units"); + opts.optflagmulti("X", "", "hexadecimal 4-byte units"); + opts.optflagmulti("H", "", "hexadecimal 4-byte units"); - opts.optopt("t", "format", "select output format or formats", "TYPE"); + opts.optflagmulti("e", "", "floating point double precision (64-bit) units"); + opts.optflagmulti("f", "", "floating point single precision (32-bit) units"); + opts.optflagmulti("F", "", "floating point double precision (64-bit) units"); + + opts.optmulti("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); - 
opts.optopt("w", "width", + opts.optflagopt("w", "width", ("output BYTES bytes per output line. 32 is implied when BYTES is not \ specified."), "BYTES"); opts.optflag("h", "help", "display this help and exit."); opts.optflag("", "version", "output version information and exit."); + opts.optflag("", "traditional", "compatibility mode with one input, offset and label."); + + opts +} + +struct OdOptions { + byte_order: ByteOrder, + skip_bytes: usize, + read_bytes: Option, + label: Option, + input_strings: Vec, + formats: Vec, + line_bytes: usize, + output_duplicates: bool, + radix: Radix, +} + +impl OdOptions { + fn new(matches: getopts::Matches, args: Vec) -> Result { + let byte_order = match matches.opt_str("endian").as_ref().map(String::as_ref) { + None => { ByteOrder::Native }, + Some("little") => { ByteOrder::Little }, + Some("big") => { ByteOrder::Big }, + Some(s) => { + return Err(format!("Invalid argument --endian={}", s)); + } + }; + + let mut skip_bytes = match matches.opt_default("skip-bytes", "0") { + None => 0, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { i } + Err(_) => { + return Err(format!("Invalid argument --skip-bytes={}", s)); + } + } + } + }; + + let mut label: Option = None; + + let input_strings = match parse_inputs(&matches) { + Ok(CommandLineInputs::FileNames(v)) => v, + Ok(CommandLineInputs::FileAndOffset((f, s, l))) => { + skip_bytes = s; + label = l; + vec![f] + }, + Err(e) => { + return Err(format!("Invalid inputs: {}", e)); + } + }; + + let formats = match parse_format_flags(&args) { + Ok(f) => f, + Err(e) => { + return Err(format!("{}", e)); + } + }; + + let mut line_bytes = match matches.opt_default("w", "32") { + None => 16, + Some(s) => { + match s.parse::() { + Ok(i) => { i } + Err(_) => { 0 } + } + } + }; + let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); + if line_bytes == 0 || line_bytes % min_bytes != 0 { + show_warning!("invalid width {}; using {} instead", 
line_bytes, min_bytes); + line_bytes = min_bytes; + } + + let output_duplicates = matches.opt_present("v"); + + let read_bytes = match matches.opt_str("read-bytes") { + None => None, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { Some(i) } + Err(_) => { + return Err(format!("Invalid argument --read-bytes={}", s)); + } + } + } + }; + + let radix = match matches.opt_str("A") { + None => Radix::Octal, + Some(s) => { + let st = s.into_bytes(); + if st.len() != 1 { + return Err(format!("Radix must be one of [d, o, n, x]")) + } else { + let radix: char = *(st.get(0) + .expect("byte string of length 1 lacks a 0th elem")) as char; + match radix { + 'd' => Radix::Decimal, + 'x' => Radix::Hexadecimal, + 'o' => Radix::Octal, + 'n' => Radix::NoPrefix, + _ => return Err(format!("Radix must be one of [d, o, n, x]")) + } + } + } + }; + + Ok(OdOptions { + byte_order: byte_order, + skip_bytes: skip_bytes, + read_bytes: read_bytes, + label: label, + input_strings: input_strings, + formats: formats, + line_bytes: line_bytes, + output_duplicates: output_duplicates, + radix: radix, + }) + } +} + +/// parses and validates commandline parameters, prepares data structures, +/// opens the input and calls `odfunc` to process the input. 
+pub fn uumain(args: Vec) -> i32 { + let opts = create_getopts_options(); let matches = match opts.parse(&args[1..]) { Ok(m) => m, - Err(f) => panic!("Invalid options\n{}", f) + Err(f) => { + disp_err!("{}", f); + return 1; + } }; - let input_offset_base = match parse_radix(matches.opt_str("A")) { - Ok(r) => r, - Err(f) => { panic!("Invalid -A/--address-radix\n{}", f) } + if matches.opt_present("h") { + println!("{}", opts.usage(&USAGE)); + return 0; + } + if matches.opt_present("version") { + println!("{} {}", executable!(), VERSION); + return 0; + } + + let od_options = match OdOptions::new(matches, args) { + Err(s) => { + disp_err!("{}", s); + return 1; + }, + Ok(o) => o, }; - // Gather up file names - args which don't start with '-' - let stdnionly = [InputSource::Stdin]; - let inputs = args[1..] - .iter() - .filter_map(|w| match w as &str { - "--" => Some(InputSource::Stdin), - o if o.starts_with("-") => None, - x => Some(InputSource::FileName(x)), - }) - .collect::>(); - // If no input files named, use stdin. - let inputs = if inputs.len() == 0 { - &stdnionly[..] - } else { - &inputs[..] - }; - // Gather up format flags, we don't use getopts becase we need keep them in order. - let flags = args[1..] - .iter() - .filter_map(|w| match w as &str { - "--" => None, - o if o.starts_with("-") => Some(&o[1..]), - _ => None, - }) - .collect::>(); + let mut input_offset = InputOffset::new(od_options.radix, od_options.skip_bytes, + od_options.label); - // At the moment, char (-a & -c)formats need the driver to set up a - // line by inserting a different # of of spaces at the start. 
- struct OdFormater { - writer: fn(p: u64, itembytes: usize), - offmarg: usize, - }; - let oct = OdFormater { - writer: print_item_oct, offmarg: 2 - }; - let hex = OdFormater { - writer: print_item_hex, offmarg: 2 - }; - let dec_u = OdFormater { - writer: print_item_dec_u, offmarg: 2 - }; - let dec_s = OdFormater { - writer: print_item_dec_s, offmarg: 2 - }; - let a_char = OdFormater { - writer: print_item_a, offmarg: 1 - }; - let c_char = OdFormater { - writer: print_item_c, offmarg: 1 - }; + let mut input = open_input_peek_reader(&od_options.input_strings, + od_options.skip_bytes, od_options.read_bytes); + let mut input_decoder = InputDecoder::new(&mut input, od_options.line_bytes, + PEEK_BUFFER_SIZE, od_options.byte_order); - fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat { - OdFormat { - itembytes: itembytes, - writer: fmtspec.writer, - offmarg: fmtspec.offmarg, - } - } + let output_info = OutputInfo::new(od_options.line_bytes, &od_options.formats[..], + od_options.output_duplicates); -// TODO: -t fmts - let known_formats = hashmap![ - "a" => (1, &a_char), - "B" => (2, &oct) , - "b" => (1, &oct), - "c" => (1, &c_char), - "D" => (4, &dec_u), - // TODO: support floats - // "e" => (8, &flo64), - // "F" => (8, &flo64), - // "F" => (4, &flo32), - "H" => (4, &hex), - "X" => (4, &hex) , - "o" => (2, &oct), - "x" => (2, &hex), - "h" => (2, &hex), - - "I" => (2, &dec_s), - "L" => (2, &dec_s), - "i" => (2, &dec_s), - - "O" => (4, &oct), - "s" => (2, &dec_u) - ]; - - let mut formats = Vec::new(); - - for flag in flags.iter() { - match known_formats.get(flag) { - None => {} // not every option is a format - Some(r) => { - let (itembytes, fmtspec) = *r; - formats.push(mkfmt(itembytes, fmtspec)) - } - } - } - - if formats.is_empty() { - formats.push(mkfmt(2, &oct)); // 2 byte octal is the default - } - - odfunc(&input_offset_base, &inputs, &formats[..]) + odfunc(&mut input_offset, &mut input_decoder, &output_info) } -const LINEBYTES:usize = 16; -const 
WORDBYTES:usize = 2; +/// Loops through the input line by line, calling print_bytes to take care of the output. +fn odfunc(input_offset: &mut InputOffset, input_decoder: &mut InputDecoder, + output_info: &OutputInfo) -> i32 + where I: PeekRead + HasError { + let mut duplicate_line = false; + let mut previous_bytes: Vec = Vec::new(); + let line_bytes = output_info.byte_size_line; -fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 { - - let mut mf = MultifileReader::new(fnames); - let mut addr = 0; - let bytes = &mut [b'\x00'; LINEBYTES]; loop { // print each line data (or multi-format raster of several lines describing the same data). - print_with_radix(input_offset_base, addr); // print offset - // if printing in multiple formats offset is printed only once + match input_decoder.peek_read() { + Ok(mut memory_decoder) => { + let length = memory_decoder.length(); - match mf.f_read(bytes) { - Ok(0) => { - print!("\n"); - break; - } - Ok(n) => { - let mut first = true; // First line of a multi-format raster. - for f in formats { - if !first { - // this takes the space of the file offset on subsequent - // lines of multi-format rasters. - print!(" "); - } - first = false; - print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word - - for b in 0..n / f.itembytes { - let mut p: u64 = 0; - for i in 0..f.itembytes { - p |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i); - } - (f.writer)(p, f.itembytes); - } - // not enough byte for a whole element, this should only happen on the last line. - if n % f.itembytes != 0 { - let b = n / f.itembytes; - let mut p2: u64 = 0; - for i in 0..(n % f.itembytes) { - p2 |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i); - } - (f.writer)(p2, f.itembytes); - } - // Add extra spaces to pad out the short, presumably last, line. - if n < LINEBYTES { - // calc # of items we did not print, must be short at least WORDBYTES to be missing any. 
- let words_short = (LINEBYTES - n) / WORDBYTES; - // XXX this is running short for -c & -a - print!("{:>width$}", "", width = (words_short) * (6 + 2)); - } - print!("\n"); + if length == 0 { + input_offset.print_final_offset(); + break; } - addr += n; + + // not enough byte for a whole element, this should only happen on the last line. + if length != line_bytes { + // set zero bytes in the part of the buffer that will be used, but is not filled. + let mut max_used = length + output_info.byte_size_block; + if max_used > line_bytes { + max_used = line_bytes; + } + + memory_decoder.zero_out_buffer(length, max_used); + } + + if !output_info.output_duplicates + && length == line_bytes + && memory_decoder.get_buffer(0) == &previous_bytes[..] { + if !duplicate_line { + duplicate_line = true; + println!("*"); + } + } else { + duplicate_line = false; + if length == line_bytes { + // save a copy of the input unless it is the last line + memory_decoder.clone_buffer(&mut previous_bytes); + } + + print_bytes(&input_offset.format_byte_offset(), &memory_decoder, + &output_info); + } + + input_offset.increase_position(length); } - Err(_) => { - break; + Err(e) => { + show_error!("{}", e); + input_offset.print_final_offset(); + return 1; } }; } - if mf.any_err { + + if input_decoder.has_error() { 1 } else { 0 } } -// For file byte offset printed at left margin. -fn parse_radix(radix_str: Option) -> Result { - match radix_str { - None => Ok(Radix::Octal), - Some(s) => { - let st = s.into_bytes(); - if st.len() != 1 { - Err("Radix must be one of [d, o, b, x]\n") - } else { - let radix: char = *(st.get(0) - .expect("byte string of length 1 lacks a 0th elem")) as char; - match radix { - 'd' => Ok(Radix::Decimal), - 'x' => Ok(Radix::Hexadecimal), - 'o' => Ok(Radix::Octal), - 'b' => Ok(Radix::Binary), - _ => Err("Radix must be one of [d, o, b, x]\n") +/// Outputs a single line of input, into one or more lines human readable output. 
+fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, output_info: &OutputInfo) { + let mut first = true; // First line of a multi-format raster. + for f in output_info.spaced_formatters_iter() { + let mut output_text = String::new(); + + let mut b = 0; + while b < input_decoder.length() { + output_text.push_str(&format!("{:>width$}", + "", + width = f.spacing[b % output_info.byte_size_block])); + + match f.formatter_item_info.formatter { + FormatWriter::IntWriter(func) => { + let p = input_decoder.read_uint(b, f.formatter_item_info.byte_size); + output_text.push_str(&func(p)); + } + FormatWriter::FloatWriter(func) => { + let p = input_decoder.read_float(b, f.formatter_item_info.byte_size); + output_text.push_str(&func(p)); + } + FormatWriter::MultibyteWriter(func) => { + output_text.push_str(&func(input_decoder.get_full_buffer(b))); } } + + b += f.formatter_item_info.byte_size; } - } -} -fn print_with_radix(r: &Radix, x: usize) { - // TODO(keunwoo): field widths should be based on sizeof(x), or chosen dynamically based on the - // expected range of address values. Binary in particular is not great here. - match *r { - Radix::Decimal => print!("{:07}", x), - Radix::Hexadecimal => print!("{:07X}", x), - Radix::Octal => print!("{:07o}", x), - Radix::Binary => print!("{:07b}", x) - } -} - -// MultifileReader - concatenate all our input, file or stdin. 
-struct MultifileReader<'a> { - ni: std::slice::Iter<'a, InputSource<'a>>, - curr_file: Option>, - any_err: bool, -} -impl<'b> MultifileReader<'b> { - fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> { - let mut mf = MultifileReader { - ni: fnames.iter(), - curr_file: None, // normally this means done; call next_file() - any_err: false, - }; - mf.next_file(); - return mf; - } - - fn next_file(&mut self) { - // loop retries with subsequent files if err - normally 'loops' once - loop { - match self.ni.next() { - None => { - self.curr_file = None; - return; - } - Some(input) => { - match *input { - InputSource::Stdin => { - self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); - return; - } - InputSource::FileName(fname) => { - match File::open(fname) { - Ok(f) => { - self.curr_file = Some(Box::new(BufReader::new(f))); - return; - } - Err(e) => { - // If any file can't be opened, - // print an error at the time that the file is needed, - // then move on the the next file. - // This matches the behavior of the original `od` - let _ = - writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e); - self.any_err = true - } - } - } - } - } - } + if f.add_ascii_dump { + let missing_spacing = output_info.print_width_line.saturating_sub(output_text.chars().count()); + output_text.push_str(&format!("{:>width$} {}", + "", + format_ascii_dump(input_decoder.get_buffer(0)), + width = missing_spacing)); } - } - // Fill buf with bytes read from the list of files - // Returns Ok() - // Handles io errors itself, thus always returns OK - // Fills the provided buffer completely, unless it has run out of input. - // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> - fn f_read(&mut self, buf: &mut [u8]) -> io::Result { - let mut xfrd = 0; - // while buffer we are filling is not full.. May go thru several files. 
- 'fillloop: while xfrd < buf.len() { - match self.curr_file { - None => break, - Some(ref mut curr_file) => { - loop { - // stdin may return on 'return' (enter), even though the buffer isn't full. - xfrd += match curr_file.read(&mut buf[xfrd..]) { - Ok(0) => break, - Ok(n) => n, - Err(e) => panic!("file error: {}", e), - }; - if xfrd == buf.len() { - // transferred all that was asked for. - break 'fillloop; - } - } - } - } - self.next_file(); + if first { + print!("{}", prefix); // print offset + // if printing in multiple formats offset is printed only once + first = false; + } else { + // this takes the space of the file offset on subsequent + // lines of multi-format rasters. + print!("{:>width$}", "", width=prefix.chars().count()); } - Ok(xfrd) + print!("{}\n", output_text); } } +/// returns a reader implementing `PeekRead + Read + HasError` providing the combined input +/// +/// `skip_bytes` is the number of bytes skipped from the input +/// `read_bytes` is an optinal limit to the number of bytes to read +fn open_input_peek_reader<'a>(input_strings: &'a Vec, skip_bytes: usize, + read_bytes: Option) -> PeekReader>> { + // should return "impl PeekRead + Read + HasError" when supported in (stable) rust + let inputs = input_strings + .iter() + .map(|w| match w as &str { + "-" => InputSource::Stdin, + x => InputSource::FileName(x), + }) + .collect::>(); -struct OdFormat { - itembytes: usize, - writer: fn(u64, usize), - offmarg: usize, -} - -// TODO: use some sort of byte iterator, instead of passing bytes in u64 -fn print_item_oct(p: u64, itembytes: usize) { - let itemwidth = 3 * itembytes; - let itemspace = 4 * itembytes - itemwidth; - - print!("{:>itemspace$}{:0width$o}", - "", - p, - width = itemwidth, - itemspace = itemspace); -} - -fn print_item_hex(p: u64, itembytes: usize) { - let itemwidth = 2 * itembytes; - let itemspace = 4 * itembytes - itemwidth; - - print!("{:>itemspace$}{:0width$x}", - "", - p, - width = itemwidth, - itemspace = itemspace); -} - - 
-fn sign_extend(item: u64, itembytes: usize) -> i64{ - // https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend - unsafe{ - let b = 8 * itembytes; // number of bits representing the number in p - let m = mem::transmute::(1u64 << (b - 1)); - let x = mem::transmute::(item) & (mem::transmute::(1u64 << b) - 1); - let r = (x ^ m) - m; - r - } -} - - -fn print_item_dec_s(p: u64, itembytes: usize) { - // sign extend - let s = sign_extend(p,itembytes); - print!("{:totalwidth$}", s, totalwidth = 4 * itembytes); -} -fn print_item_dec_u(p: u64, itembytes: usize) { - print!("{:totalwidth$}", p, totalwidth = 4 * itembytes); -} - -// TODO: multi-byte chars -// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'. - -static A_CHRS : [&'static str; 160] = -["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", - "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", - "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", - "can", "em", "sub", "esc", "fs", "gs", "rs", "us", - "sp", "!", "\"", "#", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "<", "=", ">", "?", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~", "del", - "80", "81", "82", "83", "84", "85", "86", "87", - "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", - "90", "91", "92", "93", "94", "95", "96", "97", - "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; - -fn print_item_a(p: u64, _: usize) { - // itembytes == 1 - let b = (p & 0xff) as u8; - print!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the 
byte - ); -} - - -static C_CHRS : [&'static str; 127] = [ -"\\0", "001", "002", "003", "004", "005", "006", "\\a", -"\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", -"020", "021", "022", "023", "024", "025", "026", "027", -"030", "031", "032", "033", "034", "035", "036", "037", - " ", "!", "\"", "#", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "<", "=", ">", "?", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~" ]; - - -fn print_item_c(p: u64, _: usize) { - // itembytes == 1 - let b = (p & 0xff) as usize; - - if b < C_CHRS.len() { - match C_CHRS.get(b as usize) { - Some(s) => print!("{:>4}", s), - None => print!("{:>4}", b), - } - } + let mf = MultifileReader::new(inputs); + let pr = PartialReader::new(mf, skip_bytes, read_bytes); + let input = PeekReader::new(pr); + input } diff --git a/src/od/output_info.rs b/src/od/output_info.rs new file mode 100644 index 000000000..5f8d8733e --- /dev/null +++ b/src/od/output_info.rs @@ -0,0 +1,241 @@ +use std::cmp; +use std::slice::Iter; +use parse_formats::ParsedFormatterItemInfo; +use formatteriteminfo::FormatterItemInfo; + +/// Size in bytes of the max datatype. ie set to 16 for 128-bit numbers. +const MAX_BYTES_PER_UNIT: usize = 8; + +/// Contains information to output single output line in human readable form +pub struct SpacedFormatterItemInfo { + /// Contains a function pointer to output data, and information about the output format. + pub formatter_item_info: FormatterItemInfo, + /// Contains the number of spaces to add to align data with other output formats. 
+ /// + /// If the corresponding data is a single byte, each entry in this array contains + /// the number of spaces to insert when outputting each byte. If the corresponding + /// data is multi-byte, only the fist byte position is used. For example a 32-bit + /// datatype, could use positions 0, 4, 8, 12, .... + /// As each block is formatted identically, only the spacing for a single block is set. + pub spacing: [usize; MAX_BYTES_PER_UNIT], + /// if set adds a ascii dump at the end of the line + pub add_ascii_dump: bool, +} + +/// Contains information about all output lines. +pub struct OutputInfo { + /// The number of bytes of a line. + pub byte_size_line: usize, + /// The width of a line in human readable format. + pub print_width_line: usize, + + /// The number of bytes in a block. (This is the size of the largest datatype in `spaced_formatters`.) + pub byte_size_block: usize, + /// The width of a block in human readable format. (The size of the largest format.) + pub print_width_block: usize, + /// All formats. + spaced_formatters: Vec, + /// determines if duplicate output lines should be printed, or + /// skipped with a "*" showing one or more skipped lines. + pub output_duplicates: bool, +} + + +impl OutputInfo { + /// Returns an iterator over the `SpacedFormatterItemInfo` vector. 
+ pub fn spaced_formatters_iter(&self) -> Iter { + self.spaced_formatters.iter() + } + + /// Creates a new `OutputInfo` based on the parameters + pub fn new(line_bytes: usize, formats: &[ParsedFormatterItemInfo], output_duplicates: bool) -> OutputInfo { + let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); + let print_width_block = formats + .iter() + .fold(1, |max, next| { + cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size)) + }); + let print_width_line = print_width_block * (line_bytes / byte_size_block); + + let spaced_formatters = OutputInfo::create_spaced_formatter_info(&formats, byte_size_block, print_width_block); + + OutputInfo { + byte_size_line: line_bytes, + print_width_line: print_width_line, + byte_size_block: byte_size_block, + print_width_block: print_width_block, + spaced_formatters: spaced_formatters, + output_duplicates: output_duplicates, + } + } + + fn create_spaced_formatter_info(formats: &[ParsedFormatterItemInfo], + byte_size_block: usize, print_width_block: usize) -> Vec { + formats + .iter() + .map(|f| SpacedFormatterItemInfo { + formatter_item_info: f.formatter_item_info, + add_ascii_dump: f.add_ascii_dump, + spacing: OutputInfo::calculate_alignment(f, byte_size_block, print_width_block) + }) + .collect() + } + + /// calculates proper alignment for a single line of output + /// + /// Multiple representations of the same data, will be right-aligned for easy reading. + /// For example a 64 bit octal and a 32-bit decimal with a 16-bit hexadecimal looks like this: + /// ``` + /// 1777777777777777777777 1777777777777777777777 + /// 4294967295 4294967295 4294967295 4294967295 + /// ffff ffff ffff ffff ffff ffff ffff ffff + /// ``` + /// In this example is additional spacing before the first and third decimal number, + /// and there is additional spacing before the 1st, 3rd, 5th and 7th hexadecimal number. 
+ /// This way both the octal and decimal, aswell the decimal and hexadecimal numbers + /// left align. Note that the alignment below both octal numbers is identical. + /// + /// This function calculates the required spacing for a single line, given the size + /// of a block, and the width of a block. The size of a block is the largest type + /// and the width is width of the the type which needs the most space to print that + /// number of bytes. So both numbers might refer to different types. All widths + /// include a space at the front. For example the width of a 8-bit hexadecimal, + /// is 3 characters, for example " FF". + /// + /// This algorithm first calculates how many spaces needs to be added, based the + /// block size and the size of the type, and the widths of the block and the type. + /// The required spaces are spread across the available positions. + /// If the blocksize is 8, and the size of the type is 8 too, there will be just + /// one value in a block, so all spacing will be assigned to position 0. + /// If the blocksize is 8, and the size of the type is 2, the spacing will be + /// spread across position 0, 2, 4, 6. All 4 positions will get an additional + /// space as long as there are more then 4 spaces available. If there are 2 + /// spaces available, they will be assigend to position 0 and 4. If there is + /// 1 space available, it will be assigned to position 0. This will be combined, + /// For example 7 spaces will be assigned to position 0, 2, 4, 6 like: 3, 1, 2, 1. + /// And 7 spaces with 2 positions will be assigned to position 0 and 4 like 4, 3. 
+ /// + /// Here is another example showing the alignment of 64-bit unsigned decimal numbers, + /// 32-bit hexadecimal number, 16-bit octal numbers and 8-bit hexadecimal numbers: + /// ``` + /// 18446744073709551615 18446744073709551615 + /// ffffffff ffffffff ffffffff ffffffff + /// 177777 177777 177777 177777 177777 177777 177777 177777 + /// ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + /// ``` + /// + /// This algorithm assumes the size of all types is a power of 2 (1, 2, 4, 8, 16, ...) + /// Increase MAX_BYTES_PER_UNIT to allow larger types. + fn calculate_alignment(sf: &TypeSizeInfo, byte_size_block: usize, + print_width_block: usize) -> [usize; MAX_BYTES_PER_UNIT] { + if byte_size_block > MAX_BYTES_PER_UNIT { + panic!("{}-bits types are unsupported. Current max={}-bits.", + 8 * byte_size_block, + 8 * MAX_BYTES_PER_UNIT); + } + let mut spacing = [0; MAX_BYTES_PER_UNIT]; + + let mut byte_size = sf.byte_size(); + let mut items_in_block = byte_size_block / byte_size; + let thisblock_width = sf.print_width() * items_in_block; + let mut missing_spacing = print_width_block - thisblock_width; + + while items_in_block > 0 { + let avg_spacing: usize = missing_spacing / items_in_block; + for i in 0..items_in_block { + spacing[i * byte_size] += avg_spacing; + missing_spacing -= avg_spacing; + } + + items_in_block /= 2; + byte_size *= 2; + } + + spacing + } +} + +trait TypeSizeInfo { + fn byte_size(&self) -> usize; + fn print_width(&self) -> usize; +} + +impl TypeSizeInfo for ParsedFormatterItemInfo { + fn byte_size(&self) -> usize { self.formatter_item_info.byte_size } + fn print_width(&self) -> usize { self.formatter_item_info.print_width } +} + +#[cfg(test)] +struct TypeInfo { + byte_size: usize, + print_width: usize, +} + +#[cfg(test)] +impl TypeSizeInfo for TypeInfo { + fn byte_size(&self) -> usize { self.byte_size } + fn print_width(&self) -> usize { self.print_width } +} + +#[test] +fn test_calculate_alignment() { + // For this example `byte_size_block` is 8 
and 'print_width_block' is 23: + // 1777777777777777777777 1777777777777777777777 + // 4294967295 4294967295 4294967295 4294967295 + // ffff ffff ffff ffff ffff ffff ffff ffff + + // the first line has no additional spacing: + assert_eq!([0, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:23}, 8, 23)); + // the second line a single space at the start of the block: + assert_eq!([1, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:11}, 8, 23)); + // the third line two spaces at pos 0, and 1 space at pos 4: + assert_eq!([2, 0, 0, 0, 1, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:5}, 8, 23)); + + // For this example `byte_size_block` is 8 and 'print_width_block' is 28: + // 18446744073709551615 18446744073709551615 + // ffffffff ffffffff ffffffff ffffffff + // 177777 177777 177777 177777 177777 177777 177777 177777 + // ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + + assert_eq!([7, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:21}, 8, 28)); + assert_eq!([5, 0, 0, 0, 5, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:9}, 8, 28)); + assert_eq!([0, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:7}, 8, 28)); + assert_eq!([1, 0, 1, 0, 1, 0, 1, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:3}, 8, 28)); + + // 9 tests where 8 .. 
16 spaces are spread across 8 positions + assert_eq!([1, 1, 1, 1, 1, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 8)); + assert_eq!([2, 1, 1, 1, 1, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 9)); + assert_eq!([2, 1, 1, 1, 2, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 10)); + assert_eq!([3, 1, 1, 1, 2, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 11)); + assert_eq!([2, 1, 2, 1, 2, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 12)); + assert_eq!([3, 1, 2, 1, 2, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 13)); + assert_eq!([3, 1, 2, 1, 3, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 14)); + assert_eq!([4, 1, 2, 1, 3, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 15)); + assert_eq!([2, 2, 2, 2, 2, 2, 2, 2], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 16)); + + // 4 tests where 15 spaces are spread across 8, 4, 2 or 1 position(s) + assert_eq!([4, 1, 2, 1, 3, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 15)); + assert_eq!([5, 0, 3, 0, 4, 0, 3, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:4}, 8, 16 + 15)); + assert_eq!([8, 0, 0, 0, 7, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:8}, 8, 16 + 15)); + assert_eq!([15, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:16}, 8, 16 + 15)); +} diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs new file mode 100644 index 000000000..472208e5a --- /dev/null +++ b/src/od/parse_formats.rs @@ -0,0 +1,516 @@ +use formatteriteminfo::FormatterItemInfo; +use prn_int::*; +use 
prn_char::*; +use prn_float::*; + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct ParsedFormatterItemInfo { + pub formatter_item_info: FormatterItemInfo, + pub add_ascii_dump: bool, +} + +impl ParsedFormatterItemInfo { + pub fn new(formatter_item_info: FormatterItemInfo, add_ascii_dump: bool) -> ParsedFormatterItemInfo { + ParsedFormatterItemInfo { + formatter_item_info: formatter_item_info, + add_ascii_dump: add_ascii_dump, + } + } +} + +fn od_argument_traditional_format(ch: char) -> Option { + match ch { + 'a' => Some(FORMAT_ITEM_A), + 'B' => Some(FORMAT_ITEM_OCT16), + 'b' => Some(FORMAT_ITEM_OCT8), + 'c' => Some(FORMAT_ITEM_C), + 'D' => Some(FORMAT_ITEM_DEC32U), + 'd' => Some(FORMAT_ITEM_DEC16U), + 'e' => Some(FORMAT_ITEM_F64), + 'F' => Some(FORMAT_ITEM_F64), + 'f' => Some(FORMAT_ITEM_F32), + 'H' => Some(FORMAT_ITEM_HEX32), + 'h' => Some(FORMAT_ITEM_HEX16), + 'i' => Some(FORMAT_ITEM_DEC32S), + 'I' => Some(FORMAT_ITEM_DEC64S), + 'L' => Some(FORMAT_ITEM_DEC64S), + 'l' => Some(FORMAT_ITEM_DEC64S), + 'O' => Some(FORMAT_ITEM_OCT32), + 'o' => Some(FORMAT_ITEM_OCT16), + 's' => Some(FORMAT_ITEM_DEC16S), + 'X' => Some(FORMAT_ITEM_HEX32), + 'x' => Some(FORMAT_ITEM_HEX16), + _ => None, + } +} + +fn od_format_type(type_char: FormatType, byte_size: u8) -> Option { + match (type_char, byte_size) { + (FormatType::Ascii, _) => Some(FORMAT_ITEM_A), + (FormatType::Char, _) => Some(FORMAT_ITEM_C), + + (FormatType::DecimalInt, 1) => Some(FORMAT_ITEM_DEC8S), + (FormatType::DecimalInt, 2) => Some(FORMAT_ITEM_DEC16S), + (FormatType::DecimalInt, 0) | + (FormatType::DecimalInt, 4) => Some(FORMAT_ITEM_DEC32S), + (FormatType::DecimalInt, 8) => Some(FORMAT_ITEM_DEC64S), + + (FormatType::OctalInt, 1) => Some(FORMAT_ITEM_OCT8), + (FormatType::OctalInt, 2) => Some(FORMAT_ITEM_OCT16), + (FormatType::OctalInt, 0) | + (FormatType::OctalInt, 4) => Some(FORMAT_ITEM_OCT32), + (FormatType::OctalInt, 8) => Some(FORMAT_ITEM_OCT64), + + (FormatType::UnsignedInt, 1) => Some(FORMAT_ITEM_DEC8U), 
+ (FormatType::UnsignedInt, 2) => Some(FORMAT_ITEM_DEC16U), + (FormatType::UnsignedInt, 0) | + (FormatType::UnsignedInt, 4) => Some(FORMAT_ITEM_DEC32U), + (FormatType::UnsignedInt, 8) => Some(FORMAT_ITEM_DEC64U), + + (FormatType::HexadecimalInt, 1) => Some(FORMAT_ITEM_HEX8), + (FormatType::HexadecimalInt, 2) => Some(FORMAT_ITEM_HEX16), + (FormatType::HexadecimalInt, 0) | + (FormatType::HexadecimalInt, 4) => Some(FORMAT_ITEM_HEX32), + (FormatType::HexadecimalInt, 8) => Some(FORMAT_ITEM_HEX64), + + (FormatType::Float, 2) => Some(FORMAT_ITEM_F16), + (FormatType::Float, 0) | + (FormatType::Float, 4) => Some(FORMAT_ITEM_F32), + (FormatType::Float, 8) => Some(FORMAT_ITEM_F64), + + _ => None, + } +} + +fn od_argument_with_option(ch:char) -> bool { + match ch { + 'A' | 'j' | 'N' | 'S' | 'w' => true, + _ => false, + } +} + + +/// Parses format flags from commandline +/// +/// getopts, docopt, clap don't seem suitable to parse the commandline +/// arguments used for formats. In particular arguments can appear +/// multiple times and the order they appear in, is significant. +/// +/// arguments like -f, -o, -x can appear separate or combined: -fox +/// it can also be mixed with non format related flags like -v: -fvox +/// arguments with parameters like -w16 can only appear at the end: -fvoxw16 +/// parameters of -t/--format specify 1 or more formats. +/// if -- appears on the commandline, parsing should stop. 
+pub fn parse_format_flags(args: &Vec) -> Result, String> { + let mut formats = Vec::new(); + + // args[0] is the name of the binary + let mut arg_iter = args.iter().skip(1); + let mut expect_type_string = false; + + while let Some(arg) = arg_iter.next() { + if expect_type_string { + match parse_type_string(arg) { + Ok(v) => formats.extend(v.into_iter()), + Err(e) => return Err(e), + } + expect_type_string = false; + } else if arg.starts_with("--") { + if arg.len() == 2 { + break; + } + if arg.starts_with("--format=") { + let params: String = arg.chars().skip_while(|c| *c != '=').skip(1).collect(); + match parse_type_string(¶ms) { + Ok(v) => formats.extend(v.into_iter()), + Err(e) => return Err(e), + } + } + if arg == "--format" { + expect_type_string = true; + } + } else if arg.starts_with("-") { + let mut flags = arg.chars().skip(1); + let mut format_spec = String::new(); + while let Some(c) = flags.next() { + if expect_type_string { + format_spec.push(c); + } else if od_argument_with_option(c) { + break; + } else if c == 't' { + expect_type_string = true; + } else { + // not every option is a format + if let Some(r) = od_argument_traditional_format(c) { + formats.push(ParsedFormatterItemInfo::new(r, false)) + } + } + } + if !format_spec.is_empty() { + match parse_type_string(&format_spec) { + Ok(v) => formats.extend(v.into_iter()), + Err(e) => return Err(e), + } + expect_type_string = false; + } + } + } + if expect_type_string { + return Err(format!("missing format specification after '--format' / '-t'")); + } + + if formats.is_empty() { + formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_OCT16, false)); // 2 byte octal is the default + } + + Ok(formats) +} + +#[derive(PartialEq, Eq, Debug, Copy, Clone)] +enum FormatType { + Ascii, + Char, + DecimalInt, + OctalInt, + UnsignedInt, + HexadecimalInt, + Float, +} + +#[derive(PartialEq, Eq, Debug, Copy, Clone)] +enum FormatTypeCategory { + Char, + Integer, + Float, +} + +fn format_type(ch: char) -> Option { + 
match ch { + 'a' => Some(FormatType::Ascii), + 'c' => Some(FormatType::Char), + 'd' => Some(FormatType::DecimalInt), + 'o' => Some(FormatType::OctalInt), + 'u' => Some(FormatType::UnsignedInt), + 'x' => Some(FormatType::HexadecimalInt), + 'f' => Some(FormatType::Float), + _ => None, + } +} + + +fn format_type_category(t: FormatType) -> FormatTypeCategory { + match t { + FormatType::Ascii | FormatType::Char + => FormatTypeCategory::Char, + FormatType::DecimalInt | FormatType::OctalInt | FormatType::UnsignedInt | FormatType::HexadecimalInt + => FormatTypeCategory::Integer, + FormatType::Float + => FormatTypeCategory::Float, + } +} + +fn is_format_size_char(ch: Option, format_type: FormatTypeCategory, byte_size: &mut u8) -> bool { + match (format_type, ch) { + (FormatTypeCategory::Integer, Some('C')) => { + *byte_size = 1; + true + }, + (FormatTypeCategory::Integer, Some('S')) => { + *byte_size = 2; + true + }, + (FormatTypeCategory::Integer, Some('I')) => { + *byte_size = 4; + true + }, + (FormatTypeCategory::Integer, Some('L')) => { + *byte_size = 8; + true + }, + + (FormatTypeCategory::Float, Some('F')) => { + *byte_size = 4; + true + }, + (FormatTypeCategory::Float, Some('D')) => { + *byte_size = 8; + true + }, + // FormatTypeCategory::Float, 'L' => *byte_size = 16, // TODO support f128 + + _ => false, + } +} + +fn is_format_size_decimal(ch: Option, format_type: FormatTypeCategory, decimal_size: &mut String) -> bool { + if format_type == FormatTypeCategory::Char { return false; } + match ch { + Some(d) if d.is_digit(10) => { + decimal_size.push(d); + return true; + } + _ => false, + } +} + +fn is_format_dump_char(ch: Option, show_ascii_dump: &mut bool) -> bool { + match ch { + Some('z') => { + *show_ascii_dump = true; + return true; + } + _ => false, + } +} + +fn parse_type_string(params: &String) -> Result, String> { + let mut formats = Vec::new(); + + let mut chars = params.chars(); + let mut ch = chars.next(); + + while ch.is_some() { + let type_char = 
ch.unwrap(); + let type_char = match format_type(type_char) { + Some(t) => t, + None => { + return Err(format!("unexpected char '{}' in format specification '{}'", type_char, params)); + } + }; + + let type_cat = format_type_category(type_char); + + ch = chars.next(); + + let mut byte_size = 0u8; + let mut show_ascii_dump = false; + if is_format_size_char(ch, type_cat, &mut byte_size) { + ch = chars.next(); + } else { + let mut decimal_size = String::new(); + while is_format_size_decimal(ch, type_cat, &mut decimal_size) { + ch = chars.next(); + } + if !decimal_size.is_empty() { + byte_size = match decimal_size.parse() { + Err(_) => return Err(format!("invalid number '{}' in format specification '{}'", decimal_size, params)), + Ok(n) => n, + } + } + } + if is_format_dump_char(ch, &mut show_ascii_dump) { + ch = chars.next(); + } + + match od_format_type(type_char, byte_size) { + Some(ft) => formats.push(ParsedFormatterItemInfo::new(ft, show_ascii_dump)), + None => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, params)), + } + } + + Ok(formats) +} + +#[cfg(test)] +pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Result, String> { + let args = args_str.iter().map(|s| s.to_string()).collect(); + match parse_format_flags(&args) { + Err(e) => Err(e), + Ok(v) => { + // tests using this function asume add_ascii_dump is not set + Ok(v.into_iter() + .inspect(|f| assert!(!f.add_ascii_dump)) + .map(|f| f.formatter_item_info) + .collect()) + }, + } +} + + +#[test] +fn test_no_options() { + assert_eq!(parse_format_flags_str( + &vec!("od")).unwrap(), + vec!(FORMAT_ITEM_OCT16)); +} + +#[test] +fn test_one_option() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-F")).unwrap(), + vec!(FORMAT_ITEM_F64)); +} + +#[test] +fn test_two_separate_options() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-F", "-x")).unwrap(), + vec!(FORMAT_ITEM_F64, FORMAT_ITEM_HEX16)); +} + +#[test] +fn test_two_combined_options() { + 
assert_eq!(parse_format_flags_str( + &vec!("od", "-Fx")).unwrap(), + vec!(FORMAT_ITEM_F64, FORMAT_ITEM_HEX16)); +} + +#[test] +fn test_ignore_non_format_parameters() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-d", "-Ax")).unwrap(), + vec!(FORMAT_ITEM_DEC16U)); +} + +#[test] +fn test_ignore_separate_parameters() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-I", "-A", "x")).unwrap(), + vec!(FORMAT_ITEM_DEC64S)); +} + +#[test] +fn test_ignore_trailing_vals() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-D", "--", "-x")).unwrap(), + vec!(FORMAT_ITEM_DEC32U)); +} + +#[test] +fn test_invalid_long_format() { + parse_format_flags_str(&vec!("od", "--format=X")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=xX")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=aC")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=fI")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=xD")).unwrap_err(); + + parse_format_flags_str(&vec!("od", "--format=xC1")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=x1C")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=xz1")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=xzC")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=xzz")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=xCC")).unwrap_err(); + + parse_format_flags_str(&vec!("od", "--format=c1")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=x256")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=d5")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=f1")).unwrap_err(); +} + +#[test] +fn test_long_format_a() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=a")).unwrap(), + vec!(FORMAT_ITEM_A)); +} + +#[test] +fn test_long_format_cz() { + assert_eq!(parse_format_flags( + &vec!("od".to_string(), "--format=cz".to_string())).unwrap(), + vec!(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, true))); +} + 
+#[test] +fn test_long_format_d() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=d8")).unwrap(), + vec!(FORMAT_ITEM_DEC64S)); +} + +#[test] +fn test_long_format_d_default() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=d")).unwrap(), + vec!(FORMAT_ITEM_DEC32S)); +} + +#[test] +fn test_long_format_o_default() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=o")).unwrap(), + vec!(FORMAT_ITEM_OCT32)); +} + +#[test] +fn test_long_format_u_default() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=u")).unwrap(), + vec!(FORMAT_ITEM_DEC32U)); +} + +#[test] +fn test_long_format_x_default() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=x")).unwrap(), + vec!(FORMAT_ITEM_HEX32)); +} + +#[test] +fn test_long_format_f_default() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format=f")).unwrap(), + vec!(FORMAT_ITEM_F32)); +} + +#[test] +fn test_long_format_next_arg() { + assert_eq!(parse_format_flags_str( + &vec!("od", "--format", "f8")).unwrap(), + vec!(FORMAT_ITEM_F64)); +} + +#[test] +fn test_short_format_next_arg() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-t", "x8")).unwrap(), + vec!(FORMAT_ITEM_HEX64)); +} + +#[test] +fn test_short_format_combined_arg() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-tu8")).unwrap(), + vec!(FORMAT_ITEM_DEC64U)); +} + +#[test] +fn test_format_next_arg_invalid() { + parse_format_flags_str(&vec!("od", "--format", "-v")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format")).unwrap_err(); + parse_format_flags_str(&vec!("od", "-t", "-v")).unwrap_err(); + parse_format_flags_str(&vec!("od", "-t")).unwrap_err(); +} + +#[test] +fn test_mixed_formats() { + assert_eq!(parse_format_flags( + &vec!( + "od".to_string(), + "--skip-bytes=2".to_string(), + "-vItu1z".to_string(), + "-N".to_string(), + "1000".to_string(), + "-xt".to_string(), + "acdx1".to_string(), + "--format=u2c".to_string(), + "--format".to_string(), + "f".to_string(), + 
"-xAx".to_string(), + "--".to_string(), + "-h".to_string(), + "--format=f8".to_string())).unwrap(), + vec!( + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC64S, false), // I + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC8U, true), // tu1z + ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false), // x + ParsedFormatterItemInfo::new(FORMAT_ITEM_A, false), // ta + ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false), // tc + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC32S, false), // td + ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX8, false), // tx1 + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC16U, false), // tu2 + ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false), // tc + ParsedFormatterItemInfo::new(FORMAT_ITEM_F32, false), // tf + ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false), // x + )); +} diff --git a/src/od/parse_inputs.rs b/src/od/parse_inputs.rs new file mode 100644 index 000000000..9ac04e23c --- /dev/null +++ b/src/od/parse_inputs.rs @@ -0,0 +1,362 @@ +use getopts::Matches; + +/// Abstraction for getopts +pub trait CommandLineOpts { + /// returns all commandline parameters which do not belong to an option. + fn inputs(&self) -> Vec; + /// tests if any of the specified options is present. + fn opts_present(&self, &[&str]) -> bool; +} + +/// Implementation for `getopts` +impl CommandLineOpts for Matches { + fn inputs(&self) -> Vec { + self.free.clone() + } + fn opts_present(&self, opts: &[&str]) -> bool { + self.opts_present(&opts.iter().map(|s| s.to_string()).collect::>()) + } +} + +/// Contains the Input filename(s) with an optional offset. +/// +/// `FileNames` is used for one or more file inputs ("-" = stdin) +/// `FileAndOffset` is used for a single file input, with an offset +/// and an optional label. Offset and label are specified in bytes. +/// `FileAndOffset` will be only used if an offset is specified, +/// but it might be 0. 
+#[derive(PartialEq, Debug)] +pub enum CommandLineInputs { + FileNames(Vec), + FileAndOffset((String, usize, Option)), +} + + +/// Interprets the commandline inputs of od. +/// +/// Returns either an unspecified number of filenames. +/// Or it will return a single filename, with an offset and optional label. +/// Offset and label are specified in bytes. +/// '-' is used as filename if stdin is meant. This is also returned if +/// there is no input, as stdin is the default input. +pub fn parse_inputs(matches: &CommandLineOpts) -> Result { + let mut input_strings: Vec = matches.inputs(); + + if matches.opts_present(&["traditional"]) { + return parse_inputs_traditional(input_strings); + } + + // test if commandline contains: [file] + // fall-through if no (valid) offset is found + if input_strings.len() == 1 || input_strings.len() == 2 { + // if any of the options -A, -j, -N, -t, -v or -w are present there is no offset + if !matches.opts_present(&["A", "j", "N", "t", "v", "w"]) { + // test if the last input can be parsed as an offset. 
+ let offset = parse_offset_operand(&input_strings[input_strings.len()-1]); + match offset { + Ok(n) => { + // if there is just 1 input (stdin), an offset must start with '+' + if input_strings.len() == 1 && input_strings[0].starts_with("+") { + return Ok(CommandLineInputs::FileAndOffset(("-".to_string(), n, None))); + } + if input_strings.len() == 2 { + return Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, None))); + } + } + _ => { + // if it cannot be parsed, it is considered a filename + } + } + } + } + + if input_strings.len() == 0 { + input_strings.push("-".to_string()); + } + Ok(CommandLineInputs::FileNames(input_strings)) +} + +/// interprets inputs when --traditional is on the commandline +/// +/// normally returns CommandLineInputs::FileAndOffset, but if no offset is found, +/// it returns CommandLineInputs::FileNames (also to differentiate from the offset == 0) +pub fn parse_inputs_traditional(input_strings: Vec) -> Result { + match input_strings.len() { + 0 => { + Ok(CommandLineInputs::FileNames(vec!["-".to_string()])) + } + 1 => { + let offset0 = parse_offset_operand(&input_strings[0]); + Ok(match offset0 { + Ok(n) => CommandLineInputs::FileAndOffset(("-".to_string(), n, None)), + _ => CommandLineInputs::FileNames(input_strings), + }) + } + 2 => { + let offset0 = parse_offset_operand(&input_strings[0]); + let offset1 = parse_offset_operand(&input_strings[1]); + match (offset0, offset1) { + (Ok(n), Ok(m)) => Ok(CommandLineInputs::FileAndOffset(("-".to_string(), n, Some(m)))), + (_, Ok(m)) => Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), m, None))), + _ => Err(format!("invalid offset: {}", input_strings[1])), + } + } + 3 => { + let offset = parse_offset_operand(&input_strings[1]); + let label = parse_offset_operand(&input_strings[2]); + match (offset, label) { + (Ok(n), Ok(m)) => Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, Some(m)))), + (Err(_), _) => Err(format!("invalid offset: {}", 
input_strings[1])), + (_, Err(_)) => Err(format!("invalid label: {}", input_strings[2])), + } + } + _ => { + Err(format!("too many inputs after --traditional: {}", input_strings[3])) + } + } +} + +/// parses format used by offset and label on the commandline +pub fn parse_offset_operand(s: &String) -> Result { + let mut start = 0; + let mut len = s.len(); + let mut radix = 8; + let mut multiply = 1; + + if s.starts_with("+") { + start += 1; + } + + if s[start..len].starts_with("0x") || s[start..len].starts_with("0X") { + start += 2; + radix = 16; + } else { + if s[start..len].ends_with("b") { + len -= 1; + multiply = 512; + } + if s[start..len].ends_with(".") { + len -= 1; + radix = 10; + } + } + match usize::from_str_radix(&s[start..len], radix) { + Ok(i) => Ok(i * multiply), + Err(_) => Err("parse failed"), + } +} + + +#[cfg(test)] +mod tests { + use super::*; + + /// A mock for the commandline options type + /// + /// `inputs` are all commandline parameters which do not belong to an option. + /// `option_names` are the names of the options on the commandline. 
+ struct MockOptions<'a> { + inputs: Vec, + option_names: Vec<&'a str>, + } + + impl<'a> MockOptions<'a> { + fn new(inputs: Vec<&'a str>, option_names: Vec<&'a str>) -> MockOptions<'a> { + MockOptions { + inputs: inputs.iter().map(|s| s.to_string()).collect::>(), + option_names: option_names, + } + } + } + + impl<'a> CommandLineOpts for MockOptions<'a> { + fn inputs(&self) -> Vec { + self.inputs.clone() + } + fn opts_present(&self, opts: &[&str]) -> bool { + for expected in opts.iter() { + for actual in self.option_names.iter() { + if *expected == *actual { + return true; + } + } + } + false + } + } + + #[test] + fn test_parse_inputs_normal() { + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string()]), + parse_inputs(&MockOptions::new( + vec![], + vec![])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string()]), + parse_inputs(&MockOptions::new( + vec!["-"], + vec![])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string()]), + parse_inputs(&MockOptions::new( + vec!["file1"], + vec![])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string(), "file2".to_string()]), + parse_inputs(&MockOptions::new( + vec!["file1", "file2"], + vec![])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string(), "file1".to_string(), "file2".to_string()]), + parse_inputs(&MockOptions::new( + vec!["-", "file1", "file2"], + vec![])).unwrap()); + } + + #[test] + fn test_parse_inputs_with_offset() { + // offset is found without filename, so stdin will be used. + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!["+10"], + vec![])).unwrap()); + + // offset must start with "+" if no input is specified. + assert_eq!(CommandLineInputs::FileNames(vec!["10".to_string()]), + parse_inputs(&MockOptions::new( + vec!["10"], + vec![""])).unwrap()); + + // offset is not valid, so it is considered a filename. 
+ assert_eq!(CommandLineInputs::FileNames(vec!["+10a".to_string()]), + parse_inputs(&MockOptions::new( + vec!["+10a"], + vec![""])).unwrap()); + + // if -j is included in the commandline, there cannot be an offset. + assert_eq!(CommandLineInputs::FileNames(vec!["+10".to_string()]), + parse_inputs(&MockOptions::new( + vec!["+10"], + vec!["j"])).unwrap()); + + // if -v is included in the commandline, there cannot be an offset. + assert_eq!(CommandLineInputs::FileNames(vec!["+10".to_string()]), + parse_inputs(&MockOptions::new( + vec!["+10"], + vec!["o", "v"])).unwrap()); + + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!["file1", "+10"], + vec![])).unwrap()); + + // offset does not need to start with "+" if a filename is included. + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!["file1", "10"], + vec![])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string(), "+10a".to_string()]), + parse_inputs(&MockOptions::new( + vec!["file1", "+10a"], + vec![""])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string(), "+10".to_string()]), + parse_inputs(&MockOptions::new( + vec!["file1", "+10"], + vec!["j"])).unwrap()); + + // offset must be last on the commandline + assert_eq!(CommandLineInputs::FileNames(vec!["+10".to_string(), "file1".to_string()]), + parse_inputs(&MockOptions::new( + vec!["+10", "file1"], + vec![""])).unwrap()); + } + + #[test] + fn test_parse_inputs_traditional() { + // it should not return FileAndOffset to signal no offset was entered on the commandline. 
+ assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string()]), + parse_inputs(&MockOptions::new( + vec![], + vec!["traditional"])).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string()]), + parse_inputs(&MockOptions::new( + vec!["file1"], + vec!["traditional"])).unwrap()); + + // offset does not need to start with a + + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!["10"], + vec!["traditional"])).unwrap()); + + // valid offset and valid label + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, Some(8))), + parse_inputs(&MockOptions::new( + vec!["10", "10"], + vec!["traditional"])).unwrap()); + + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!["file1", "10"], + vec!["traditional"])).unwrap()); + + // only one file is allowed, it must be the first + parse_inputs(&MockOptions::new( + vec!["10", "file1"], + vec!["traditional"])).unwrap_err(); + + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, Some(8))), + parse_inputs(&MockOptions::new( + vec!["file1", "10", "10"], + vec!["traditional"])).unwrap()); + + parse_inputs(&MockOptions::new( + vec!["10", "file1", "10"], + vec!["traditional"])).unwrap_err(); + + parse_inputs(&MockOptions::new( + vec!["10", "10", "file1"], + vec!["traditional"])).unwrap_err(); + + parse_inputs(&MockOptions::new( + vec!["10", "10", "10", "10"], + vec!["traditional"])).unwrap_err(); + } + + fn parse_offset_operand_str(s: &str) -> Result { + parse_offset_operand(&String::from(s)) + } + + #[test] + fn test_parse_offset_operand_invalid() { + parse_offset_operand_str("").unwrap_err(); + parse_offset_operand_str("a").unwrap_err(); + parse_offset_operand_str("+").unwrap_err(); + parse_offset_operand_str("+b").unwrap_err(); + parse_offset_operand_str("0x1.").unwrap_err(); + parse_offset_operand_str("0x1.b").unwrap_err(); + 
parse_offset_operand_str("-").unwrap_err(); + parse_offset_operand_str("-1").unwrap_err(); + parse_offset_operand_str("1e10").unwrap_err(); + } + + #[test] + fn test_parse_offset_operand() { + assert_eq!(8, parse_offset_operand_str("10").unwrap()); // default octal + assert_eq!(0, parse_offset_operand_str("0").unwrap()); + assert_eq!(8, parse_offset_operand_str("+10").unwrap()); // optional leading '+' + assert_eq!(16, parse_offset_operand_str("0x10").unwrap()); // hex + assert_eq!(16, parse_offset_operand_str("0X10").unwrap()); // hex + assert_eq!(16, parse_offset_operand_str("+0X10").unwrap()); // hex + assert_eq!(10, parse_offset_operand_str("10.").unwrap()); // decimal + assert_eq!(10, parse_offset_operand_str("+10.").unwrap()); // decimal + assert_eq!(4096, parse_offset_operand_str("10b").unwrap()); // b suffix = *512 + assert_eq!(4096, parse_offset_operand_str("+10b").unwrap()); // b suffix = *512 + assert_eq!(5120, parse_offset_operand_str("10.b").unwrap()); // b suffix = *512 + assert_eq!(5120, parse_offset_operand_str("+10.b").unwrap()); // b suffix = *512 + assert_eq!(267, parse_offset_operand_str("0x10b").unwrap()); // hex + } +} diff --git a/src/od/parse_nrofbytes.rs b/src/od/parse_nrofbytes.rs new file mode 100644 index 000000000..be55cb7b6 --- /dev/null +++ b/src/od/parse_nrofbytes.rs @@ -0,0 +1,128 @@ + +pub fn parse_number_of_bytes(s: &String) -> Result { + let mut start = 0; + let mut len = s.len(); + let mut radix = 10; + let mut multiply = 1; + + if s.starts_with("0x") || s.starts_with("0X") { + start = 2; + radix = 16; + } else if s.starts_with("0") { + radix = 8; + } + + let mut ends_with = s.chars().rev(); + match ends_with.next() { + Some('b') if radix != 16 => { + multiply = 512; + len -= 1; + }, + Some('k') | Some('K') => { + multiply = 1024; + len -= 1; + } + Some('m') | Some('M') => { + multiply = 1024 * 1024; + len -= 1; + } + Some('G') => { + multiply = 1024 * 1024 * 1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + 
Some('T') => { + multiply = 1024 * 1024 * 1024 * 1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('P') => { + multiply = 1024 * 1024 * 1024 * 1024 * 1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('E') => { + multiply = 1024 * 1024 * 1024 * 1024 * 1024 * 1024; + len -= 1; + } + Some('B') if radix != 16 => { + len -= 2; + multiply = match ends_with.next() { + Some('k') | Some('K') => 1000, + Some('m') | Some('M') => 1000 * 1000, + Some('G') => 1000 * 1000 * 1000, + #[cfg(target_pointer_width = "64")] + Some('T') => 1000 * 1000 * 1000 * 1000, + #[cfg(target_pointer_width = "64")] + Some('P') => 1000 * 1000 * 1000 * 1000 * 1000, + #[cfg(target_pointer_width = "64")] + Some('E') => 1000 * 1000 * 1000 * 1000 * 1000 * 1000, + _ => return Err("parse failed"), + } + }, + _ => {}, + } + + match usize::from_str_radix(&s[start..len], radix) { + Ok(i) => Ok(i * multiply), + Err(_) => Err("parse failed"), + } +} + +#[allow(dead_code)] +fn parse_number_of_bytes_str(s: &str) -> Result { + parse_number_of_bytes(&String::from(s)) +} + +#[test] +fn test_parse_number_of_bytes() { + // normal decimal numbers + assert_eq!(0, parse_number_of_bytes_str("0").unwrap()); + assert_eq!(5, parse_number_of_bytes_str("5").unwrap()); + assert_eq!(999, parse_number_of_bytes_str("999").unwrap()); + assert_eq!(2 * 512, parse_number_of_bytes_str("2b").unwrap()); + assert_eq!(2 * 1024, parse_number_of_bytes_str("2k").unwrap()); + assert_eq!(4 * 1024, parse_number_of_bytes_str("4K").unwrap()); + assert_eq!(2 * 1048576, parse_number_of_bytes_str("2m").unwrap()); + assert_eq!(4 * 1048576, parse_number_of_bytes_str("4M").unwrap()); + assert_eq!(1073741824, parse_number_of_bytes_str("1G").unwrap()); + assert_eq!(2000, parse_number_of_bytes_str("2kB").unwrap()); + assert_eq!(4000, parse_number_of_bytes_str("4KB").unwrap()); + assert_eq!(2000000, parse_number_of_bytes_str("2mB").unwrap()); + assert_eq!(4000000, parse_number_of_bytes_str("4MB").unwrap()); + 
assert_eq!(2000000000, parse_number_of_bytes_str("2GB").unwrap()); + + // octal input + assert_eq!(8, parse_number_of_bytes_str("010").unwrap()); + assert_eq!(8 * 512, parse_number_of_bytes_str("010b").unwrap()); + assert_eq!(8 * 1024, parse_number_of_bytes_str("010k").unwrap()); + assert_eq!(8 * 1048576, parse_number_of_bytes_str("010m").unwrap()); + + // hex input + assert_eq!(15, parse_number_of_bytes_str("0xf").unwrap()); + assert_eq!(15, parse_number_of_bytes_str("0XF").unwrap()); + assert_eq!(27, parse_number_of_bytes_str("0x1b").unwrap()); + assert_eq!(16 * 1024, parse_number_of_bytes_str("0x10k").unwrap()); + assert_eq!(16 * 1048576, parse_number_of_bytes_str("0x10m").unwrap()); + + // invalid input + parse_number_of_bytes_str("").unwrap_err(); + parse_number_of_bytes_str("-1").unwrap_err(); + parse_number_of_bytes_str("1e2").unwrap_err(); + parse_number_of_bytes_str("xyz").unwrap_err(); + parse_number_of_bytes_str("b").unwrap_err(); + parse_number_of_bytes_str("1Y").unwrap_err(); + parse_number_of_bytes_str("∞").unwrap_err(); +} + +#[test] +#[cfg(target_pointer_width = "64")] +fn test_parse_number_of_bytes_64bits() { + assert_eq!(1099511627776, parse_number_of_bytes_str("1T").unwrap()); + assert_eq!(1125899906842624, parse_number_of_bytes_str("1P").unwrap()); + assert_eq!(1152921504606846976, parse_number_of_bytes_str("1E").unwrap()); + + assert_eq!(2000000000000, parse_number_of_bytes_str("2TB").unwrap()); + assert_eq!(2000000000000000, parse_number_of_bytes_str("2PB").unwrap()); + assert_eq!(2000000000000000000, parse_number_of_bytes_str("2EB").unwrap()); +} diff --git a/src/od/partialreader.rs b/src/od/partialreader.rs new file mode 100644 index 000000000..72a37f58b --- /dev/null +++ b/src/od/partialreader.rs @@ -0,0 +1,204 @@ +use std::cmp; +use std::io; +use std::io::Read; +use multifilereader::HasError; + +/// When a large number of bytes must be skipped, it will be read into a +/// dynamically allocated buffer. 
The buffer will be limited to this size. +const MAX_SKIP_BUFFER: usize = 64 * 1024; + +/// Wrapper for `std::io::Read` which can skip bytes at the beginning +/// of the input, and it can limit the returned bytes to a particular +/// number of bytes. +pub struct PartialReader { + inner: R, + skip: usize, + limit: Option, +} + +impl PartialReader { + /// Create a new `PartialReader` wrapping `inner`, which will skip + /// `skip` bytes, and limits the output to `limit` bytes. Set `limit` + /// to `None` if there should be no limit. + pub fn new(inner: R, skip: usize, limit: Option) -> Self { + PartialReader { + inner: inner, + skip: skip, + limit: limit, + } + } +} + +impl Read for PartialReader { + fn read(&mut self, out: &mut [u8]) -> io::Result { + if self.skip > 0 { + let buf_size = cmp::min(self.skip, MAX_SKIP_BUFFER); + let mut bytes: Vec = Vec::with_capacity(buf_size); + unsafe { bytes.set_len(buf_size); } + + while self.skip > 0 { + let skip_count = cmp::min(self.skip, buf_size); + + match self.inner.read_exact(&mut bytes[..skip_count]) { + Err(e) => return Err(e), + Ok(()) => self.skip -= skip_count, + } + } + } + match self.limit { + None => self.inner.read(out), + Some(0) => Ok(0), + Some(ref mut limit) => { + let slice = if *limit > out.len() { out } else { &mut out[0..*limit] }; + match self.inner.read(slice) { + Err(e) => Err(e), + Ok(r) => { + *limit -= r; + Ok(r) + }, + } + }, + } + } +} + +impl HasError for PartialReader { + fn has_error(&self) -> bool { + self.inner.has_error() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read, ErrorKind}; + use std::error::Error; + use mockstream::*; + + #[test] + fn test_read_without_limits() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_read_without_limits_with_error() { + let mut v 
= [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 0, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_skipping_bytes() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); + } + + #[test] + fn test_read_skipping_all() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 20, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::UnexpectedEof); + } + + #[test] + fn test_read_skipping_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 2, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_skipping_with_two_reads_during_skip() { + let mut v = [0; 10]; + let c = Cursor::new(&b"a"[..]) + .chain(Cursor::new(&b"bcdefgh"[..])); + let mut sut = PartialReader::new(c, 2, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); + } + + #[test] + fn test_read_skipping_huge_number() { + let mut v = [0; 10]; + // test if it does not eat all memory.... 
+ let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), usize::max_value(), None); + + sut.read(v.as_mut()).unwrap_err(); + } + + #[test] + fn test_read_limitting_all() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(0)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_read_limitting() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0]); + } + + #[test] + fn test_read_limitting_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 0, Some(6)); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_limitting_with_large_limit() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(20)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_read_limitting_with_multiple_reads() { + let mut v = [0; 3]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x61, 0x62, 0x63]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x64, 0x65, 0x66]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_read_skipping_and_limitting() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, Some(4)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 4); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0, 0, 0]); + } +} diff --git a/src/od/peekreader.rs b/src/od/peekreader.rs new file mode 100644 index 
000000000..b6e4d53ae --- /dev/null +++ b/src/od/peekreader.rs @@ -0,0 +1,212 @@ +//! Contains the trait `PeekRead` and type `PeekReader` implementing it. + +use std::io; +use std::io::{Read, Write}; +use multifilereader::HasError; + +/// A trait which supplies a function to peek into a stream without +/// actually reading it. +/// +/// Like `std::io::Read`, it allows to read data from a stream, with +/// the additional possibility to reserve a part of the returned data +/// with the data which will be read in subsequent calls. +/// +pub trait PeekRead { + /// Reads data into a buffer. + /// + /// Fills `out` with data. The last `peek_size` bytes of `out` are + /// used for data which keeps available on subsequent calls. + /// `peek_size` must be smaller or equal to the size of `out`. + /// + /// Returns a tuple where the first number is the number of bytes + /// read from the stream, and the second number is the number of + /// bytes additionally read. Any of the numbers might be zero. + /// It can also return an error. + /// + /// A type implementing this trait, will typically also implement + /// `std::io::Read`. + /// + /// # Panics + /// Might panic if `peek_size` is larger then the size of `out` + fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)>; +} + +/// Wrapper for `std::io::Read` allowing to peek into the data to be read. 
+pub struct PeekReader { + inner: R, + temp_buffer: Vec, +} + +impl PeekReader { + /// Create a new `PeekReader` wrapping `inner` + pub fn new(inner: R) -> Self { + PeekReader { + inner: inner, + temp_buffer: Vec::new(), + } + } +} + +impl PeekReader { + fn read_from_tempbuffer(&mut self, mut out: &mut [u8]) -> usize { + match out.write(self.temp_buffer.as_mut_slice()) { + Ok(n) => { + self.temp_buffer.drain(..n); + n + }, + Err(_) => 0, + } + } + + fn write_to_tempbuffer(&mut self, bytes: &[u8]) { + // if temp_buffer is not empty, data has to be inserted in front + let org_buffer: Vec<_> = self.temp_buffer.drain(..).collect(); + self.temp_buffer.write(bytes).unwrap(); + self.temp_buffer.extend(org_buffer); + } +} + +impl Read for PeekReader { + fn read(&mut self, out: &mut [u8]) -> io::Result { + let start_pos = self.read_from_tempbuffer(out); + match self.inner.read(&mut out[start_pos..]) { + Err(e) => Err(e), + Ok(n) => Ok(n + start_pos), + } + } +} + +impl PeekRead for PeekReader { + /// Reads data into a buffer. + /// + /// See `PeekRead::peek_read`. 
+ /// + /// # Panics + /// If `peek_size` is larger than the size of `out` + fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)> { + assert!(out.len() >= peek_size); + match self.read(out) { + Err(e) => Err(e), + Ok(bytes_in_buffer) => { + let unused = out.len() - bytes_in_buffer; + if peek_size <= unused { + Ok((bytes_in_buffer, 0)) + } else { + let actual_peek_size = peek_size - unused; + let real_size = bytes_in_buffer - actual_peek_size; + self.write_to_tempbuffer(&out[real_size..bytes_in_buffer]); + Ok((real_size, actual_peek_size)) + } + }, + } + } +} + +impl HasError for PeekReader { + fn has_error(&self) -> bool { + self.inner.has_error() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read}; + + #[test] + fn test_read_normal() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefgh"[..])); + + let mut v = [0; 10]; + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_peek_read_without_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefgh"[..])); + + let mut v = [0; 10]; + assert_eq!(sut.peek_read(v.as_mut(), 0).unwrap(), (8,0)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_peek_read_and_read() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 8]; + assert_eq!(sut.read(v2.as_mut()).unwrap(), 6); + assert_eq!(v2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + } + + #[test] + fn test_peek_read_multiple_times() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut s1 = [0; 8]; + assert_eq!(sut.peek_read(s1.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(s1, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut s2 = [0; 
8]; + assert_eq!(sut.peek_read(s2.as_mut(), 4).unwrap(), (4, 2)); + assert_eq!(s2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + + let mut s3 = [0; 8]; + assert_eq!(sut.peek_read(s3.as_mut(), 4).unwrap(), (2, 0)); + assert_eq!(s3, [0x69, 0x6a, 0, 0, 0, 0, 0, 0]); + } + + #[test] + fn test_peek_read_and_read_with_small_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 2]; + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x67, 0x68]); + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x69, 0x6a]); + } + + #[test] + fn test_peek_read_with_smaller_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 2]; + assert_eq!(sut.peek_read(v2.as_mut(), 2).unwrap(), (0, 2)); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x67, 0x68]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x69, 0x6a]); + } + + #[test] + fn test_peek_read_peek_with_larger_peek_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 8]; + assert_eq!(sut.peek_read(v2.as_mut(), 8).unwrap(), (0, 6)); + assert_eq!(v2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + } +} diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs new 
file mode 100644 index 000000000..55a7c6076 --- /dev/null +++ b/src/od/prn_char.rs @@ -0,0 +1,166 @@ +use std::str::from_utf8; +use formatteriteminfo::*; + +pub static FORMAT_ITEM_A: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 4, + formatter: FormatWriter::IntWriter(format_item_a), +}; + +pub static FORMAT_ITEM_C: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 4, + formatter: FormatWriter::MultibyteWriter(format_item_c), +}; + + +static A_CHRS: [&'static str; 128] = +["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", "em", "sub", "esc", "fs", "gs", "rs", "us", + "sp", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "del"]; + +fn format_item_a(p: u64) -> String { + // itembytes == 1 + let b = (p & 0x7f) as u8; + format!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"??") + ) +} + + +static C_CHRS: [&'static str; 128] = [ +"\\0", "001", "002", "003", "004", "005", "006", "\\a", +"\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", +"020", "021", "022", "023", "024", "025", "026", "027", +"030", "031", "032", "033", "034", "035", "036", "037", + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", 
"]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "177"]; + + +fn format_item_c(bytes: &[u8]) -> String { + // itembytes == 1 + let b = bytes[0]; + + if b & 0x80 == 0x00 { + match C_CHRS.get(b as usize) { + Some(s) => format!("{:>4}", s), + None => format!("{:>4}", b), + } + } else if (b & 0xc0) == 0x80 { + // second or subsequent octet of an utf-8 sequence + String::from(" **") + } else if ((b & 0xe0) == 0xc0) && (bytes.len() >= 2) { + // start of a 2 octet utf-8 sequence + match from_utf8(&bytes[0..2]) { + Ok(s) => { format!("{:>4}", s) }, + Err(_) => { format!(" {:03o}", b) }, + } + } else if ((b & 0xf0) == 0xe0) && (bytes.len() >= 3) { + // start of a 3 octet utf-8 sequence + match from_utf8(&bytes[0..3]) { + Ok(s) => { format!("{:>4}", s) }, + Err(_) => { format!(" {:03o}", b) }, + } + } else if ((b & 0xf8) == 0xf0) && (bytes.len() >= 4) { + // start of a 4 octet utf-8 sequence + match from_utf8(&bytes[0..4]) { + Ok(s) => { format!("{:>4}", s) }, + Err(_) => { format!(" {:03o}", b) }, + } + } else { + // invalid utf-8 + format!(" {:03o}", b) + } +} + +pub fn format_ascii_dump(bytes: &[u8]) -> String { + let mut result = String::new(); + + result.push('>'); + for c in bytes.iter() { + if *c >= 0x20 && *c <= 0x7e { + result.push_str(C_CHRS[*c as usize]); + } else { + result.push('.'); + } + } + result.push('<'); + + result +} + +#[test] +fn test_format_item_a() { + assert_eq!(" nul", format_item_a(0x00)); + assert_eq!(" soh", format_item_a(0x01)); + assert_eq!(" sp", format_item_a(0x20)); + assert_eq!(" A", format_item_a(0x41)); + assert_eq!(" ~", format_item_a(0x7e)); + assert_eq!(" del", format_item_a(0x7f)); + + assert_eq!(" nul", format_item_a(0x80)); + assert_eq!(" A", format_item_a(0xc1)); + assert_eq!(" ~", format_item_a(0xfe)); + assert_eq!(" del", format_item_a(0xff)); +} + +#[test] +fn test_format_item_c() { + assert_eq!(" 
\\0", format_item_c(&[0x00])); + assert_eq!(" 001", format_item_c(&[0x01])); + assert_eq!(" ", format_item_c(&[0x20])); + assert_eq!(" A", format_item_c(&[0x41])); + assert_eq!(" ~", format_item_c(&[0x7e])); + assert_eq!(" 177", format_item_c(&[0x7f])); + assert_eq!(" A", format_item_c(&[0x41, 0x21])); + + assert_eq!(" **", format_item_c(&[0x80])); + assert_eq!(" **", format_item_c(&[0x9f])); + + assert_eq!(" ß", format_item_c(&[0xc3, 0x9f])); + assert_eq!(" ß", format_item_c(&[0xc3, 0x9f, 0x21])); + + assert_eq!(" \u{1000}", format_item_c(&[0xe1, 0x80, 0x80])); + assert_eq!(" \u{1000}", format_item_c(&[0xe1, 0x80, 0x80, 0x21])); + + assert_eq!(" \u{1f496}", format_item_c(&[0xf0, 0x9f, 0x92, 0x96])); + assert_eq!(" \u{1f496}", format_item_c(&[0xf0, 0x9f, 0x92, 0x96, 0x21])); + + assert_eq!(" 300", format_item_c(&[0xc0, 0x80])); // invalid utf-8 (MUTF-8 null) + assert_eq!(" 301", format_item_c(&[0xc1, 0xa1])); // invalid utf-8 + assert_eq!(" 303", format_item_c(&[0xc3, 0xc3])); // invalid utf-8 + assert_eq!(" 360", format_item_c(&[0xf0, 0x82, 0x82, 0xac])); // invalid utf-8 (overlong) + assert_eq!(" 360", format_item_c(&[0xf0, 0x9f, 0x92])); // invalid utf-8 (missing octet) + assert_eq!(" \u{10FFFD}", format_item_c(&[0xf4, 0x8f, 0xbf, 0xbd])); // largest valid utf-8 + assert_eq!(" 364", format_item_c(&[0xf4, 0x90, 0x00, 0x00])); // invalid utf-8 + assert_eq!(" 365", format_item_c(&[0xf5, 0x80, 0x80, 0x80])); // invalid utf-8 + assert_eq!(" 377", format_item_c(&[0xff])); // invalid utf-8 +} + +#[test] +fn test_format_ascii_dump() { + assert_eq!(">.<", format_ascii_dump(&[0x00])); + assert_eq!(">. 
A~.<", format_ascii_dump(&[0x1f, 0x20, 0x41, 0x7e, 0x7f])); +} diff --git a/src/od/prn_float.rs b/src/od/prn_float.rs new file mode 100644 index 000000000..26037c8b4 --- /dev/null +++ b/src/od/prn_float.rs @@ -0,0 +1,210 @@ +use std::num::FpCategory; +use half::f16; +use std::f32; +use std::f64; +use formatteriteminfo::*; + +pub static FORMAT_ITEM_F16: FormatterItemInfo = FormatterItemInfo { + byte_size: 2, + print_width: 10, + formatter: FormatWriter::FloatWriter(format_item_flo16), +}; + +pub static FORMAT_ITEM_F32: FormatterItemInfo = FormatterItemInfo { + byte_size: 4, + print_width: 15, + formatter: FormatWriter::FloatWriter(format_item_flo32), +}; + +pub static FORMAT_ITEM_F64: FormatterItemInfo = FormatterItemInfo { + byte_size: 8, + print_width: 25, + formatter: FormatWriter::FloatWriter(format_item_flo64), +}; + +pub fn format_item_flo16(f: f64) -> String { + format!(" {}", format_flo16(f16::from_f64(f))) +} + +pub fn format_item_flo32(f: f64) -> String { + format!(" {}", format_flo32(f as f32)) +} + +pub fn format_item_flo64(f: f64) -> String { + format!(" {}", format_flo64(f)) +} + +fn format_flo16(f: f16) -> String { + format_float(f64::from(f), 9, 4) +} + +// formats float with 8 significant digits, e.g. 12345678 or -1.2345678e12 +// always returns a string of 14 characters +fn format_flo32(f: f32) -> String { + let width: usize = 14; + let precision: usize = 8; + + if f.classify() == FpCategory::Subnormal { + // subnormal numbers will be normal as f64, so will print with a wrong precision + format!("{:width$e}", f, width = width) // subnormal numbers + } else { + format_float(f as f64, width, precision) + } +} + +fn format_flo64(f: f64) -> String { + format_float(f, 24, 17) +} + +fn format_float(f: f64, width: usize, precision: usize) -> String { + if !f.is_normal() { + if f == -0.0 && f.is_sign_negative() { return format!("{:>width$}", "-0", width = width) } + if f == 0.0 || !f.is_finite() { return format!("{:width$}", f, width = width) } + return 
format!("{:width$e}", f, width = width) // subnormal numbers + } + + let mut l = f.abs().log10().floor() as i32; + + let r = 10f64.powi(l); + if (f > 0.0 && r > f) || (f < 0.0 && -r < f) { + // fix precision error + l = l - 1; + } + + if l >= 0 && l <= (precision as i32 - 1) { + format!("{:width$.dec$}", f, + width = width, + dec = (precision-1) - l as usize) + } else if l == -1 { + format!("{:width$.dec$}", f, + width = width, + dec = precision) + } else { + format!("{:width$.dec$e}", f, + width = width, + dec = precision - 1) + } +} + +#[test] +fn test_format_flo32() { + assert_eq!(format_flo32(1.0), " 1.0000000"); + assert_eq!(format_flo32(9.9999990), " 9.9999990"); + assert_eq!(format_flo32(10.0), " 10.000000"); + assert_eq!(format_flo32(99.999977), " 99.999977"); + assert_eq!(format_flo32(99.999992), " 99.999992"); + assert_eq!(format_flo32(100.0), " 100.00000"); + assert_eq!(format_flo32(999.99994), " 999.99994"); + assert_eq!(format_flo32(1000.0), " 1000.0000"); + assert_eq!(format_flo32(9999.9990), " 9999.9990"); + assert_eq!(format_flo32(10000.0), " 10000.000"); + assert_eq!(format_flo32(99999.992), " 99999.992"); + assert_eq!(format_flo32(100000.0), " 100000.00"); + assert_eq!(format_flo32(999999.94), " 999999.94"); + assert_eq!(format_flo32(1000000.0), " 1000000.0"); + assert_eq!(format_flo32(9999999.0), " 9999999.0"); + assert_eq!(format_flo32(10000000.0), " 10000000"); + assert_eq!(format_flo32(99999992.0), " 99999992"); + assert_eq!(format_flo32(100000000.0), " 1.0000000e8"); + assert_eq!(format_flo32(9.9999994e8), " 9.9999994e8"); + assert_eq!(format_flo32(1.0e9), " 1.0000000e9"); + assert_eq!(format_flo32(9.9999990e9), " 9.9999990e9"); + assert_eq!(format_flo32(1.0e10), " 1.0000000e10"); + + assert_eq!(format_flo32(0.1), " 0.10000000"); + assert_eq!(format_flo32(0.99999994), " 0.99999994"); + assert_eq!(format_flo32(0.010000001), " 1.0000001e-2"); + assert_eq!(format_flo32(0.099999994), " 9.9999994e-2"); + assert_eq!(format_flo32(0.001), " 
1.0000000e-3"); + assert_eq!(format_flo32(0.0099999998), " 9.9999998e-3"); + + assert_eq!(format_flo32(-1.0), " -1.0000000"); + assert_eq!(format_flo32(-9.9999990), " -9.9999990"); + assert_eq!(format_flo32(-10.0), " -10.000000"); + assert_eq!(format_flo32(-99.999977), " -99.999977"); + assert_eq!(format_flo32(-99.999992), " -99.999992"); + assert_eq!(format_flo32(-100.0), " -100.00000"); + assert_eq!(format_flo32(-999.99994), " -999.99994"); + assert_eq!(format_flo32(-1000.0), " -1000.0000"); + assert_eq!(format_flo32(-9999.9990), " -9999.9990"); + assert_eq!(format_flo32(-10000.0), " -10000.000"); + assert_eq!(format_flo32(-99999.992), " -99999.992"); + assert_eq!(format_flo32(-100000.0), " -100000.00"); + assert_eq!(format_flo32(-999999.94), " -999999.94"); + assert_eq!(format_flo32(-1000000.0), " -1000000.0"); + assert_eq!(format_flo32(-9999999.0), " -9999999.0"); + assert_eq!(format_flo32(-10000000.0), " -10000000"); + assert_eq!(format_flo32(-99999992.0), " -99999992"); + assert_eq!(format_flo32(-100000000.0), " -1.0000000e8"); + assert_eq!(format_flo32(-9.9999994e8), " -9.9999994e8"); + assert_eq!(format_flo32(-1.0e9), " -1.0000000e9"); + assert_eq!(format_flo32(-9.9999990e9), " -9.9999990e9"); + assert_eq!(format_flo32(-1.0e10), " -1.0000000e10"); + + assert_eq!(format_flo32(-0.1), " -0.10000000"); + assert_eq!(format_flo32(-0.99999994), " -0.99999994"); + assert_eq!(format_flo32(-0.010000001), " -1.0000001e-2"); + assert_eq!(format_flo32(-0.099999994), " -9.9999994e-2"); + assert_eq!(format_flo32(-0.001), " -1.0000000e-3"); + assert_eq!(format_flo32(-0.0099999998), " -9.9999998e-3"); + + assert_eq!(format_flo32(3.4028233e38), " 3.4028233e38"); + assert_eq!(format_flo32(-3.4028233e38), " -3.4028233e38"); + assert_eq!(format_flo32(-1.1663108e-38),"-1.1663108e-38"); + assert_eq!(format_flo32(-4.7019771e-38),"-4.7019771e-38"); + assert_eq!(format_flo32(1e-45), " 1e-45"); + + assert_eq!(format_flo32(-3.402823466e+38), " -3.4028235e38"); + 
assert_eq!(format_flo32(f32::NAN), " NaN"); + assert_eq!(format_flo32(f32::INFINITY), " inf"); + assert_eq!(format_flo32(f32::NEG_INFINITY), " -inf"); + assert_eq!(format_flo32(-0.0), " -0"); + assert_eq!(format_flo32(0.0), " 0"); +} + +#[test] +fn test_format_flo64() { + assert_eq!(format_flo64(1.0), " 1.0000000000000000"); + assert_eq!(format_flo64(10.0), " 10.000000000000000"); + assert_eq!(format_flo64(1000000000000000.0), " 1000000000000000.0"); + assert_eq!(format_flo64(10000000000000000.0), " 10000000000000000"); + assert_eq!(format_flo64(100000000000000000.0), " 1.0000000000000000e17"); + + assert_eq!(format_flo64(-0.1), " -0.10000000000000001"); + assert_eq!(format_flo64(-0.01), " -1.0000000000000000e-2"); + + assert_eq!(format_flo64(-2.2250738585072014e-308),"-2.2250738585072014e-308"); + assert_eq!(format_flo64(4e-320), " 4e-320"); + assert_eq!(format_flo64(f64::NAN), " NaN"); + assert_eq!(format_flo64(f64::INFINITY), " inf"); + assert_eq!(format_flo64(f64::NEG_INFINITY), " -inf"); + assert_eq!(format_flo64(-0.0), " -0"); + assert_eq!(format_flo64(0.0), " 0"); +} + +#[test] +fn test_format_flo16() { + use half::consts::*; + + assert_eq!(format_flo16(f16::from_bits(0x8400u16)), "-6.104e-5"); + assert_eq!(format_flo16(f16::from_bits(0x8401u16)), "-6.109e-5"); + assert_eq!(format_flo16(f16::from_bits(0x8402u16)), "-6.115e-5"); + assert_eq!(format_flo16(f16::from_bits(0x8403u16)), "-6.121e-5"); + + assert_eq!(format_flo16(f16::from_f32(1.0)), " 1.000"); + assert_eq!(format_flo16(f16::from_f32(10.0)), " 10.00"); + assert_eq!(format_flo16(f16::from_f32(100.0)), " 100.0"); + assert_eq!(format_flo16(f16::from_f32(1000.0)), " 1000"); + assert_eq!(format_flo16(f16::from_f32(10000.0)), " 1.000e4"); + + assert_eq!(format_flo16(f16::from_f32(-0.2)), " -0.2000"); + assert_eq!(format_flo16(f16::from_f32(-0.02)), "-2.000e-2"); + + assert_eq!(format_flo16(MIN_POSITIVE_SUBNORMAL), " 5.966e-8"); + assert_eq!(format_flo16(MIN), " -6.550e4"); + assert_eq!(format_flo16(NAN), 
" NaN"); + assert_eq!(format_flo16(INFINITY), " inf"); + assert_eq!(format_flo16(NEG_INFINITY), " -inf"); + assert_eq!(format_flo16(NEG_ZERO), " -0"); + assert_eq!(format_flo16(ZERO), " 0"); +} diff --git a/src/od/prn_int.rs b/src/od/prn_int.rs new file mode 100644 index 000000000..9dd1fe58c --- /dev/null +++ b/src/od/prn_int.rs @@ -0,0 +1,151 @@ +use formatteriteminfo::*; + +/// format string to print octal using `int_writer_unsigned` +macro_rules! OCT { () => { " {:0width$o}" }} +/// format string to print hexadecimal using `int_writer_unsigned` +macro_rules! HEX { () => { " {:0width$x}" }} +/// format string to print decimal using `int_writer_unsigned` or `int_writer_signed` +macro_rules! DEC { () => { " {:width$}" }} + +/// defines a static struct of type `FormatterItemInfo` called `$NAME` +/// +/// Used to format unsigned integer types with help of a function called `$function` +/// `$byte_size` is the size of the type, `$print_width` is the maximum width in +/// human-readable format. `$format_str` is one of OCT, HEX or DEC +macro_rules! int_writer_unsigned { + ($NAME:ident, $byte_size:expr, $print_width:expr, $function:ident, $format_str:expr) => { + fn $function(p: u64) -> String { + format!($format_str, + p, + width = $print_width - 1) + } + + pub static $NAME: FormatterItemInfo = FormatterItemInfo { + byte_size: $byte_size, + print_width: $print_width, + formatter: FormatWriter::IntWriter($function), + }; + } +} + +/// defines a static struct of type `FormatterItemInfo` called `$NAME` +/// +/// Used to format signed integer types with help of a function called `$function` +/// `$byte_size` is the size of the type, `$print_width` is the maximum width in +/// human-readable format. `$format_str` should be DEC +macro_rules! 
int_writer_signed { + ($NAME:ident, $byte_size:expr, $print_width:expr, $function:ident, $format_str:expr) => { + fn $function(p: u64) -> String { + let s = sign_extend(p, $byte_size); + format!($format_str, + s, + width = $print_width - 1) + } + + pub static $NAME: FormatterItemInfo = FormatterItemInfo { + byte_size: $byte_size, + print_width: $print_width, + formatter: FormatWriter::IntWriter($function), + }; + } +} + +/// Extends a signed number in `item` of `itembytes` bytes into a (signed) i64 +fn sign_extend(item: u64, itembytes: usize) -> i64{ + let shift = 64 - itembytes * 8; + (item << shift) as i64 >> shift +} + + +int_writer_unsigned!(FORMAT_ITEM_OCT8, 1, 4, format_item_oct8, OCT!()); // max: 377 +int_writer_unsigned!(FORMAT_ITEM_OCT16, 2, 7, format_item_oct16, OCT!()); // max: 177777 +int_writer_unsigned!(FORMAT_ITEM_OCT32, 4, 12, format_item_oct32, OCT!()); // max: 37777777777 +int_writer_unsigned!(FORMAT_ITEM_OCT64, 8, 23, format_item_oct64, OCT!()); // max: 1777777777777777777777 + +int_writer_unsigned!(FORMAT_ITEM_HEX8, 1, 3, format_item_hex8, HEX!()); // max: ff +int_writer_unsigned!(FORMAT_ITEM_HEX16, 2, 5, format_item_hex16, HEX!()); // max: ffff +int_writer_unsigned!(FORMAT_ITEM_HEX32, 4, 9, format_item_hex32, HEX!()); // max: ffffffff +int_writer_unsigned!(FORMAT_ITEM_HEX64, 8, 17, format_item_hex64, HEX!()); // max: ffffffffffffffff + +int_writer_unsigned!(FORMAT_ITEM_DEC8U, 1, 4, format_item_dec_u8, DEC!()); // max: 255 +int_writer_unsigned!(FORMAT_ITEM_DEC16U, 2, 6, format_item_dec_u16, DEC!()); // max: 65535 +int_writer_unsigned!(FORMAT_ITEM_DEC32U, 4, 11, format_item_dec_u32, DEC!()); // max: 4294967295 +int_writer_unsigned!(FORMAT_ITEM_DEC64U, 8, 21, format_item_dec_u64, DEC!()); // max: 18446744073709551615 + +int_writer_signed!(FORMAT_ITEM_DEC8S, 1, 5, format_item_dec_s8, DEC!()); // max: -128 +int_writer_signed!(FORMAT_ITEM_DEC16S, 2, 7, format_item_dec_s16, DEC!()); // max: -32768 +int_writer_signed!(FORMAT_ITEM_DEC32S, 4, 12, 
format_item_dec_s32, DEC!()); // max: -2147483648 +int_writer_signed!(FORMAT_ITEM_DEC64S, 8, 21, format_item_dec_s64, DEC!()); // max: -9223372036854775808 + +#[test] +fn test_sign_extend() { + assert_eq!(0xffffffffffffff80u64 as i64, sign_extend(0x0000000000000080, 1)); + assert_eq!(0xffffffffffff8000u64 as i64, sign_extend(0x0000000000008000, 2)); + assert_eq!(0xffffffffff800000u64 as i64, sign_extend(0x0000000000800000, 3)); + assert_eq!(0xffffffff80000000u64 as i64, sign_extend(0x0000000080000000, 4)); + assert_eq!(0xffffff8000000000u64 as i64, sign_extend(0x0000008000000000, 5)); + assert_eq!(0xffff800000000000u64 as i64, sign_extend(0x0000800000000000, 6)); + assert_eq!(0xff80000000000000u64 as i64, sign_extend(0x0080000000000000, 7)); + assert_eq!(0x8000000000000000u64 as i64, sign_extend(0x8000000000000000, 8)); + + assert_eq!(0x000000000000007f, sign_extend(0x000000000000007f, 1)); + assert_eq!(0x0000000000007fff, sign_extend(0x0000000000007fff, 2)); + assert_eq!(0x00000000007fffff, sign_extend(0x00000000007fffff, 3)); + assert_eq!(0x000000007fffffff, sign_extend(0x000000007fffffff, 4)); + assert_eq!(0x0000007fffffffff, sign_extend(0x0000007fffffffff, 5)); + assert_eq!(0x00007fffffffffff, sign_extend(0x00007fffffffffff, 6)); + assert_eq!(0x007fffffffffffff, sign_extend(0x007fffffffffffff, 7)); + assert_eq!(0x7fffffffffffffff, sign_extend(0x7fffffffffffffff, 8)); +} + +#[test] +fn test_format_item_oct() { + assert_eq!(" 000", format_item_oct8(0)); + assert_eq!(" 377", format_item_oct8(0xff)); + assert_eq!(" 000000", format_item_oct16(0)); + assert_eq!(" 177777", format_item_oct16(0xffff)); + assert_eq!(" 00000000000", format_item_oct32(0)); + assert_eq!(" 37777777777", format_item_oct32(0xffffffff)); + assert_eq!(" 0000000000000000000000", format_item_oct64(0)); + assert_eq!(" 1777777777777777777777", format_item_oct64(0xffffffffffffffff)); +} + +#[test] +fn test_format_item_hex() { + assert_eq!(" 00", format_item_hex8(0)); + assert_eq!(" ff", 
format_item_hex8(0xff)); + assert_eq!(" 0000", format_item_hex16(0)); + assert_eq!(" ffff", format_item_hex16(0xffff)); + assert_eq!(" 00000000", format_item_hex32(0)); + assert_eq!(" ffffffff", format_item_hex32(0xffffffff)); + assert_eq!(" 0000000000000000", format_item_hex64(0)); + assert_eq!(" ffffffffffffffff", format_item_hex64(0xffffffffffffffff)); +} + +#[test] +fn test_format_item_dec_u() { + assert_eq!(" 0", format_item_dec_u8(0)); + assert_eq!(" 255", format_item_dec_u8(0xff)); + assert_eq!(" 0", format_item_dec_u16(0)); + assert_eq!(" 65535", format_item_dec_u16(0xffff)); + assert_eq!(" 0", format_item_dec_u32(0)); + assert_eq!(" 4294967295", format_item_dec_u32(0xffffffff)); + assert_eq!(" 0", format_item_dec_u64(0)); + assert_eq!(" 18446744073709551615", format_item_dec_u64(0xffffffffffffffff)); +} + +#[test] +fn test_format_item_dec_s() { + assert_eq!(" 0", format_item_dec_s8(0)); + assert_eq!(" 127", format_item_dec_s8(0x7f)); + assert_eq!(" -128", format_item_dec_s8(0x80)); + assert_eq!(" 0", format_item_dec_s16(0)); + assert_eq!(" 32767", format_item_dec_s16(0x7fff)); + assert_eq!(" -32768", format_item_dec_s16(0x8000)); + assert_eq!(" 0", format_item_dec_s32(0)); + assert_eq!(" 2147483647", format_item_dec_s32(0x7fffffff)); + assert_eq!(" -2147483648", format_item_dec_s32(0x80000000)); + assert_eq!(" 0", format_item_dec_s64(0)); + assert_eq!(" 9223372036854775807", format_item_dec_s64(0x7fffffffffffffff)); + assert_eq!(" -9223372036854775808", format_item_dec_s64(0x8000000000000000)); +} diff --git a/tests/fixtures/od/-f b/tests/fixtures/od/-f new file mode 100644 index 000000000..370c31180 --- /dev/null +++ b/tests/fixtures/od/-f @@ -0,0 +1 @@ +minus lowercase f diff --git a/tests/fixtures/od/0 b/tests/fixtures/od/0 new file mode 100644 index 000000000..26af6a865 --- /dev/null +++ b/tests/fixtures/od/0 @@ -0,0 +1 @@ +zero diff --git a/tests/fixtures/od/c b/tests/fixtures/od/c new file mode 100644 index 000000000..109c7e9b0 --- /dev/null +++ 
b/tests/fixtures/od/c @@ -0,0 +1 @@ +lowercase c diff --git a/tests/fixtures/od/x b/tests/fixtures/od/x new file mode 100644 index 000000000..584b8c5f2 --- /dev/null +++ b/tests/fixtures/od/x @@ -0,0 +1 @@ +lowercase x diff --git a/tests/test_od.rs b/tests/test_od.rs index 1933aa23c..a10022a75 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -1,13 +1,20 @@ +extern crate unindent; + use common::util::*; use std::path::Path; use std::env; use std::io::Write; use std::fs::File; use std::fs::remove_file; +use self::unindent::*; // octal dump of 'abcdefghijklmnopqrstuvwxyz\n' -static ALPHA_OUT: &'static str = "0000000 061141 062143 063145 064147 065151 066153 067155 070157\n0000020 071161 072163 073165 074167 075171 000012 \n0000033\n"; +static ALPHA_OUT: &'static str = " + 0000000 061141 062143 063145 064147 065151 066153 067155 070157 + 0000020 071161 072163 073165 074167 075171 000012 + 0000033 + "; // XXX We could do a better job of ensuring that we have a fresh temp dir to ourself, // not a general one ful of other proc's leftovers. 
@@ -28,11 +35,11 @@ fn test_file() { } } - let result = new_ucmd!().arg(file.as_os_str()).run(); + let result = new_ucmd!().arg("--endian=little").arg(file.as_os_str()).run(); assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); let _ = remove_file(file); } @@ -57,11 +64,11 @@ fn test_2files() { } } - let result = new_ucmd!().arg(file1.as_os_str()).arg(file2.as_os_str()).run(); + let result = new_ucmd!().arg("--endian=little").arg(file1.as_os_str()).arg(file2.as_os_str()).run(); assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); let _ = remove_file(file1); let _ = remove_file(file2); @@ -82,20 +89,17 @@ fn test_no_file() { // Test that od reads from stdin instead of a file #[test] fn test_from_stdin() { - let input = "abcdefghijklmnopqrstuvwxyz\n"; - let result = new_ucmd!().run_piped_stdin(input.as_bytes()); + let result = new_ucmd!().arg("--endian=little").run_piped_stdin(input.as_bytes()); assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); - + assert_eq!(result.stdout, unindent(ALPHA_OUT)); } // Test that od reads from stdin and also from files #[test] fn test_from_mixed() { - let temp = env::temp_dir(); let tmpdir = Path::new(&temp); let file1 = tmpdir.join("test-1"); @@ -110,30 +114,31 @@ fn test_from_mixed() { } } - let result = new_ucmd!().arg(file1.as_os_str()).arg("--").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); + let result = new_ucmd!().arg("--endian=little").arg(file1.as_os_str()).arg("-").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); - + assert_eq!(result.stdout, unindent(ALPHA_OUT)); } #[test] fn test_multiple_formats() { - let input = "abcdefghijklmnopqrstuvwxyz\n"; let result = 
new_ucmd!().arg("-c").arg("-b").run_piped_stdin(input.as_bytes()); assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, "0000000 a b c d e f g h i j k l m n o p\n 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160\n0000020 q r s t u v w x y z \\n \n 161 162 163 164 165 166 167 170 171 172 012 \n0000033\n"); - + assert_eq!(result.stdout, unindent(" + 0000000 a b c d e f g h i j k l m n o p + 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 + 0000020 q r s t u v w x y z \\n + 161 162 163 164 165 166 167 170 171 172 012 + 0000033 + ")); } #[test] fn test_dec() { - - let input = [ 0u8, 0u8, 1u8, 0u8, @@ -142,25 +147,519 @@ fn test_dec() { 0xffu8,0x7fu8, 0x00u8,0x80u8, 0x01u8,0x80u8,]; - let expected_output = "0000000 0 1 2 3 32767 -32768 -32767 \n0000016\n"; - let result = new_ucmd!().arg("-i").run_piped_stdin(&input[..]); + let expected_output = unindent(" + 0000000 0 1 2 3 32767 -32768 -32767 + 0000016 + "); + let result = new_ucmd!().arg("--endian=little").arg("-s").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, expected_output); - } - -// We don't support multibyte chars, so big NEIN to this -/* #[test] -fn mit_die_umlauten_getesten() { - let result = new_ucmd!() - .run_piped_stdin("Universität Tübingen".as_bytes()); +fn test_hex16(){ + let input: [u8; 9] = [ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; + let expected_output = unindent(" + 0000000 2301 6745 ab89 efcd 00ff + 0000011 + "); + let result = new_ucmd!().arg("--endian=little").arg("-x").run_piped_stdin(&input[..]); + assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, - "0000000 U n i v e r s i t ä ** t T ü **\n0000020 b i n g e n\n0000026") + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_hex32(){ + let input: [u8; 9] = [ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; + let expected_output = unindent(" + 0000000 
67452301 efcdab89 000000ff + 0000011 + "); + let result = new_ucmd!().arg("--endian=little").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_f16(){ + let input: [u8; 14] = [ + 0x00, 0x3c, // 0x3C00 1.0 + 0x00, 0x00, // 0x0000 0.0 + 0x00, 0x80, // 0x8000 -0.0 + 0x00, 0x7c, // 0x7C00 Inf + 0x00, 0xfc, // 0xFC00 -Inf + 0x00, 0xfe, // 0xFE00 NaN + 0x00, 0x84];// 0x8400 -6.104e-5 + let expected_output = unindent(" + 0000000 1.000 0 -0 inf + 0000010 -inf NaN -6.104e-5 + 0000016 + "); + let result = new_ucmd!().arg("--endian=little").arg("-tf2").arg("-w8").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_f32(){ + let input: [u8; 28] = [ + 0x52, 0x06, 0x9e, 0xbf, // 0xbf9e0652 -1.2345679 + 0x4e, 0x61, 0x3c, 0x4b, // 0x4b3c614e 12345678 + 0x0f, 0x9b, 0x94, 0xfe, // 0xfe949b0f -9.876543E37 + 0x00, 0x00, 0x00, 0x80, // 0x80000000 -0.0 + 0xff, 0xff, 0xff, 0x7f, // 0x7fffffff NaN + 0xc2, 0x16, 0x01, 0x00, // 0x000116c2 1e-40 + 0x00, 0x00, 0x7f, 0x80];// 0x807f0000 -1.1663108E-38 + let expected_output = unindent(" + 0000000 -1.2345679 12345678 -9.8765427e37 -0 + 0000020 NaN 1e-40 -1.1663108e-38 + 0000034 + "); + let result = new_ucmd!().arg("--endian=little").arg("-f").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_f64(){ + let input: [u8; 40] = [ + 0x27, 0x6b, 0x0a, 0x2f, 0x2a, 0xee, 0x45, 0x43, // 0x4345EE2A2F0A6B27 12345678912345678 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x0000000000000000 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x80, // 0x8010000000000000 -2.2250738585072014e-308 + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x0000000000000001 5e-324 (subnormal) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0xc0];// 0xc000000000000000 -2 + let expected_output = unindent(" + 0000000 12345678912345678 0 + 0000020 -2.2250738585072014e-308 5e-324 + 0000040 -2.0000000000000000 + 0000050 + "); + let result = new_ucmd!().arg("--endian=little").arg("-F").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_multibyte() { + let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen \u{1B000}".as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 0000000 U n i v e r s i t ä ** t + 0000014 T ü ** b i n g e n \u{1B000} + 0000030 ** ** ** + 0000033 + ")); +} + +#[test] +fn test_width(){ + let input: [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let expected_output = unindent(" + 0000000 000000 000000 + 0000004 000000 000000 + 0000010 + "); + + let result = new_ucmd!().arg("-w4").arg("-v").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_invalid_width(){ + let input: [u8; 4] = [0x00, 0x00, 0x00, 0x00]; + let expected_output = unindent(" + 0000000 000000 + 0000002 000000 + 0000004 + "); + + let result = new_ucmd!().arg("-w5").arg("-v").run_piped_stdin(&input[..]); + + assert_eq!(result.stderr, "od: warning: invalid width 5; using 2 instead\n"); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_zero_width(){ + let input: [u8; 4] = [0x00, 0x00, 0x00, 0x00]; + let expected_output = unindent(" + 0000000 000000 + 0000002 000000 + 0000004 + "); + + let result = new_ucmd!().arg("-w0").arg("-v").run_piped_stdin(&input[..]); + + assert_eq!(result.stderr, "od: warning: invalid width 0; using 2 instead\n"); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_width_without_value(){ + let input: 
[u8; 40] = [0 ; 40]; + let expected_output = unindent(" + 0000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 + 0000040 000000 000000 000000 000000 + 0000050 + "); + + let result = new_ucmd!().arg("-w").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_suppress_duplicates(){ + let input: [u8; 41] = [ + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 1, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0]; + let expected_output = unindent(" + 0000000 00000000000 + 0000 0000 + * + 0000020 00000000001 + 0001 0000 + 0000024 00000000000 + 0000 0000 + * + 0000050 00000000000 + 0000 + 0000051 + "); + + let result = new_ucmd!().arg("-w4").arg("-O").arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_big_endian() { + let input: [u8; 8] = [ + 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];// 0xc000000000000000 -2 + + let expected_output = unindent(" + 0000000 -2.0000000000000000 + -2.0000000 0 + c0000000 00000000 + c000 0000 0000 0000 + 0000010 + "); + + let result = new_ucmd!().arg("--endian=big").arg("-F").arg("-f").arg("-X").arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +#[allow(non_snake_case)] +fn test_alignment_Xxa() { + let input: [u8; 8] = [ + 0x0A, 0x0D, 0x65, 0x66, 0x67, 0x00, 0x9e, 0x9f]; + + let expected_output = unindent(" + 0000000 66650d0a 9f9e0067 + 0d0a 6665 0067 9f9e + nl cr e f g nul rs us + 0000010 + "); + + // in this case the width of the -a (8-bit) determines the alignment for the other fields + let result = new_ucmd!().arg("--endian=little").arg("-X").arg("-x").arg("-a").run_piped_stdin(&input[..]); + + 
assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +#[allow(non_snake_case)] +fn test_alignment_Fx() { + let input: [u8; 8] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0];// 0xc000000000000000 -2 + + let expected_output = unindent(" + 0000000 -2.0000000000000000 + 0000 0000 0000 c000 + 0000010 + "); + + // in this case the width of the -F (64-bit) determines the alignment for the other field + let result = new_ucmd!().arg("--endian=little").arg("-F").arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_maxuint(){ + let input = [0xFFu8 ; 8]; + let expected_output = unindent(" + 0000000 1777777777777777777777 + 37777777777 37777777777 + 177777 177777 177777 177777 + 377 377 377 377 377 377 377 377 + 18446744073709551615 + 4294967295 4294967295 + 65535 65535 65535 65535 + 255 255 255 255 255 255 255 255 + 0000010 + "); + + let result = new_ucmd!().arg("--format=o8").arg("-Oobtu8").arg("-Dd").arg("--format=u1").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_hex_offset(){ + let input = [0u8 ; 0x1F]; + let expected_output = unindent(" + 000000 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 000010 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 00001F + "); + + let result = new_ucmd!().arg("-Ax").arg("-X").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_dec_offset(){ + let input = [0u8 ; 19]; + let expected_output = unindent(" + 0000000 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 0000016 00000000 + 00000000 + 0000019 + "); + + let result = 
new_ucmd!().arg("-Ad").arg("-X").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_no_offset(){ + let input = [0u8 ; 31]; + const LINE: &'static str = " 00000000 00000000 00000000 00000000\n"; + let expected_output = [LINE, LINE, LINE, LINE].join(""); + + let result = new_ucmd!().arg("-An").arg("-X").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_invalid_offset(){ + let result = new_ucmd!().arg("-Ab").run(); + + assert!(!result.success); +} + +#[test] +fn test_skip_bytes(){ + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("-c").arg("--skip-bytes=5").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 0000005 f g h i j k l m n o p q + 0000021 + ")); +} + +#[test] +fn test_skip_bytes_error(){ + let input = "12345"; + let result = new_ucmd!().arg("--skip-bytes=10").run_piped_stdin(input.as_bytes()); + + assert!(!result.success); +} + +#[test] +fn test_read_bytes(){ + let input = "abcdefghijklmnopqrstuvwxyz\n12345678"; + let result = new_ucmd!().arg("--endian=little").arg("--read-bytes=27").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); +} + +#[test] +fn test_ascii_dump(){ + let input: [u8; 22] = [ + 0x00, 0x01, 0x0a, 0x0d, 0x10, 0x1f, 0x20, 0x61, 0x62, 0x63, 0x7d, + 0x7e, 0x7f, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xff]; + let result = new_ucmd!().arg("-tx1zacz").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000000 00 01 0a 0d 10 1f 20 61 62 63 7d 7e 7f 80 90 a0 >...... 
abc}~....< + nul soh nl cr dle us sp a b c } ~ del nul dle sp + \0 001 \n \r 020 037 a b c } ~ 177 ** ** ** >...... abc}~....< + 0000020 b0 c0 d0 e0 f0 ff >......< + 0 @ P ` p del + ** 300 320 340 360 377 >......< + 0000026 + ")); +} + +#[test] +fn test_filename_parsing(){ + // files "a" and "x" both exists, but are no filenames in the commandline below + // "-f" must be treated as a filename, it contains the text: minus lowercase f + // so "-f" should not be interpreted as a formatting option. + let result = new_ucmd!().arg("--format").arg("a").arg("-A").arg("x").arg("--").arg("-f").run(); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 000000 m i n u s sp l o w e r c a s e sp + 000010 f nl + 000012 + ")); +} + +#[test] +fn test_stdin_offset(){ + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("-c").arg("+5").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 0000005 f g h i j k l m n o p q + 0000021 + ")); +} + +#[test] +fn test_file_offset(){ + let result = new_ucmd!().arg("-c").arg("--").arg("-f").arg("10").run(); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000010 w e r c a s e f \n + 0000022 + ")); +} + +#[test] +fn test_traditional(){ + // note gnu od does not align both lines + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("--traditional").arg("-a").arg("-c").arg("-").arg("10").arg("0").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000010 (0000000) i j k l m n o p q + i j k l m n o p q + 0000021 (0000011) + ")); +} + +#[test] +fn test_traditional_with_skip_bytes_override(){ + // --skip-bytes is ignored in this case + let input = "abcdefghijklmnop"; + let result = 
new_ucmd!().arg("--traditional").arg("--skip-bytes=10").arg("-c").arg("0").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000000 a b c d e f g h i j k l m n o p + 0000020 + ")); +} + +#[test] +fn test_traditional_with_skip_bytes_non_override(){ + // no offset specified in the traditional way, so --skip-bytes is used + let input = "abcdefghijklmnop"; + let result = new_ucmd!().arg("--traditional").arg("--skip-bytes=10").arg("-c").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000012 k l m n o p + 0000020 + ")); +} + +#[test] +fn test_traditional_error(){ + // file "0" exists - don't fail on that, but --traditional only accepts a single input + let result = new_ucmd!().arg("--traditional").arg("0").arg("0").arg("0").arg("0").run(); + + assert!(!result.success); +} + +#[test] +fn test_traditional_only_label(){ + let input = "abcdefghijklmnopqrstuvwxyz"; + let result = new_ucmd!().arg("-An").arg("--traditional").arg("-a").arg("-c").arg("-").arg("10").arg("0x10").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + (0000020) i j k l m n o p q r s t u v w x + i j k l m n o p q r s t u v w x + (0000040) y z + y z + (0000042) + ")); } -*/