From e909e665f07645eebefdb846548a795540746011 Mon Sep 17 00:00:00 2001 From: Christopher Ian Stern Date: Sun, 22 May 2016 15:46:20 -0500 Subject: [PATCH] od: take more format options, respect their order. --- src/od/od.rs | 515 +++++++++++++++++++++++++++++++++++------------ tests/test_od.rs | 93 +++++++-- 2 files changed, 461 insertions(+), 147 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index 7b52011e6..62693a1a3 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -17,16 +17,26 @@ use std::mem; use std::io::BufReader; use std::io::Write; use std::io; - + +//This is available in some versions of std, but not all that we target. +macro_rules! hashmap { + ($( $key: expr => $val: expr ),*) => {{ + let mut map = ::std::collections::HashMap::new(); + $( map.insert($key, $val); )* + map + }} +} + + #[derive(Debug)] enum Radix { Decimal, Hexadecimal, Octal, Binary } - + #[derive(Debug)] enum InputSource<'a> { FileName(&'a str ), Stdin } - + pub fn uumain(args: Vec) -> i32 { let mut opts = getopts::Options::new(); @@ -40,6 +50,19 @@ pub fn uumain(args: Vec) -> i32 { ("output strings of at least BYTES graphic chars. 3 is assumed when \ BYTES is not specified."), "BYTES"); + opts.optflag("a", "", "named characters, ignoring high-order bit"); + opts.optflag("b", "", "octal bytes"); + opts.optflag("c", "", "ASCII characters or backslash escapes"); + opts.optflag("d", "", "unsigned decimal 2-byte units"); + opts.optflag("o", "", "unsigned decimal 2-byte units"); + + opts.optflag("I", "", "decimal 2-byte units"); + opts.optflag("L", "", "decimal 2-byte units"); + opts.optflag("i", "", "decimal 2-byte units"); + + opts.optflag("O", "", "octal 4-byte units"); + opts.optflag("s", "", "decimal 4-byte units"); + opts.optopt("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); opts.optopt("w", "width", @@ -58,140 +81,180 @@ pub fn uumain(args: Vec) -> i32 { Ok(r) => r, Err(f) => { panic!("Invalid -A/--address-radix\n{}", f) } }; - + // Gather up file names - args which don't start with '-' - let fnames = args[1..] - .iter() - .filter(|w| !w.starts_with('-') || w == &"--" ) // "--" starts with '-', but it denotes stdin, not a flag - .map(|x| match x.as_str() { "--" => InputSource::Stdin, x => InputSource::FileName(x)}) - .collect::>(); - - // With no filenames, od uses stdin as input. - if fnames.len() == 0 { - odfunc(&input_offset_base, &[InputSource::Stdin]) - } - else { - odfunc(&input_offset_base, &fnames) - } + let stdnionly = [InputSource::Stdin]; + let inputs = args[1..] + .iter() + .filter_map(|w| match w as &str { + "--" => Some(InputSource::Stdin), + o if o.starts_with("-") => None, + x => Some(InputSource::FileName(x)), + }) + .collect::>(); + // If no input files named, use stdin. + let inputs = if inputs.len() == 0 { + &stdnionly[..] + } else { + &inputs[..] + }; + // Gather up format flags, we don't use getopts becase we need keep them in order. + let flags = args[1..] + .iter() + .filter_map(|w| match w as &str { + "--" => None, + o if o.starts_with("-") => Some(&o[1..]), + _ => None, + }) + .collect::>(); + + // At the moment, char (-a & -c)formats need the driver to set up a + // line by inserting a different # of of spaces at the start. + struct OdFormater { + writer: fn(p: u64, itembytes: usize), + offmarg: usize, + }; + let oct = OdFormater { + writer: print_item_oct, offmarg: 2 + }; + let hex = OdFormater { + writer: print_item_hex, offmarg: 2 + }; + let dec_u = OdFormater { + writer: print_item_dec_u, offmarg: 2 + }; + let dec_s = OdFormater { + writer: print_item_dec_s, offmarg: 2 + }; + let a_char = OdFormater { + writer: print_item_a, offmarg: 1 + }; + let c_char = OdFormater { + writer: print_item_c, offmarg: 1 + }; + + fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat { + OdFormat { + itembytes: itembytes, + writer: fmtspec.writer, + offmarg: fmtspec.offmarg, + } + } + +// TODO: -t fmts + let known_formats = hashmap![ + "a" => (1, &a_char), + "B" => (2, &oct) , + "b" => (1, &oct), + "c" => (1, &c_char), + "D" => (4, &dec_u), + // TODO: support floats + // "e" => (8, &flo64), + // "F" => (8, &flo64), + // "F" => (4, &flo32), + "H" => (4, &hex), + "X" => (4, &hex) , + "o" => (2, &oct), + "x" => (2, &hex), + "h" => (2, &hex), + + "I" => (2, &dec_s), + "L" => (2, &dec_s), + "i" => (2, &dec_s), + + "O" => (4, &oct), + "s" => (2, &dec_u) + ]; + + let mut formats = Vec::new(); + + for flag in flags.iter() { + match known_formats.get(flag) { + None => {} // not every option is a format + Some(r) => { + let (itembytes, fmtspec) = *r; + formats.push(mkfmt(itembytes, fmtspec)) + } + } + } + + if formats.is_empty() { + formats.push(mkfmt(2, &oct)); // 2 byte octal is the default + } + + odfunc(&input_offset_base, &inputs, &formats[..]) } const LINEBYTES:usize = 16; const WORDBYTES:usize = 2; - -fn odfunc(input_offset_base: &Radix, fnames: &[InputSource]) -> i32 { - - let mut status = 0; - let mut ni = fnames.iter(); - { - // Open and return the next file to process as a BufReader - // Returns None when no more files. - let mut next_file = || -> Option> { - // loop retries with subsequent files if err - normally 'loops' once - loop { - match ni.next() { - None => return None, - Some(input) => match *input { - InputSource::Stdin => return Some(Box::new(BufReader::new(std::io::stdin()))), - InputSource::FileName(fname) => match File::open(fname) { - Ok(f) => return Some(Box::new(BufReader::new(f))), - Err(e) => { - // If any file can't be opened, - // print an error at the time that the file is needed, - // then move on the the next file. - // This matches the behavior of the original `od` - let _ = writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e); - if status == 0 {status = 1} - } - } - } - } - } - }; - - let mut curr_file: Box = match next_file() { - Some(f) => f, - None => { - return 1; - } - }; - - let mut exhausted = false; // There is no more input, gone to the end of the last file. - // Fill buf with bytes read from the list of files - // Returns Ok() - // Handles io errors itself, thus always returns OK - // Fills the provided buffer completely, unless it has run out of input. - // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> - let mut f_read = |buf: &mut [u8]| -> io::Result { - if exhausted { - Ok(0) - } else { - let mut xfrd = 0; - // while buffer we are filling is not full.. May go thru several files. - 'fillloop: while xfrd < buf.len() { - loop { // stdin may return on 'return' (enter), even though the buffer isn't full. - xfrd += match curr_file.read(&mut buf[xfrd..]) { - Ok(0) => break, - Ok(n) => n, - Err(e) => panic!("file error: {}", e), - }; - if xfrd == buf.len() { - // transferred all that was asked for. - break 'fillloop; - } +fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 { + + let mut mf = MultifileReader::new(fnames); + let mut addr = 0; + let bytes = &mut [b'\x00'; LINEBYTES]; + loop { + // print each line data (or multi-format raster of several lines describing the same data). + + print_with_radix(input_offset_base, addr); // print offset + // if printing in multiple formats offset is printed only once + + match mf.f_read(bytes) { + Ok(0) => { + print!("\n"); + break; + } + Ok(n) => { + let mut first = true; // First line of a multi-format raster. + for f in formats { + if !first { + // this takes the space of the file offset on subsequent + // lines of multi-format rasters. + print!(" "); } - curr_file = match next_file() { - Some(f) => f, - None => { - exhausted = true; - break; - } - }; + first = false; + print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word + + for b in 0..n / f.itembytes { + let mut p: u64 = 0; + for i in 0..f.itembytes { + p |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i); + } + (f.writer)(p, f.itembytes); + } + // not enough byte for a whole element, this should only happen on the last line. + if n % f.itembytes != 0 { + let b = n / f.itembytes; + let mut p2: u64 = 0; + for i in 0..(n % f.itembytes) { + p2 |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i); + } + (f.writer)(p2, f.itembytes); + } + // Add extra spaces to pad out the short, presumably last, line. + if n < LINEBYTES { + // calc # of items we did not print, must be short at least WORDBYTES to be missing any. + let words_short = (LINEBYTES - n) / WORDBYTES; + // XXX this is running short for -c & -a + print!("{:>width$}", "", width = (words_short) * (6 + 2)); + } + print!("\n"); } - Ok(xfrd) + addr += n; + } + Err(_) => { + break; } }; - - let mut addr = 0; - let bytes = &mut [b'\x00'; LINEBYTES]; - loop { // print each line - print_with_radix(input_offset_base, addr); // print offset - match f_read(bytes) { - Ok(0) => { - print!("\n"); - break; - } - Ok(n) => { - print!(" "); // 4 spaces after offset - we print 2 more before each word - - for b in 0 .. n / mem::size_of::() { - let bs = &bytes[(2 * b) .. (2 * b + 2)]; - let p: u16 = (bs[1] as u16) << 8 | bs[0] as u16; - print!(" {:06o}", p); - } - if n % mem::size_of::() == 1 { - print!(" {:06o}", bytes[n - 1]); - } - - // Add extra spaces to pad out the short, presumably last, line. - if nwidth$}", "", width=(words_short)*(6+2)); - } - - print!("\n"); - addr += n; - }, - Err(_) => { - break; - } - }; - }; - }; - status + } + if mf.any_err { + 1 + } else { + 0 + } } +// For file byte offset printed at left margin. fn parse_radix(radix_str: Option) -> Result { match radix_str { None => Ok(Radix::Octal), @@ -224,3 +287,205 @@ fn print_with_radix(r: &Radix, x: usize) { Radix::Binary => print!("{:07b}", x) } } + +// MultifileReader - concatenate all our input, file or stdin. +struct MultifileReader<'a> { + ni: std::slice::Iter<'a, InputSource<'a>>, + curr_file: Option>, + any_err: bool, +} +impl<'b> MultifileReader<'b> { + fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> { + let mut mf = MultifileReader { + ni: fnames.iter(), + curr_file: None, // normally this means done; call next_file() + any_err: false, + }; + mf.next_file(); + return mf; + } + + fn next_file(&mut self) { + // loop retries with subsequent files if err - normally 'loops' once + loop { + match self.ni.next() { + None => { + self.curr_file = None; + return; + } + Some(input) => { + match *input { + InputSource::Stdin => { + self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); + return; + } + InputSource::FileName(fname) => { + match File::open(fname) { + Ok(f) => { + self.curr_file = Some(Box::new(BufReader::new(f))); + return; + } + Err(e) => { + // If any file can't be opened, + // print an error at the time that the file is needed, + // then move on the the next file. + // This matches the behavior of the original `od` + let _ = + writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e); + self.any_err = true + } + } + } + } + } + } + } + } + + // Fill buf with bytes read from the list of files + // Returns Ok() + // Handles io errors itself, thus always returns OK + // Fills the provided buffer completely, unless it has run out of input. + // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> + fn f_read(&mut self, buf: &mut [u8]) -> io::Result { + let mut xfrd = 0; + // while buffer we are filling is not full.. May go thru several files. + 'fillloop: while xfrd < buf.len() { + match self.curr_file { + None => break, + Some(ref mut curr_file) => { + loop { + // stdin may return on 'return' (enter), even though the buffer isn't full. + xfrd += match curr_file.read(&mut buf[xfrd..]) { + Ok(0) => break, + Ok(n) => n, + Err(e) => panic!("file error: {}", e), + }; + if xfrd == buf.len() { + // transferred all that was asked for. + break 'fillloop; + } + } + } + } + self.next_file(); + } + Ok(xfrd) + } +} + + +struct OdFormat { + itembytes: usize, + writer: fn(u64, usize), + offmarg: usize, +} + +// TODO: use some sort of byte iterator, instead of passing bytes in u64 +fn print_item_oct(p: u64, itembytes: usize) { + let itemwidth = 3 * itembytes; + let itemspace = 4 * itembytes - itemwidth; + + print!("{:>itemspace$}{:0width$o}", + "", + p, + width = itemwidth, + itemspace = itemspace); +} + +fn print_item_hex(p: u64, itembytes: usize) { + let itemwidth = 2 * itembytes; + let itemspace = 4 * itembytes - itemwidth; + + print!("{:>itemspace$}{:0width$x}", + "", + p, + width = itemwidth, + itemspace = itemspace); +} + + +fn sign_extend(item: u64, itembytes: usize) -> i64{ + // https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend + unsafe{ + let b = 8 * itembytes; // number of bits representing the number in p + let m = mem::transmute::(1u64 << (b - 1)); + let x = mem::transmute::(item) & (mem::transmute::(1u64 << b) - 1); + let r = (x ^ m) - m; + r + } +} + + +fn print_item_dec_s(p: u64, itembytes: usize) { + // sign extend + let s = sign_extend(p,itembytes); + print!("{:totalwidth$}", s, totalwidth = 4 * itembytes); +} +fn print_item_dec_u(p: u64, itembytes: usize) { + print!("{:totalwidth$}", p, totalwidth = 4 * itembytes); +} + +// TODO: multi-byte chars +// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'. + +static A_CHRS : [&'static str; 160] = +["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", "em", "sub", "esc", "fs", "gs", "rs", "us", + "sp", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "del", + "80", "81", "82", "83", "84", "85", "86", "87", + "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", + "90", "91", "92", "93", "94", "95", "96", "97", + "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; + +fn print_item_a(p: u64, _: usize) { + // itembytes == 1 + let b = (p & 0xff) as u8; + print!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte + ); +} + + +static C_CHRS : [&'static str; 127] = [ +"\\0", "001", "002", "003", "004", "005", "006", "\\a", +"\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", +"020", "021", "022", "023", "024", "025", "026", "027", +"030", "031", "032", "033", "034", "035", "036", "037", + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~" ]; + + +fn print_item_c(p: u64, _: usize) { + // itembytes == 1 + let b = (p & 0xff) as usize; + + if b < C_CHRS.len() { + match C_CHRS.get(b as usize) { + Some(s) => print!("{:>4}", s), + None => print!("{:>4}", b), + } + } +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 625c3ae4e..188ce943e 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -4,16 +4,16 @@ use std::env; use std::io::Write; use std::fs::File; use std::fs::remove_file; - + static UTIL_NAME: &'static str = "od"; - + // octal dump of 'abcdefghijklmnopqrstuvwxyz\n' static ALPHA_OUT: &'static str = "0000000 061141 062143 063145 064147 065151 066153 067155 070157\n0000020 071161 072163 073165 074167 075171 000012 \n0000033\n"; - + // XXX We could do a better job of ensuring that we have a fresh temp dir to ourself, -// not a general one ful of other proc's leftovers. - -// Test that od can read one file and dump with default format +// not a general one ful of other proc's leftovers. + +// Test that od can read one file and dump with default format #[test] fn test_file() { let (_, mut ucmd) = testing(UTIL_NAME); @@ -21,7 +21,7 @@ fn test_file() { let temp = env::temp_dir(); let tmpdir = Path::new(&temp); let file = tmpdir.join("test"); - + { let mut f = File::create(&file).unwrap(); match f.write_all(b"abcdefghijklmnopqrstuvwxyz\n") { @@ -29,16 +29,16 @@ fn test_file() { _ => {} } } - + let result = ucmd.arg(file.as_os_str()).run(); - + assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, ALPHA_OUT); - + let _ = remove_file(file); } - + // Test that od can read 2 files and concatenate the contents #[test] fn test_2files() { @@ -47,11 +47,11 @@ fn test_2files() { let tmpdir = Path::new(&temp); let file1 = tmpdir.join("test1"); let file2 = tmpdir.join("test2"); - + for &(n,a) in [(1,"a"), (2,"b")].iter() { println!("number: {} letter:{}", n, a); } - + for &(path,data)in &[(&file1, "abcdefghijklmnop"),(&file2, "qrstuvwxyz\n")] { let mut f = File::create(&path).unwrap(); match f.write_all(data.as_bytes()) { @@ -59,27 +59,27 @@ fn test_2files() { _ => {} } } - + let result = ucmd.arg(file1.as_os_str()).arg(file2.as_os_str()).run(); - + assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, ALPHA_OUT); - + let _ = remove_file(file1); let _ = remove_file(file2); } -// Test that od gives non-0 exit val for filename that dosen't exist. +// Test that od gives non-0 exit val for filename that dosen't exist. #[test] fn test_no_file() { let (_, mut ucmd) = testing(UTIL_NAME); let temp = env::temp_dir(); let tmpdir = Path::new(&temp); let file = tmpdir.join("}surely'none'would'thus'a'file'name"); - + let result = ucmd.arg(file.as_os_str()).run(); - + assert!(!result.success); } @@ -90,7 +90,7 @@ fn test_from_stdin() { let input = "abcdefghijklmnopqrstuvwxyz\n"; let result = ucmd.run_piped_stdin(input.as_bytes()); - + assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, ALPHA_OUT); @@ -115,11 +115,60 @@ fn test_from_mixed() { _ => {} } } - + let result = ucmd.arg(file1.as_os_str()).arg("--").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); - + assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, ALPHA_OUT); } + +#[test] +fn test_multiple_formats() { + let (_, mut ucmd) = testing(UTIL_NAME); + + let input = "abcdefghijklmnopqrstuvwxyz\n"; + let result = ucmd.arg("-c").arg("-b").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, "0000000 a b c d e f g h i j k l m n o p\n 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160\n0000020 q r s t u v w x y z \\n \n 161 162 163 164 165 166 167 170 171 172 012 \n0000033\n"); + +} + +#[test] +fn test_dec() { + let (_, mut ucmd) = testing(UTIL_NAME); + + + let input = [ + 0u8, 0u8, + 1u8, 0u8, + 2u8, 0u8, + 3u8, 0u8, + 0xffu8,0x7fu8, + 0x00u8,0x80u8, + 0x01u8,0x80u8,]; + let expected_output = "0000000 0 1 2 3 32767 -32768 -32767 \n0000016\n"; + let result = ucmd.arg("-i").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); + +} + + +// We don't support multibyte chars, so big NEIN to this +/* +#[test] +fn mit_die_umlauten_getesten() { + let (_, mut ucmd) = testing(UTIL_NAME); + let result = ucmd.run_piped_stdin("Universität Tübingen".as_bytes()); + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, + "0000000 U n i v e r s i t ä ** t T ü **\n0000020 b i n g e n\n0000026") +} +*/