1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

od: take more format options, respect their order.

This commit is contained in:
Christopher Ian Stern 2016-05-22 15:46:20 -05:00
parent bdc1ca7426
commit e909e665f0
2 changed files with 461 additions and 147 deletions

View file

@ -18,6 +18,16 @@ use std::io::BufReader;
use std::io::Write;
use std::io;
//This is available in some versions of std, but not all that we target.
// Builds a `HashMap` from a comma-separated list of `key => value` pairs.
// Later duplicates of a key overwrite earlier ones, exactly as repeated
// `insert` calls would. A trailing comma is not accepted.
macro_rules! hashmap {
    ($( $k:expr => $v:expr ),*) => {{
        let mut table = ::std::collections::HashMap::new();
        $(
            table.insert($k, $v);
        )*
        table
    }};
}
// Numeric base used for the file byte offset printed at the left margin.
#[derive(Debug)]
enum Radix {
    Decimal,
    Hexadecimal,
    Octal,
    Binary,
}
@ -40,6 +50,19 @@ pub fn uumain(args: Vec<String>) -> i32 {
("output strings of at least BYTES graphic chars. 3 is assumed when \
BYTES is not specified."),
"BYTES");
// Traditional single-letter output format flags. They are declared here so
// option parsing accepts them and they appear in the usage text; the
// descriptions must agree with the (size, formatter) pairs in `known_formats`.
opts.optflag("a", "", "named characters, ignoring high-order bit");
opts.optflag("b", "", "octal bytes");
opts.optflag("c", "", "ASCII characters or backslash escapes");
opts.optflag("d", "", "unsigned decimal 2-byte units");
// -o selects octal output, not decimal.
opts.optflag("o", "", "octal 2-byte units");
opts.optflag("I", "", "decimal 2-byte units");
opts.optflag("L", "", "decimal 2-byte units");
opts.optflag("i", "", "decimal 2-byte units");
opts.optflag("O", "", "octal 4-byte units");
// -s works on 2-byte units (the 4-byte wording was a copy/paste slip).
opts.optflag("s", "", "decimal 2-byte units");
opts.optopt("t", "format", "select output format or formats", "TYPE");
opts.optflag("v", "output-duplicates", "do not use * to mark line suppression");
opts.optopt("w", "width",
@ -60,138 +83,178 @@ pub fn uumain(args: Vec<String>) -> i32 {
};
// Gather up file names - args which don't start with '-'
let fnames = args[1..]
.iter()
.filter(|w| !w.starts_with('-') || w == &"--" ) // "--" starts with '-', but it denotes stdin, not a flag
.map(|x| match x.as_str() { "--" => InputSource::Stdin, x => InputSource::FileName(x)})
.collect::<Vec<_>>();
let stdnionly = [InputSource::Stdin];
let inputs = args[1..]
.iter()
.filter_map(|w| match w as &str {
"--" => Some(InputSource::Stdin),
o if o.starts_with("-") => None,
x => Some(InputSource::FileName(x)),
})
.collect::<Vec<_>>();
// If no input files named, use stdin.
let inputs = if inputs.len() == 0 {
&stdnionly[..]
} else {
&inputs[..]
};
// Gather up format flags. We don't use getopts here because we need to keep them in order.
let flags = args[1..]
.iter()
.filter_map(|w| match w as &str {
"--" => None,
o if o.starts_with("-") => Some(&o[1..]),
_ => None,
})
.collect::<Vec<_>>();
// With no filenames, od uses stdin as input.
if fnames.len() == 0 {
odfunc(&input_offset_base, &[InputSource::Stdin])
}
else {
odfunc(&input_offset_base, &fnames)
}
// At the moment, the char formats (-a & -c) need the driver to set up a
// line by inserting a different number of spaces at the start.
struct OdFormater {
writer: fn(p: u64, itembytes: usize),
offmarg: usize,
};
let oct = OdFormater {
writer: print_item_oct, offmarg: 2
};
let hex = OdFormater {
writer: print_item_hex, offmarg: 2
};
let dec_u = OdFormater {
writer: print_item_dec_u, offmarg: 2
};
let dec_s = OdFormater {
writer: print_item_dec_s, offmarg: 2
};
let a_char = OdFormater {
writer: print_item_a, offmarg: 1
};
let c_char = OdFormater {
writer: print_item_c, offmarg: 1
};
fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat {
OdFormat {
itembytes: itembytes,
writer: fmtspec.writer,
offmarg: fmtspec.offmarg,
}
}
// TODO: -t fmts
// Maps each traditional single-letter format flag to
// (item size in bytes, formatter). Flags that are not listed here are not
// output formats and are skipped by the lookup below.
let known_formats = hashmap![
    "a" => (1, &a_char),
    "B" => (2, &oct),
    "b" => (1, &oct),
    "c" => (1, &c_char),
    "D" => (4, &dec_u),
    // -d is advertised as "unsigned decimal 2-byte units"; without this entry
    // it would be accepted but silently ignored.
    "d" => (2, &dec_u),
    // TODO: support floats
    // "e" => (8, &flo64),
    // "F" => (8, &flo64),
    // "f" => (4, &flo32),
    "H" => (4, &hex),
    "X" => (4, &hex),
    "o" => (2, &oct),
    "x" => (2, &hex),
    "h" => (2, &hex),
    "I" => (2, &dec_s),
    "L" => (2, &dec_s),
    "i" => (2, &dec_s),
    "O" => (4, &oct),
    // -s is the *signed* 2-byte decimal format; -d is its unsigned twin.
    "s" => (2, &dec_s)
];
let mut formats = Vec::new();
for flag in flags.iter() {
match known_formats.get(flag) {
None => {} // not every option is a format
Some(r) => {
let (itembytes, fmtspec) = *r;
formats.push(mkfmt(itembytes, fmtspec))
}
}
}
if formats.is_empty() {
formats.push(mkfmt(2, &oct)); // 2 byte octal is the default
}
odfunc(&input_offset_base, &inputs, &formats[..])
}
// Number of input bytes consumed for each output line.
const LINEBYTES: usize = 16;
// Byte width assumed per item when padding out a short final line.
const WORDBYTES: usize = 2;
fn odfunc(input_offset_base: &Radix, fnames: &[InputSource]) -> i32 {
fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 {
let mut status = 0;
let mut ni = fnames.iter();
{
// Open and return the next file to process as a BufReader
// Returns None when no more files.
let mut next_file = || -> Option<Box<io::Read>> {
// loop retries with subsequent files if err - normally 'loops' once
loop {
match ni.next() {
None => return None,
Some(input) => match *input {
InputSource::Stdin => return Some(Box::new(BufReader::new(std::io::stdin()))),
InputSource::FileName(fname) => match File::open(fname) {
Ok(f) => return Some(Box::new(BufReader::new(f))),
Err(e) => {
// If any file can't be opened,
// print an error at the time that the file is needed,
// then move on the the next file.
// This matches the behavior of the original `od`
let _ = writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e);
if status == 0 {status = 1}
}
}
}
}
let mut mf = MultifileReader::new(fnames);
let mut addr = 0;
let bytes = &mut [b'\x00'; LINEBYTES];
loop {
// print each line data (or multi-format raster of several lines describing the same data).
print_with_radix(input_offset_base, addr); // print offset
// if printing in multiple formats offset is printed only once
match mf.f_read(bytes) {
Ok(0) => {
print!("\n");
break;
}
};
Ok(n) => {
let mut first = true; // First line of a multi-format raster.
for f in formats {
if !first {
// this takes the space of the file offset on subsequent
// lines of multi-format rasters.
print!(" ");
}
first = false;
print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word
let mut curr_file: Box<io::Read> = match next_file() {
Some(f) => f,
None => {
return 1;
}
};
let mut exhausted = false; // There is no more input, gone to the end of the last file.
// Fill buf with bytes read from the list of files
// Returns Ok(<number of bytes read>)
// Handles io errors itself, thus always returns OK
// Fills the provided buffer completely, unless it has run out of input.
// If any call returns short (< buf.len()), all subsequent calls will return Ok<0>
let mut f_read = |buf: &mut [u8]| -> io::Result<usize> {
if exhausted {
Ok(0)
} else {
let mut xfrd = 0;
// while buffer we are filling is not full.. May go thru several files.
'fillloop: while xfrd < buf.len() {
loop { // stdin may return on 'return' (enter), even though the buffer isn't full.
xfrd += match curr_file.read(&mut buf[xfrd..]) {
Ok(0) => break,
Ok(n) => n,
Err(e) => panic!("file error: {}", e),
};
if xfrd == buf.len() {
// transferred all that was asked for.
break 'fillloop;
for b in 0..n / f.itembytes {
let mut p: u64 = 0;
for i in 0..f.itembytes {
p |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i);
}
(f.writer)(p, f.itembytes);
}
curr_file = match next_file() {
Some(f) => f,
None => {
exhausted = true;
break;
// Not enough bytes for a whole element; this should only happen on the last line.
if n % f.itembytes != 0 {
let b = n / f.itembytes;
let mut p2: u64 = 0;
for i in 0..(n % f.itembytes) {
p2 |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i);
}
};
}
Ok(xfrd)
}
};
let mut addr = 0;
let bytes = &mut [b'\x00'; LINEBYTES];
loop { // print each line
print_with_radix(input_offset_base, addr); // print offset
match f_read(bytes) {
Ok(0) => {
print!("\n");
break;
}
Ok(n) => {
print!(" "); // 4 spaces after offset - we print 2 more before each word
for b in 0 .. n / mem::size_of::<u16>() {
let bs = &bytes[(2 * b) .. (2 * b + 2)];
let p: u16 = (bs[1] as u16) << 8 | bs[0] as u16;
print!(" {:06o}", p);
(f.writer)(p2, f.itembytes);
}
if n % mem::size_of::<u16>() == 1 {
print!(" {:06o}", bytes[n - 1]);
}
// Add extra spaces to pad out the short, presumably last, line.
if n<LINEBYTES {
if n < LINEBYTES {
// calc # of items we did not print, must be short at least WORDBYTES to be missing any.
let words_short = (LINEBYTES-n)/WORDBYTES;
print!("{:>width$}", "", width=(words_short)*(6+2));
let words_short = (LINEBYTES - n) / WORDBYTES;
// XXX this is running short for -c & -a
print!("{:>width$}", "", width = (words_short) * (6 + 2));
}
print!("\n");
addr += n;
},
Err(_) => {
break;
}
};
addr += n;
}
Err(_) => {
break;
}
};
};
status
}
if mf.any_err {
1
} else {
0
}
}
// For file byte offset printed at left margin.
fn parse_radix(radix_str: Option<String>) -> Result<Radix, &'static str> {
match radix_str {
None => Ok(Radix::Octal),
@ -224,3 +287,205 @@ fn print_with_radix(r: &Radix, x: usize) {
Radix::Binary => print!("{:07b}", x)
}
}
// MultifileReader - concatenate all our input, file or stdin.
struct MultifileReader<'a> {
ni: std::slice::Iter<'a, InputSource<'a>>,
curr_file: Option<Box<io::Read>>,
any_err: bool,
}
impl<'b> MultifileReader<'b> {
fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> {
let mut mf = MultifileReader {
ni: fnames.iter(),
curr_file: None, // normally this means done; call next_file()
any_err: false,
};
mf.next_file();
return mf;
}
fn next_file(&mut self) {
// loop retries with subsequent files if err - normally 'loops' once
loop {
match self.ni.next() {
None => {
self.curr_file = None;
return;
}
Some(input) => {
match *input {
InputSource::Stdin => {
self.curr_file = Some(Box::new(BufReader::new(std::io::stdin())));
return;
}
InputSource::FileName(fname) => {
match File::open(fname) {
Ok(f) => {
self.curr_file = Some(Box::new(BufReader::new(f)));
return;
}
Err(e) => {
// If any file can't be opened,
// print an error at the time that the file is needed,
// then move on the the next file.
// This matches the behavior of the original `od`
let _ =
writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e);
self.any_err = true
}
}
}
}
}
}
}
}
// Fill buf with bytes read from the list of files
// Returns Ok(<number of bytes read>)
// Handles io errors itself, thus always returns OK
// Fills the provided buffer completely, unless it has run out of input.
// If any call returns short (< buf.len()), all subsequent calls will return Ok<0>
fn f_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let mut xfrd = 0;
// while buffer we are filling is not full.. May go thru several files.
'fillloop: while xfrd < buf.len() {
match self.curr_file {
None => break,
Some(ref mut curr_file) => {
loop {
// stdin may return on 'return' (enter), even though the buffer isn't full.
xfrd += match curr_file.read(&mut buf[xfrd..]) {
Ok(0) => break,
Ok(n) => n,
Err(e) => panic!("file error: {}", e),
};
if xfrd == buf.len() {
// transferred all that was asked for.
break 'fillloop;
}
}
}
}
self.next_file();
}
Ok(xfrd)
}
}
// One output format selected for a run: how wide each item is and how to
// print it.
struct OdFormat {
    // Number of input bytes combined into one printed item.
    itembytes: usize,
    // Prints a single item; receives the item value and its size in bytes.
    writer: fn(u64, usize),
    // Number of spaces printed before the first item of a line.
    offmarg: usize,
}
// TODO: use some sort of byte iterator, instead of passing bytes in u64
// Prints `p` as zero-padded octal. Every input byte gets a 4-column cell:
// 3 octal digits, with the remaining columns emitted as leading spaces.
fn print_item_oct(p: u64, itembytes: usize) {
    let digits = 3 * itembytes;
    let gap = 4 * itembytes - digits;
    print!("{:>itemspace$}{:0width$o}", "", p, width = digits, itemspace = gap);
}
// Prints `p` as zero-padded lowercase hex. Every input byte gets a 4-column
// cell: 2 hex digits, with the remaining columns emitted as leading spaces.
fn print_item_hex(p: u64, itembytes: usize) {
    let digits = 2 * itembytes;
    let gap = 4 * itembytes - digits;
    print!("{:>itemspace$}{:0width$x}", "", p, width = digits, itemspace = gap);
}
// Interprets the low `itembytes` bytes of `item` as a two's-complement signed
// integer and widens it to i64. Any bits above the item are ignored.
//
// `itembytes` of 8 or more uses all 64 bits; 0 yields 0. (The earlier
// mask-based version overflowed its shift for 8-byte items and needed
// `unsafe` transmutes for what plain casts can do.)
fn sign_extend(item: u64, itembytes: usize) -> i64 {
    if itembytes == 0 {
        return 0;
    }
    if itembytes >= 8 {
        return item as i64;
    }
    // Move the item's sign bit into bit 63, then let the arithmetic right
    // shift on i64 replicate it back down.
    let unused_bits = 64 - 8 * (itembytes as u32);
    ((item << unused_bits) as i64) >> unused_bits
}
// Prints `p` as a signed decimal number, right-aligned in a column that is
// 4 characters wide per input byte.
fn print_item_dec_s(p: u64, itembytes: usize) {
    let column = 4 * itembytes;
    // The item arrives zero-extended in a u64; recover its sign first.
    let signed = sign_extend(p, itembytes);
    print!("{:totalwidth$}", signed, totalwidth = column);
}
// Prints `p` as an unsigned decimal number, right-aligned in a column that is
// 4 characters wide per input byte.
fn print_item_dec_u(p: u64, itembytes: usize) {
    let column = 4 * itembytes;
    print!("{:totalwidth$}", p, totalwidth = column);
}
// TODO: multi-byte chars
// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'.

// Names of the 128 7-bit ASCII characters, indexed by character code.
static A_CHRS: [&'static str; 128] = [
    "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
    "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
    "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
    "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
    "sp", "!", "\"", "#", "$", "%", "&", "'",
    "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "<", "=", ">", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    "`", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "|", "}", "~", "del",
];

// Returns the name shown by `-a` for `byte`. The high-order bit is ignored,
// as the option's help text promises, so 0xe1 is named like 0x61 ("a").
fn item_a_name(byte: u8) -> &'static str {
    // Masking to 7 bits keeps the index below 128, so this cannot go out of bounds.
    A_CHRS[(byte & 0x7f) as usize]
}

// Prints one byte as a named character, right-aligned in 4 columns.
// The item size is ignored: this format always works on single bytes.
fn print_item_a(p: u64, _: usize) {
    // itembytes == 1
    print!("{:>4}", item_a_name((p & 0xff) as u8));
}
// Representation used by `-c` for byte values 0..=126: the character itself,
// a backslash escape, or a 3-digit octal code for other control characters.
static C_CHRS: [&'static str; 127] = [
    "\\0", "001", "002", "003", "004", "005", "006", "\\a",
    "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017",
    "020", "021", "022", "023", "024", "025", "026", "027",
    "030", "031", "032", "033", "034", "035", "036", "037",
    " ", "!", "\"", "#", "$", "%", "&", "'",
    "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "<", "=", ">", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    "`", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "|", "}", "~",
];

// Returns the 4-column cell shown by `-c` for `byte`.
// Bytes outside the table (0x7f and everything with the high bit set) are
// rendered as 3-digit octal, the same way the table renders unnamed control
// characters. Previously such bytes produced no output at all, which shifted
// every following column of the line.
fn item_c_repr(byte: u8) -> String {
    match C_CHRS.get(byte as usize) {
        Some(s) => format!("{:>4}", s),
        None => format!(" {:03o}", byte),
    }
}

// Prints one byte as an ASCII character or escape, right-aligned in 4 columns.
// The item size is ignored: this format always works on single bytes.
fn print_item_c(p: u64, _: usize) {
    // itembytes == 1
    print!("{}", item_c_repr((p & 0xff) as u8));
}

View file

@ -123,3 +123,52 @@ fn test_from_mixed() {
assert_eq!(result.stdout, ALPHA_OUT);
}
// Runs od with both -c and -b on input piped through stdin and checks that
// every chunk of input is rendered once per format, in the order the flags
// were given (characters first, then octal bytes). Per the expected string,
// the offset is printed only on the first line of each group.
// NOTE(review): the expected string is whitespace-sensitive - confirm its
// column spacing against the real output before relying on it.
#[test]
fn test_multiple_formats() {
let (_, mut ucmd) = testing(UTIL_NAME);
// 27 bytes of input: one full 16-byte line plus a short second line.
let input = "abcdefghijklmnopqrstuvwxyz\n";
let result = ucmd.arg("-c").arg("-b").run_piped_stdin(input.as_bytes());
assert_empty_stderr!(result);
assert!(result.success);
assert_eq!(result.stdout, "0000000 a b c d e f g h i j k l m n o p\n 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160\n0000020 q r s t u v w x y z \\n \n 161 162 163 164 165 166 167 170 171 172 012 \n0000033\n");
}
// Runs od with -i on seven 2-byte values and checks that they are printed as
// signed decimals, including the extremes 32767 and -32768.
// NOTE(review): the expected string is whitespace-sensitive - confirm its
// column spacing against the real output before relying on it.
#[test]
fn test_dec() {
let (_, mut ucmd) = testing(UTIL_NAME);
// Each row is one 2-byte item, least significant byte first.
let input = [
0u8, 0u8,
1u8, 0u8,
2u8, 0u8,
3u8, 0u8,
0xffu8,0x7fu8,
0x00u8,0x80u8,
0x01u8,0x80u8,];
// The closing offset 0000016 is octal for the 14 bytes of input.
let expected_output = "0000000 0 1 2 3 32767 -32768 -32767 \n0000016\n";
let result = ucmd.arg("-i").run_piped_stdin(&input[..]);
assert_empty_stderr!(result);
assert!(result.success);
assert_eq!(result.stdout, expected_output);
}
// Multi-byte characters are not supported yet, so the following test stays disabled.
/*
#[test]
fn mit_die_umlauten_getesten() {
let (_, mut ucmd) = testing(UTIL_NAME);
let result = ucmd.run_piped_stdin("Universität Tübingen".as_bytes());
assert_empty_stderr!(result);
assert!(result.success);
assert_eq!(result.stdout,
"0000000 U n i v e r s i t ä ** t T ü **\n0000020 b i n g e n\n0000026")
}
*/