od: take more format options, respect their order.

2025-09-13 10:37:58 +00:00 · 2016-05-22 15:46:20 -05:00 · 2016-05-22 15:46:20 -05:00 · e909e665f0
commit e909e665f0
parent bdc1ca7426
2 changed files with 461 additions and 147 deletions
--- a/src/od/od.rs
+++ b/src/od/od.rs
@ -18,6 +18,16 @@ use std::io::BufReader;
 use std::io::Write;
 use std::io;

+//This is available in some versions of std, but not all that we target.
+macro_rules! hashmap {
+    ($( $key: expr => $val: expr ),*) => {{
+         let mut map = ::std::collections::HashMap::new();
+         $( map.insert($key, $val); )*
+         map
+    }}
+}
+
+
 #[derive(Debug)]
 enum Radix { Decimal, Hexadecimal, Octal, Binary }

@ -40,6 +50,19 @@ pub fn uumain(args: Vec<String>) -> i32 {
                ("output strings of at least BYTES graphic chars. 3 is assumed when \
                 BYTES is not specified."),
                "BYTES");
+    opts.optflag("a", "", "named characters, ignoring high-order bit");
+    opts.optflag("b", "", "octal bytes");
+    opts.optflag("c", "", "ASCII characters or backslash escapes");
+    opts.optflag("d", "", "unsigned decimal 2-byte units");
+    opts.optflag("o", "", "unsigned decimal 2-byte units");
+
+    opts.optflag("I", "", "decimal 2-byte units");
+    opts.optflag("L", "", "decimal 2-byte units");
+    opts.optflag("i", "", "decimal 2-byte units");
+
+    opts.optflag("O", "", "octal 4-byte units");
+    opts.optflag("s", "", "decimal 4-byte units");
+
    opts.optopt("t", "format", "select output format or formats", "TYPE");
    opts.optflag("v", "output-duplicates", "do not use * to mark line suppression");
    opts.optopt("w", "width",
@ -60,138 +83,178 @@ pub fn uumain(args: Vec<String>) -> i32 {
    };

    // Gather up file names - args which don't start with '-'
-    let fnames = args[1..]
-                     .iter()
-                     .filter(|w| !w.starts_with('-') || w == &"--" ) // "--" starts with '-', but it denotes stdin, not a flag 
-                     .map(|x| match x.as_str() { "--" => InputSource::Stdin, x => InputSource::FileName(x)})
-                     .collect::<Vec<_>>();
+    let stdnionly = [InputSource::Stdin];
+    let inputs = args[1..]
+        .iter()
+        .filter_map(|w| match w as &str {
+            "--" => Some(InputSource::Stdin),
+            o if o.starts_with("-") => None,
+            x => Some(InputSource::FileName(x)),
+        })
+        .collect::<Vec<_>>();
+    // If no input files named, use stdin.
+    let inputs = if inputs.len() == 0 {
+        &stdnionly[..]
+    } else {
+        &inputs[..]
+    };
+    // Gather up format flags; we don't use getopts because we need to keep them in order.
+    let flags = args[1..]
+        .iter()
+        .filter_map(|w| match w as &str {
+            "--" => None,
+            o if o.starts_with("-") => Some(&o[1..]),
+            _ => None,
+        })
+        .collect::<Vec<_>>();

-    // With no filenames, od uses stdin as input.
-    if fnames.len() == 0 {
-        odfunc(&input_offset_base, &[InputSource::Stdin])
-    }
-    else {
-        odfunc(&input_offset_base, &fnames)
-    }
+        // At the moment, char (-a & -c) formats need the driver to set up a
+        // line by inserting a different # of spaces at the start.
+        struct OdFormater {
+            writer: fn(p: u64, itembytes: usize),
+            offmarg: usize,
+        };
+        let oct = OdFormater {
+            writer: print_item_oct,  offmarg: 2
+        };
+        let hex = OdFormater {
+            writer: print_item_hex, offmarg: 2
+        };
+        let dec_u = OdFormater {
+            writer: print_item_dec_u, offmarg: 2
+        };
+        let dec_s = OdFormater {
+            writer: print_item_dec_s, offmarg: 2
+        };
+        let a_char = OdFormater {
+            writer: print_item_a, offmarg: 1
+        };
+        let c_char = OdFormater {
+            writer: print_item_c, offmarg: 1
+        };
+
+        fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat {
+            OdFormat {
+                itembytes: itembytes,
+                writer: fmtspec.writer,
+                offmarg: fmtspec.offmarg,
+            }
+        }
+
+// TODO: -t fmts
+        let known_formats = hashmap![
+    		"a" => (1, &a_char),
+    		"B" => (2, &oct) ,
+    		"b" => (1, &oct),
+    		"c" => (1, &c_char),
+    		"D" => (4, &dec_u),
+    // TODO: support floats
+    //		"e" => (8, &flo64),
+    //		"F" => (8, &flo64),
+    //		"F" => (4, &flo32),
+    		"H" => (4, &hex),
+    		"X" => (4, &hex) ,
+    		"o" => (2, &oct),
+    		"x" => (2, &hex),
+    		"h" => (2, &hex),
+
+    		"I" => (2, &dec_s),
+    		"L" => (2, &dec_s),
+    		"i" => (2, &dec_s),
+
+    		"O" => (4, &oct),
+    		"s" => (2, &dec_u)
+    	];
+
+        let mut formats = Vec::new();
+
+        for flag in flags.iter() {
+            match known_formats.get(flag) {
+                None => {} // not every option is a format
+                Some(r) => {
+                    let (itembytes, fmtspec) = *r;
+                    formats.push(mkfmt(itembytes, fmtspec))
+                }
+            }
+        }
+
+        if formats.is_empty() {
+            formats.push(mkfmt(2, &oct)); // 2 byte octal is the default
+        }
+
+        odfunc(&input_offset_base, &inputs, &formats[..])
 }

 const LINEBYTES:usize = 16;
 const WORDBYTES:usize = 2;

-fn odfunc(input_offset_base: &Radix, fnames: &[InputSource]) -> i32 {
+fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 {

-    let mut status = 0;
-    let mut ni = fnames.iter();
-    {
-        // Open and return the next file to process as a BufReader
-        // Returns None when no more files.
-        let mut next_file = || -> Option<Box<io::Read>> {
-            // loop retries with subsequent files if err - normally 'loops' once
-            loop {
-                match ni.next() {
-                    None => return None,
-                    Some(input) => match *input {
-                        InputSource::Stdin => return Some(Box::new(BufReader::new(std::io::stdin()))),
-                        InputSource::FileName(fname) => match File::open(fname) {
-                            Ok(f) => return Some(Box::new(BufReader::new(f))),
-                            Err(e) => {
-                                // If any file can't be opened,
-                                // print an error at the time that the file is needed,
-                                // then move on the the next file.
-                                // This matches the behavior of the original `od`
-                                let _ = writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e);
-                                if status == 0 {status = 1}
-                            }
-                        }
-                    }
-                }
+    let mut mf = MultifileReader::new(fnames);
+    let mut addr = 0;
+    let bytes = &mut [b'\x00'; LINEBYTES];
+    loop {
+        // print each line data (or multi-format raster of several lines describing the same data).
+
+        print_with_radix(input_offset_base, addr); // print offset
+		// if printing in multiple formats offset is printed only once
+
+        match mf.f_read(bytes) {
+            Ok(0) => {
+                print!("\n");
+                break;
            }
-        };
+            Ok(n) => {
+                let mut first = true; // First line of a multi-format raster.
+                for f in formats {
+                    if !first {
+                        // this takes the space of the file offset on subsequent
+                        // lines of multi-format rasters.
+                        print!("       ");
+                    }
+                    first = false;
+                    print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word

-        let mut curr_file: Box<io::Read> = match next_file() {
-            Some(f) => f, 
-            None => {
-                return 1;
-            } 
-        };
- 
-        let mut exhausted = false; // There is no more input, gone to the end of the last file.
-
-        // Fill buf with bytes read from the list of files
-        // Returns Ok(<number of bytes read>) 
-        // Handles io errors itself, thus always returns OK
-        // Fills the provided buffer completely, unless it has run out of input.
-        // If any call returns short (< buf.len()), all subsequent calls will return Ok<0>
-        let mut f_read = |buf: &mut [u8]| -> io::Result<usize> {
-            if exhausted {
-                Ok(0)
-            } else {
-                let mut xfrd = 0;
-                // while buffer we are filling is not full.. May go thru several files.
-                'fillloop: while xfrd < buf.len() {
-                    loop { // stdin may return on 'return' (enter), even though the buffer isn't full.
-                        xfrd += match curr_file.read(&mut buf[xfrd..]) {
-                            Ok(0) => break, 
-                            Ok(n) => n,
-                            Err(e) => panic!("file error: {}", e),
-                        };
-                        if xfrd == buf.len() {
-                            // transferred all that was asked for.
-                            break 'fillloop;
+                    for b in 0..n / f.itembytes {
+                        let mut p: u64 = 0;
+                        for i in 0..f.itembytes {
+                            p |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i);
                        }
+                        (f.writer)(p, f.itembytes);
                    }
-                    curr_file = match next_file() { 
-                        Some(f) => f, 
-                        None => {
-                            exhausted = true;
-                            break;
+                    // not enough bytes for a whole element, this should only happen on the last line.
+                    if n % f.itembytes != 0 {
+                        let b = n / f.itembytes;
+                        let mut p2: u64 = 0;
+                        for i in 0..(n % f.itembytes) {
+                            p2 |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i);
                        }
-                    };
-                }
-                Ok(xfrd)
-            }
-        };
- 
-        let mut addr = 0;
-        let bytes = &mut [b'\x00'; LINEBYTES];
-        loop { // print each line
-            print_with_radix(input_offset_base, addr); // print offset
-            match f_read(bytes) {
-                Ok(0) => {
-                    print!("\n");
-                    break;
-                }
-                Ok(n) => {
-                    print!("  "); // 4 spaces after offset - we print 2 more before each word
-                 
-                    for b in 0 .. n / mem::size_of::<u16>() {
-                        let bs = &bytes[(2 * b) .. (2 * b + 2)];
-                        let p: u16 = (bs[1] as u16) << 8 | bs[0] as u16;
-                        print!("  {:06o}", p);
+                        (f.writer)(p2, f.itembytes);
                    }
-                    if n % mem::size_of::<u16>() == 1 {
-                        print!("  {:06o}", bytes[n - 1]);
-                    }
- 
                    // Add extra spaces to pad out the short, presumably last, line.
-                    if n<LINEBYTES {
+                    if n < LINEBYTES {
                        // calc # of items we did not print, must be short at least WORDBYTES to be missing any.
-                        let words_short = (LINEBYTES-n)/WORDBYTES; 
-                        print!("{:>width$}", "", width=(words_short)*(6+2));
+                        let words_short = (LINEBYTES - n) / WORDBYTES;
+                        // XXX this is running short for -c & -a
+                        print!("{:>width$}", "", width = (words_short) * (6 + 2));
                    }
- 
                    print!("\n");
-                    addr += n;
-                },
-                Err(_) => {
-                    break;
                }
-            };
+                addr += n;
+            }
+            Err(_) => {
+                break;
+            }
        };
-    };
-    status
+    }
+    if mf.any_err {
+        1
+    } else {
+        0
+    }
 }

+// For file byte offset printed at left margin.
 fn parse_radix(radix_str: Option<String>) -> Result<Radix, &'static str> {
    match radix_str {
        None => Ok(Radix::Octal),
@ -224,3 +287,205 @@ fn print_with_radix(r: &Radix, x: usize) {
        Radix::Binary => print!("{:07b}", x)
    }
 }
+
+// MultifileReader - concatenate all our input, file or stdin.
+struct MultifileReader<'a> {
+    ni: std::slice::Iter<'a, InputSource<'a>>,
+    curr_file: Option<Box<io::Read>>,
+    any_err: bool,
+}
+impl<'b> MultifileReader<'b> {
+    fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> {
+        let mut mf = MultifileReader {
+            ni: fnames.iter(),
+            curr_file: None, // normally this means done; call next_file()
+            any_err: false,
+        };
+        mf.next_file();
+        return mf;
+    }
+
+    fn next_file(&mut self) {
+        // loop retries with subsequent files if err - normally 'loops' once
+        loop {
+            match self.ni.next() {
+                None => {
+                    self.curr_file = None;
+                    return;
+                }
+                Some(input) => {
+                    match *input {
+                        InputSource::Stdin => {
+                            self.curr_file = Some(Box::new(BufReader::new(std::io::stdin())));
+                            return;
+                        }
+                        InputSource::FileName(fname) => {
+                            match File::open(fname) {
+                                Ok(f) => {
+                                    self.curr_file = Some(Box::new(BufReader::new(f)));
+                                    return;
+                                }
+                                Err(e) => {
+                                    // If any file can't be opened,
+                                    // print an error at the time that the file is needed,
+                                    // then move on to the next file.
+                                    // This matches the behavior of the original `od`
+                                    let _ =
+                                        writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e);
+                                    self.any_err = true
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Fill buf with bytes read from the list of files
+    // Returns Ok(<number of bytes read>)
+    // Handles io errors itself, thus always returns OK
+    // Fills the provided buffer completely, unless it has run out of input.
+    // If any call returns short (< buf.len()), all subsequent calls will return Ok<0>
+    fn f_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let mut xfrd = 0;
+        // while buffer we are filling is not full.. May go thru several files.
+        'fillloop: while xfrd < buf.len() {
+            match self.curr_file {
+                None => break,
+                Some(ref mut curr_file) => {
+                    loop {
+                        // stdin may return on 'return' (enter), even though the buffer isn't full.
+                        xfrd += match curr_file.read(&mut buf[xfrd..]) {
+                            Ok(0) => break,
+                            Ok(n) => n,
+                            Err(e) => panic!("file error: {}", e),
+                        };
+                        if xfrd == buf.len() {
+                            // transferred all that was asked for.
+                            break 'fillloop;
+                        }
+                    }
+                }
+            }
+            self.next_file();
+        }
+        Ok(xfrd)
+    }
+}
+
+
+struct OdFormat {
+    itembytes: usize,
+    writer: fn(u64, usize),
+    offmarg: usize,
+}
+
+// TODO: use some sort of byte iterator, instead of passing bytes in u64
+fn print_item_oct(p: u64, itembytes: usize) {
+    let itemwidth = 3 * itembytes;
+    let itemspace = 4 * itembytes - itemwidth;
+
+    print!("{:>itemspace$}{:0width$o}",
+           "",
+           p,
+           width = itemwidth,
+           itemspace = itemspace);
+}
+
+fn print_item_hex(p: u64, itembytes: usize) {
+    let itemwidth = 2 * itembytes;
+    let itemspace = 4 * itembytes - itemwidth;
+
+    print!("{:>itemspace$}{:0width$x}",
+           "",
+           p,
+           width = itemwidth,
+           itemspace = itemspace);
+}
+
+
+fn sign_extend(item: u64, itembytes: usize) -> i64{
+	// https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend
+	unsafe{
+		let b = 8 * itembytes; // number of bits representing the number in p
+		let m =  mem::transmute::<u64,i64>(1u64 << (b - 1));
+		let x =  mem::transmute::<u64,i64>(item) & (mem::transmute::<u64,i64>(1u64 << b) - 1);
+		let r = (x ^ m) - m;
+		r
+	}
+}
+
+
+fn print_item_dec_s(p: u64, itembytes: usize) {
+    // sign extend
+    let s = sign_extend(p,itembytes);
+    print!("{:totalwidth$}", s, totalwidth = 4 * itembytes);
+}
+fn print_item_dec_u(p: u64, itembytes: usize) {
+    print!("{:totalwidth$}", p, totalwidth = 4 * itembytes);
+}
+
+// TODO: multi-byte chars
+// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'.
+
+static A_CHRS : [&'static str; 160]  =
+["nul",   "soh",   "stx",   "etx",   "eot",   "enq",   "ack",   "bel",
+ "bs",    "ht",   "nl",     "vt",    "ff",    "cr",    "so",    "si",
+ "dle",   "dc1",   "dc2",   "dc3",   "dc4",   "nak",   "syn",   "etb",
+ "can",   "em",   "sub",   "esc",    "fs",    "gs",    "rs",    "us",
+ "sp",     "!",     "\"",     "#",     "$",     "%",     "&",     "'",
+  "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
+  "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
+  "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
+  "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
+  "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
+  "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
+  "X",     "Y",     "Z",     "[",     "\\",    "]",     "^",     "_",
+  "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
+  "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
+  "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
+  "x",     "y",     "z",     "{",     "|",     "}",     "~",   "del",
+ "80",    "81",    "82",    "83",    "84",    "85",    "86",    "87",
+ "88",    "89",    "8a",    "8b",    "8c",    "8d",    "8e",    "8f",
+ "90",    "91",    "92",    "93",    "94",    "95",    "96",    "97",
+ "98",    "99",    "9a",    "9b",    "9c",    "9d",    "9e",    "9f"];
+
+fn print_item_a(p: u64, _: usize) {
+    // itembytes == 1
+    let b = (p & 0xff) as u8;
+    print!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte
+  );
+}
+
+
+static C_CHRS : [&'static str; 127]  = [
+"\\0",   "001",   "002",   "003",   "004",   "005",   "006",    "\\a",
+"\\b",    "\\t",  "\\n",   "\\v",    "\\f",    "\\r",   "016",   "017",
+"020",   "021",   "022",   "023",   "024",   "025",   "026",   "027",
+"030",   "031",   "032",   "033",   "034",   "035",   "036",   "037",
+  " ",   "!",     "\"",     "#",     "$",     "%",     "&",     "'",
+  "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
+  "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
+  "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
+  "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
+  "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
+  "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
+  "X",     "Y",     "Z",     "[",     "\\",     "]",     "^",     "_",
+  "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
+  "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
+  "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
+  "x",     "y",     "z",     "{",     "|",     "}",     "~" ];
+
+
+fn print_item_c(p: u64, _: usize) {
+    // itembytes == 1
+    let b = (p & 0xff) as usize;
+
+    if b < C_CHRS.len() {
+        match C_CHRS.get(b as usize) {
+            Some(s) => print!("{:>4}", s),
+            None => print!("{:>4}", b),
+        }
+    }
+}
--- a/tests/test_od.rs
+++ b/tests/test_od.rs
@ -123,3 +123,52 @@ fn test_from_mixed() {
    assert_eq!(result.stdout, ALPHA_OUT);

 }
+
+#[test]
+fn test_multiple_formats() {
+    let (_, mut ucmd) = testing(UTIL_NAME);
+
+    let input = "abcdefghijklmnopqrstuvwxyz\n";
+    let result = ucmd.arg("-c").arg("-b").run_piped_stdin(input.as_bytes());
+
+    assert_empty_stderr!(result);
+    assert!(result.success);
+    assert_eq!(result.stdout, "0000000    a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   p\n          141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160\n0000020    q   r   s   t   u   v   w   x   y   z  \\n                \n          161 162 163 164 165 166 167 170 171 172 012                \n0000033\n");
+
+}
+
+#[test]
+fn test_dec() {
+    let (_, mut ucmd) = testing(UTIL_NAME);
+
+
+    let input = [
+    	0u8, 0u8,
+    	1u8, 0u8,
+    	2u8, 0u8,
+    	3u8, 0u8,
+    	0xffu8,0x7fu8,
+    	0x00u8,0x80u8,
+    	0x01u8,0x80u8,];
+    let expected_output = "0000000         0       1       2       3   32767  -32768  -32767        \n0000016\n";
+    let result = ucmd.arg("-i").run_piped_stdin(&input[..]);
+
+    assert_empty_stderr!(result);
+    assert!(result.success);
+    assert_eq!(result.stdout, expected_output);
+
+}
+
+
+// We don't support multibyte chars, so big NEIN to this
+/*
+#[test]
+fn mit_die_umlauten_getesten() {
+    let (_, mut ucmd) = testing(UTIL_NAME);
+    let result = ucmd.run_piped_stdin("Universität Tübingen".as_bytes());
+    assert_empty_stderr!(result);
+    assert!(result.success);
+    assert_eq!(result.stdout,
+    "0000000    U   n   i   v   e   r   s   i   t   ä  **   t       T   ü  **\n0000020    b   i   n   g   e   n\n0000026")
+}
+*/