od: implement ascii dump

2025-07-28 03:27:44 +00:00 · 2016-08-12 17:51:24 +02:00 · 2016-08-12 17:51:24 +02:00 · e8eab8d3e8
commit e8eab8d3e8
parent cea4297fdf
4 changed files with 144 additions and 65 deletions
--- a/src/od/od.rs
+++ b/src/od/od.rs
@ -38,7 +38,8 @@ use partialreader::*;
 use peekreader::*;
 use formatteriteminfo::*;
 use parse_nrofbytes::*;
-use parse_formats::parse_format_flags;
+use parse_formats::{parse_format_flags, ParsedFormatterItemInfo};
+use prn_char::format_ascii_dump;

 static VERSION: &'static str = env!("CARGO_PKG_VERSION");
 const MAX_BYTES_PER_UNIT: usize = 8;
@ -163,7 +164,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
            }
        }
    };
-    let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size));
+    let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size));
    if line_bytes % min_bytes != 0 {
        show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes);
        line_bytes = min_bytes;
@ -201,7 +202,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
 }

 fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
-        fnames: Vec<InputSource>, formats: &[FormatterItemInfo], output_duplicates: bool,
+        fnames: Vec<InputSource>, formats: &[ParsedFormatterItemInfo], output_duplicates: bool,
        skip_bytes: usize, read_bytes: Option<usize>) -> i32 {

    let mf = MultifileReader::new(fnames);
@ -213,12 +214,13 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
    let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE);
    unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized

-    let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size));
+    let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size));
    let print_width_block = formats
        .iter()
        .fold(1, |max, next| {
-            cmp::max(max, next.print_width * (byte_size_block / next.byte_size))
+            cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size))
        });
+    let print_width_line = print_width_block * (line_bytes / byte_size_block);

    if byte_size_block > MAX_BYTES_PER_UNIT {
        panic!("{}-bits types are unsupported. Current max={}-bits.",
@ -233,9 +235,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,

    // calculate proper alignment for each item
    for sf in &mut spaced_formatters {
-        let mut byte_size = sf.frm.byte_size;
+        let mut byte_size = sf.frm.formatter_item_info.byte_size;
        let mut items_in_block = byte_size_block / byte_size;
-        let thisblock_width = sf.frm.print_width * items_in_block;
+        let thisblock_width = sf.frm.formatter_item_info.print_width * items_in_block;
        let mut missing_spacing = print_width_block - thisblock_width;

        while items_in_block > 0 {
@ -291,7 +293,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,

                    print_bytes(byte_order, &bytes, n, peekbytes,
                        &print_with_radix(input_offset_base, addr),
-                        &spaced_formatters, byte_size_block);
+                        &spaced_formatters, byte_size_block, print_width_line);
                }

                addr += n;
@ -312,22 +314,22 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
 }

 fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str,
-        formats: &[SpacedFormatterItemInfo], byte_size_block: usize) {
+        formats: &[SpacedFormatterItemInfo], byte_size_block: usize, print_width_line: usize) {
    let mut first = true; // First line of a multi-format raster.
    for f in formats {
        let mut output_text = String::new();

        let mut b = 0;
        while b < length {
-            let nextb = b + f.frm.byte_size;
+            let nextb = b + f.frm.formatter_item_info.byte_size;

            output_text.push_str(&format!("{:>width$}",
                    "",
                    width = f.spacing[b % byte_size_block]));

-            match f.frm.formatter {
+            match f.frm.formatter_item_info.formatter {
                FormatWriter::IntWriter(func) => {
-                    let p: u64 = match f.frm.byte_size {
+                    let p: u64 = match f.frm.formatter_item_info.byte_size {
                        1 => {
                            bytes[b] as u64
                        }
@ -340,19 +342,19 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us
                        8 => {
                            byte_order.read_u64(&bytes[b..nextb])
                        }
-                        _ => { panic!("Invalid byte_size: {}", f.frm.byte_size); }
+                        _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); }
                    };
-                    output_text.push_str(&func(p, f.frm.byte_size, f.frm.print_width));
+                    output_text.push_str(&func(p, f.frm.formatter_item_info.byte_size, f.frm.formatter_item_info.print_width));
                }
                FormatWriter::FloatWriter(func) => {
-                    let p: f64 = match f.frm.byte_size {
+                    let p: f64 = match f.frm.formatter_item_info.byte_size {
                        4 => {
                            byte_order.read_f32(&bytes[b..nextb]) as f64
                        }
                        8 => {
                            byte_order.read_f64(&bytes[b..nextb])
                        }
-                        _ => { panic!("Invalid byte_size: {}", f.frm.byte_size); }
+                        _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); }
                    };
                    output_text.push_str(&func(p));
                }
@ -363,6 +365,14 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us
            b = nextb;
        }

+        if f.frm.add_ascii_dump {
+            let missing_spacing = print_width_line.saturating_sub(output_text.chars().count());
+            output_text.push_str(&format!("{:>width$}  {}",
+                    "",
+                    format_ascii_dump(&bytes[..length]),
+                    width=missing_spacing));
+        }
+
        if first {
            print!("{}", prefix); // print offset
            // if printing in multiple formats offset is printed only once
@ -416,6 +426,6 @@ fn print_final_offset(r: Radix, x: usize) {
 }

 struct SpacedFormatterItemInfo {
-    frm: FormatterItemInfo,
+    frm: ParsedFormatterItemInfo, // parsed format item; also carries the add_ascii_dump flag read by print_bytes
    spacing: [usize; MAX_BYTES_PER_UNIT],
 }
--- a/src/od/parse_formats.rs
+++ b/src/od/parse_formats.rs
@ -13,6 +13,22 @@ macro_rules! hashmap {
    }}
 }

+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct ParsedFormatterItemInfo { // one format item as parsed from the command line, plus its ascii-dump flag
+    pub formatter_item_info:  FormatterItemInfo, // the formatter selected for this item
+    pub add_ascii_dump: bool, // true when this format's output line should end with a `>...<` ascii column
+}
+
+impl ParsedFormatterItemInfo {
+    pub fn new(formatter_item_info: FormatterItemInfo, add_ascii_dump: bool) -> ParsedFormatterItemInfo { // plain field-by-field constructor
+        ParsedFormatterItemInfo {
+            formatter_item_info: formatter_item_info,
+            add_ascii_dump: add_ascii_dump,
+        }
+    }
+}
+
+
 /// Parses format flags from commandline
 ///
 /// getopts, docopt, clap don't seem suitable to parse the commandline
@ -24,7 +40,7 @@ macro_rules! hashmap {
 /// arguments with parameters like -w16 can only appear at the end: -fvoxw16
 /// parameters of -t/--format specify 1 or more formats.
 /// if -- appears on the commandline, parsing should stop.
-pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<FormatterItemInfo>, String> {
+pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<ParsedFormatterItemInfo>, String> {

    let known_formats = hashmap![
    'a' => FORMAT_ITEM_A,
@ -97,7 +113,7 @@ pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<FormatterItemInfo>,
                    match known_formats.get(&c) {
                        None => {} // not every option is a format
                        Some(r) => {
-                            formats.push(*r)
+                            formats.push(ParsedFormatterItemInfo::new(*r, false))
                        }
                    }
                }
@ -116,7 +132,7 @@ pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<FormatterItemInfo>,
    }

    if formats.is_empty() {
-        formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default
+        formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_OCT16, false)); // 2 byte octal is the default
    }

    Ok(formats)
@ -130,7 +146,7 @@ enum ParseState {
    Finished        // no more characters may appear.
 }

-fn parse_type_string(params: &String) -> Result<Vec<FormatterItemInfo>, String> {
+fn parse_type_string(params: &String) -> Result<Vec<ParsedFormatterItemInfo>, String> {

    let type_chars: HashSet<_> = ['a', 'c'].iter().cloned().collect();
    let type_ints: HashSet<_> = ['d', 'o', 'u', 'x'].iter().cloned().collect();
@ -233,50 +249,50 @@ fn parse_type_string(params: &String) -> Result<Vec<FormatterItemInfo>, String>
        }

        match type_char {
-            'a' => formats.push(FORMAT_ITEM_A),
-            'c' => formats.push(FORMAT_ITEM_C),
+            'a' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_A, show_ascii_dump)),
+            'c' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, show_ascii_dump)),
            'd' => {
-                formats.push(match byte_size {
+                formats.push(ParsedFormatterItemInfo::new(match byte_size {
                    1 => FORMAT_ITEM_DEC8S,
                    2 => FORMAT_ITEM_DEC16S,
                    4|0 => FORMAT_ITEM_DEC32S,
                    8 => FORMAT_ITEM_DEC64S,
                    _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
-                });
+                }, show_ascii_dump));
            },
            'o' => {
-                formats.push(match byte_size {
+                formats.push(ParsedFormatterItemInfo::new(match byte_size {
                    1 => FORMAT_ITEM_OCT8,
                    2 => FORMAT_ITEM_OCT16,
                    4|0 => FORMAT_ITEM_OCT32,
                    8 => FORMAT_ITEM_OCT64,
                    _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
-                });
+                }, show_ascii_dump));
            },
            'u' => {
-                formats.push(match byte_size {
+                formats.push(ParsedFormatterItemInfo::new(match byte_size {
                    1 => FORMAT_ITEM_DEC8U,
                    2 => FORMAT_ITEM_DEC16U,
                    4|0 => FORMAT_ITEM_DEC32U,
                    8 => FORMAT_ITEM_DEC64U,
                    _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
-                });
+                }, show_ascii_dump));
            },
            'x' => {
-                formats.push(match byte_size {
+                formats.push(ParsedFormatterItemInfo::new(match byte_size {
                    1 => FORMAT_ITEM_HEX8,
                    2 => FORMAT_ITEM_HEX16,
                    4|0 => FORMAT_ITEM_HEX32,
                    8 => FORMAT_ITEM_HEX64,
                    _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
-                });
+                }, show_ascii_dump));
            },
            'f' => {
-                formats.push(match byte_size {
+                formats.push(ParsedFormatterItemInfo::new(match byte_size {
                    4|0 => FORMAT_ITEM_F32,
                    8 => FORMAT_ITEM_F64,
                    _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
-                });
+                }, show_ascii_dump));
            },
            _ => unreachable!(),
        }
@ -287,12 +303,22 @@ fn parse_type_string(params: &String) -> Result<Vec<FormatterItemInfo>, String>
    Ok(formats)
 }

-#[allow(dead_code)]
+#[cfg(test)]
 pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Result<Vec<FormatterItemInfo>, String> {
    let args = args_str.iter().map(|s| s.to_string()).collect();
-    parse_format_flags(&args)
+    match parse_format_flags(&args) {
+        Err(e) => Err(e),
+        Ok(v) => {
+            // tests using this function assume add_ascii_dump is not set
+            Ok(v.into_iter()
+                .inspect(|f| assert!(!f.add_ascii_dump)) // panics if any parsed item has the flag set
+                .map(|f| f.formatter_item_info) // drop the wrapper, keeping only the formatter info
+                .collect())
+        },
+    }
 }

+
 #[test]
 fn test_no_options() {
    assert_eq!(parse_format_flags_str(
@ -372,9 +398,9 @@ fn test_long_format_a() {

 #[test]
 fn test_long_format_cz() {
-   assert_eq!(parse_format_flags_str(
-       &vec!("od", "--format=cz")).unwrap(),
-       vec!(FORMAT_ITEM_C)); // TODO 'z'
+   assert_eq!(parse_format_flags( // called directly: parse_format_flags_str asserts add_ascii_dump is unset
+       &vec!("od".to_string(), "--format=cz".to_string())).unwrap(),
+       vec!(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, true))); // the 'z' suffix sets add_ascii_dump
 }

 #[test]
@ -448,36 +474,35 @@ fn test_format_next_arg_invalid() {
    parse_format_flags_str(&vec!("od", "-t")).unwrap_err();
 }

-
 #[test]
 fn test_mixed_formats() {
-   assert_eq!(parse_format_flags_str(
+   assert_eq!(parse_format_flags( // called directly because "-vItu1z" yields an item with add_ascii_dump set
       &vec!(
-           "od",
-           "--skip-bytes=2",
-           "-vItu1z",
-           "-N",
-           "1000",
-           "-xt",
-           "acdx1",
-           "--format=u2c",
-           "--format",
-           "f",
-           "-xAx",
-           "--",
-           "-h",
-           "--format=f8")).unwrap(),
+           "od".to_string(),
+           "--skip-bytes=2".to_string(),
+           "-vItu1z".to_string(),
+           "-N".to_string(),
+           "1000".to_string(),
+           "-xt".to_string(),
+           "acdx1".to_string(),
+           "--format=u2c".to_string(),
+           "--format".to_string(),
+           "f".to_string(),
+           "-xAx".to_string(),
+           "--".to_string(), // the two arguments after this contribute no entries to the expected list
+           "-h".to_string(),
+           "--format=f8".to_string())).unwrap(),
       vec!(
-           FORMAT_ITEM_DEC64S,  // I
-           FORMAT_ITEM_DEC8U,   // tu1z
-           FORMAT_ITEM_HEX16,   // x
-           FORMAT_ITEM_A,       // ta
-           FORMAT_ITEM_C,       // tc
-           FORMAT_ITEM_DEC32S,  // td
-           FORMAT_ITEM_HEX8,    // tx1
-           FORMAT_ITEM_DEC16U,  // tu2
-           FORMAT_ITEM_C,       // tc
-           FORMAT_ITEM_F32,     // tf
-           FORMAT_ITEM_HEX16,   // x
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC64S, false),  // I
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC8U, true),    // tu1z
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false),   // x
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_A, false),       // ta
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false),       // tc
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC32S, false),  // td
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX8, false),    // tx1
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC16U, false),  // tu2
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false),       // tc
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_F32, false),     // tf
+           ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false),   // x
       ));
 }
--- a/src/od/prn_char.rs
+++ b/src/od/prn_char.rs
@ -100,6 +100,23 @@ fn format_item_c(bytes: &[u8]) -> String {
    }
 }

+pub fn format_ascii_dump(bytes: &[u8]) -> String { // builds the `>...<` ascii column, one entry per input byte
+    let mut result = String::new();
+
+    result.push('>'); // opening delimiter
+    for c in bytes.iter() {
+        if *c >= 0x20 && *c <= 0x7e { // bytes in 0x20..=0x7e are rendered through the C_CHRS lookup table
+            result.push_str(C_CHRS[*c as usize]);
+        }
+        else { // every other byte value is shown as '.'
+            result.push('.');
+        }
+    }
+    result.push('<'); // closing delimiter
+
+    result
+}
+
 #[test]
 fn test_format_item_a() {
    assert_eq!(" nul", format_item_a(0x00, 1, 4));
@ -147,3 +164,9 @@ fn test_format_item_c() {
    assert_eq!(" 365", format_item_c(&[0xf5, 0x80, 0x80, 0x80])); // invalid utf-8
    assert_eq!(" 377", format_item_c(&[0xff])); // invalid utf-8
 }
+
+#[test]
+fn test_format_ascii_dump() {
+    assert_eq!(">.<", format_ascii_dump(&[0x00])); // 0x00 is below 0x20, so it becomes '.'
+    assert_eq!(">. A~.<", format_ascii_dump(&[0x1f, 0x20, 0x41, 0x7e, 0x7f])); // probes both edges of the 0x20..=0x7e range
+}
--- a/tests/test_od.rs
+++ b/tests/test_od.rs
@ -527,3 +527,24 @@ fn test_read_bytes(){
    assert!(result.success);
    assert_eq!(result.stdout, unindent(ALPHA_OUT));
 }
+
+#[test]
+fn test_ascii_dump(){ // runs od on piped stdin and compares the full output, including the `>...<` columns
+
+    let input : [u8; 22] = [ // 22 bytes: the expected output has one 16-byte line and one 6-byte line
+        0x00, 0x01, 0x0a, 0x0d, 0x10, 0x1f, 0x20, 0x61, 0x62, 0x63, 0x7d,
+        0x7e, 0x7f, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xff];
+    let result = new_ucmd!().arg("-tx1zacz").run_piped_stdin(&input[..]); // x1 and c carry the z suffix, a does not
+
+    assert_empty_stderr!(result);
+    assert!(result.success);
+    assert_eq!(result.stdout, unindent(r"
+            0000000  00  01  0a  0d  10  1f  20  61  62  63  7d  7e  7f  80  90  a0  >...... abc}~....<
+                    nul soh  nl  cr dle  us  sp   a   b   c   }   ~ del nul dle  sp
+                     \0 001  \n  \r 020 037       a   b   c   }   ~ 177  **  **  **  >...... abc}~....<
+            0000020  b0  c0  d0  e0  f0  ff                                          >......<
+                      0   @   P   `   p del
+                     ** 300 320 340 360 377                                          >......<
+            0000026
+            "));
+}