From a900b42a1f520b2b9e1898d1b515a43466f8850b Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 18 Jul 2016 22:25:33 +0200 Subject: [PATCH 01/41] od: refactor: readability of expected output use multiple lines in source too and use unindent crate to fix spacing --- Cargo.lock | 6 ++++++ Cargo.toml | 1 + tests/test_od.rs | 30 +++++++++++++++++++++++------- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9de14011a..31636641b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -90,6 +90,7 @@ dependencies = [ "tty 0.0.1", "uname 0.0.1", "unexpand 0.0.1", + "unindent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "uniq 0.0.1", "unlink 0.0.1", "uptime 0.0.1", @@ -1129,6 +1130,11 @@ name = "unicode-width" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unindent" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "uniq" version = "0.0.1" diff --git a/Cargo.toml b/Cargo.toml index 95ea8df4c..0ed6157a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -203,6 +203,7 @@ libc = "*" regex="*" rand="*" tempdir="*" +unindent="*" [[bin]] name = "uutils" diff --git a/tests/test_od.rs b/tests/test_od.rs index 1933aa23c..44d0e5cc4 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -1,13 +1,20 @@ +extern crate unindent; + use common::util::*; use std::path::Path; use std::env; use std::io::Write; use std::fs::File; use std::fs::remove_file; +use self::unindent::*; // octal dump of 'abcdefghijklmnopqrstuvwxyz\n' -static ALPHA_OUT: &'static str = "0000000 061141 062143 063145 064147 065151 066153 067155 070157\n0000020 071161 072163 073165 074167 075171 000012 \n0000033\n"; +static ALPHA_OUT: &'static str = " + 0000000 061141 062143 063145 064147 065151 066153 067155 070157 + 0000020 071161 072163 073165 074167 075171 000012 + 0000033 + "; // XXX We could do a better job of ensuring that we have a fresh temp dir to 
ourself, // not a general one ful of other proc's leftovers. @@ -32,7 +39,7 @@ fn test_file() { assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); let _ = remove_file(file); } @@ -61,7 +68,7 @@ fn test_2files() { assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); let _ = remove_file(file1); let _ = remove_file(file2); @@ -88,7 +95,7 @@ fn test_from_stdin() { assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); } @@ -114,7 +121,7 @@ fn test_from_mixed() { assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, ALPHA_OUT); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); } @@ -126,7 +133,13 @@ fn test_multiple_formats() { assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, "0000000 a b c d e f g h i j k l m n o p\n 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160\n0000020 q r s t u v w x y z \\n \n 161 162 163 164 165 166 167 170 171 172 012 \n0000033\n"); + assert_eq!(result.stdout, unindent(" + 0000000 a b c d e f g h i j k l m n o p + 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 + 0000020 q r s t u v w x y z \\n + 161 162 163 164 165 166 167 170 171 172 012 + 0000033 + ")); } @@ -142,7 +155,10 @@ fn test_dec() { 0xffu8,0x7fu8, 0x00u8,0x80u8, 0x01u8,0x80u8,]; - let expected_output = "0000000 0 1 2 3 32767 -32768 -32767 \n0000016\n"; + let expected_output = unindent(" + 0000000 0 1 2 3 32767 -32768 -32767 + 0000016 + "); let result = new_ucmd!().arg("-i").run_piped_stdin(&input[..]); assert_empty_stderr!(result); From e0b7ff1953f82b15bfebfb8391bda651766318dd Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 18 Jul 2016 22:46:04 +0200 Subject: [PATCH 02/41] od: implement --help and --version --- 
Cargo.lock | 1 + src/od/Cargo.toml | 1 + src/od/od.rs | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 31636641b..aafb0f1cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -660,6 +660,7 @@ version = "0.0.1" dependencies = [ "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "unindent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/src/od/Cargo.toml b/src/od/Cargo.toml index d6320dd65..43bd178c5 100644 --- a/src/od/Cargo.toml +++ b/src/od/Cargo.toml @@ -10,6 +10,7 @@ path = "od.rs" [dependencies] getopts = "*" libc = "*" +unindent = "*" [[bin]] name = "od" diff --git a/src/od/od.rs b/src/od/od.rs index 62693a1a3..f09cb8134 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -10,6 +10,7 @@ */ extern crate getopts; +extern crate unindent; use std::fs::File; use std::io::Read; @@ -17,6 +18,7 @@ use std::mem; use std::io::BufReader; use std::io::Write; use std::io; +use unindent::*; //This is available in some versions of std, but not all that we target. macro_rules! hashmap { @@ -27,6 +29,8 @@ macro_rules! hashmap { }} } +static NAME: &'static str = "od"; +static VERSION: &'static str = env!("CARGO_PKG_VERSION"); #[derive(Debug)] enum Radix { Decimal, Hexadecimal, Octal, Binary } @@ -77,6 +81,20 @@ pub fn uumain(args: Vec) -> i32 { Err(f) => panic!("Invalid options\n{}", f) }; + if matches.opt_present("h") { + let msg = unindent(&format!(" + Usage: + {0} [OPTION]... [FILENAME]... 
+ + Displays data in various human-readable formats.", NAME)); + println!("{}", opts.usage(&msg)); + return 0; + } + if matches.opt_present("version") { + println!("{} {}", NAME, VERSION); + return 0; + } + let input_offset_base = match parse_radix(matches.opt_str("A")) { Ok(r) => r, Err(f) => { panic!("Invalid -A/--address-radix\n{}", f) } From 3e143217a9f093f048680cb60406d24b5c21c5f8 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Thu, 21 Jul 2016 10:20:42 +0200 Subject: [PATCH 03/41] od: refactor sign_extend easier to understand algoritm which does not use unsafe --- src/od/od.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index f09cb8134..39a09964d 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -424,14 +424,8 @@ fn print_item_hex(p: u64, itembytes: usize) { fn sign_extend(item: u64, itembytes: usize) -> i64{ - // https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend - unsafe{ - let b = 8 * itembytes; // number of bits representing the number in p - let m = mem::transmute::(1u64 << (b - 1)); - let x = mem::transmute::(item) & (mem::transmute::(1u64 << b) - 1); - let r = (x ^ m) - m; - r - } + let shift = 64 - itembytes * 8; + (item << shift) as i64 >> shift } From 5c495359c13f925c272b4b5644b75ffe1203b6f6 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Thu, 21 Jul 2016 10:27:02 +0200 Subject: [PATCH 04/41] od: refactor: convert bytes using byteorder crate --- Cargo.lock | 6 ++++++ src/od/Cargo.toml | 1 + src/od/od.rs | 39 +++++++++++++++++++++++++++------------ 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aafb0f1cf..32e89bfb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,6 +176,11 @@ name = "bitflags" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "byteorder" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cat" 
version = "0.0.1" @@ -658,6 +663,7 @@ dependencies = [ name = "od" version = "0.0.1" dependencies = [ + "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "unindent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/src/od/Cargo.toml b/src/od/Cargo.toml index 43bd178c5..555c82875 100644 --- a/src/od/Cargo.toml +++ b/src/od/Cargo.toml @@ -11,6 +11,7 @@ path = "od.rs" getopts = "*" libc = "*" unindent = "*" +byteorder = "*" [[bin]] name = "od" diff --git a/src/od/od.rs b/src/od/od.rs index 39a09964d..c515efa13 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -11,14 +11,15 @@ extern crate getopts; extern crate unindent; +extern crate byteorder; use std::fs::File; use std::io::Read; -use std::mem; use std::io::BufReader; use std::io::Write; use std::io; use unindent::*; +use byteorder::*; //This is available in some versions of std, but not all that we target. macro_rules! hashmap { @@ -233,21 +234,35 @@ fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat first = false; print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word - for b in 0..n / f.itembytes { - let mut p: u64 = 0; - for i in 0..f.itembytes { - p |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i); - } - (f.writer)(p, f.itembytes); - } // not enough byte for a whole element, this should only happen on the last line. if n % f.itembytes != 0 { let b = n / f.itembytes; - let mut p2: u64 = 0; - for i in 0..(n % f.itembytes) { - p2 |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i); + // set zero bytes in the part of the buffer that will be used, but is not filled. 
+ for i in n..(b + 1) * f.itembytes { + bytes[i] = 0; } - (f.writer)(p2, f.itembytes); + } + + let mut b = 0; + while b < n { + let nextb = b + f.itembytes; + let p: u64 = match f.itembytes { + 1 => { + bytes[b] as u64 + } + 2 => { + LittleEndian::read_u16(&bytes[b..nextb]) as u64 + } + 4 => { + LittleEndian::read_u32(&bytes[b..nextb]) as u64 + } + 8 => { + LittleEndian::read_u64(&bytes[b..nextb]) + } + _ => { panic!("Invalid itembytes: {}", f.itembytes); } + }; + (f.writer)(p, f.itembytes); + b = nextb; } // Add extra spaces to pad out the short, presumably last, line. if n < LINEBYTES { From f53aefa7e94757dd732f65492f388710e3c2574e Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Thu, 21 Jul 2016 19:31:23 +0200 Subject: [PATCH 05/41] od: implement 32-bit floating point output --- src/od/od.rs | 186 +++++++++++++++++++++++++++++++++++++++++------ tests/test_od.rs | 22 +++++- 2 files changed, 184 insertions(+), 24 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index c515efa13..18f02d15a 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -18,6 +18,7 @@ use std::io::Read; use std::io::BufReader; use std::io::Write; use std::io; +use std::f32; use unindent::*; use byteorder::*; @@ -68,6 +69,8 @@ pub fn uumain(args: Vec) -> i32 { opts.optflag("O", "", "octal 4-byte units"); opts.optflag("s", "", "decimal 4-byte units"); + opts.optflag("f", "", "floating point IEEE-754 single precision (32-bit) units"); + opts.optopt("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); opts.optopt("w", "width", @@ -130,26 +133,29 @@ pub fn uumain(args: Vec) -> i32 { // At the moment, char (-a & -c)formats need the driver to set up a // line by inserting a different # of of spaces at the start. 
struct OdFormater { - writer: fn(p: u64, itembytes: usize), + writer: FormatWriter, offmarg: usize, }; let oct = OdFormater { - writer: print_item_oct, offmarg: 2 + writer: FormatWriter::IntWriter(print_item_oct), offmarg: 2 }; let hex = OdFormater { - writer: print_item_hex, offmarg: 2 + writer: FormatWriter::IntWriter(print_item_hex), offmarg: 2 }; let dec_u = OdFormater { - writer: print_item_dec_u, offmarg: 2 + writer: FormatWriter::IntWriter(print_item_dec_u), offmarg: 2 }; let dec_s = OdFormater { - writer: print_item_dec_s, offmarg: 2 + writer: FormatWriter::IntWriter(print_item_dec_s), offmarg: 2 }; let a_char = OdFormater { - writer: print_item_a, offmarg: 1 + writer: FormatWriter::IntWriter(print_item_a), offmarg: 1 }; let c_char = OdFormater { - writer: print_item_c, offmarg: 1 + writer: FormatWriter::IntWriter(print_item_c), offmarg: 1 + }; + let flo32 = OdFormater { + writer: FormatWriter::FloatWriter(print_item_flo32), offmarg: 0 }; fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat { @@ -170,7 +176,7 @@ pub fn uumain(args: Vec) -> i32 { // TODO: support floats // "e" => (8, &flo64), // "F" => (8, &flo64), - // "F" => (4, &flo32), + "f" => (4, &flo32), "H" => (4, &hex), "X" => (4, &hex) , "o" => (2, &oct), @@ -246,22 +252,38 @@ fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat let mut b = 0; while b < n { let nextb = b + f.itembytes; - let p: u64 = match f.itembytes { - 1 => { - bytes[b] as u64 + match f.writer { + FormatWriter::IntWriter(func) => { + let p: u64 = match f.itembytes { + 1 => { + bytes[b] as u64 + } + 2 => { + LittleEndian::read_u16(&bytes[b..nextb]) as u64 + } + 4 => { + LittleEndian::read_u32(&bytes[b..nextb]) as u64 + } + 8 => { + LittleEndian::read_u64(&bytes[b..nextb]) + } + _ => { panic!("Invalid itembytes: {}", f.itembytes); } + }; + func(p, f.itembytes); } - 2 => { - LittleEndian::read_u16(&bytes[b..nextb]) as u64 + FormatWriter::FloatWriter(func) => { + let p: f64 = match f.itembytes { + 
4 => { + LittleEndian::read_f32(&bytes[b..nextb]) as f64 + } + 8 => { + LittleEndian::read_f64(&bytes[b..nextb]) + } + _ => { panic!("Invalid itembytes: {}", f.itembytes); } + }; + func(p); } - 4 => { - LittleEndian::read_u32(&bytes[b..nextb]) as u64 - } - 8 => { - LittleEndian::read_u64(&bytes[b..nextb]) - } - _ => { panic!("Invalid itembytes: {}", f.itembytes); } - }; - (f.writer)(p, f.itembytes); + } b = nextb; } // Add extra spaces to pad out the short, presumably last, line. @@ -407,10 +429,15 @@ impl<'b> MultifileReader<'b> { } } +#[derive(Clone, Copy)] +enum FormatWriter { + IntWriter(fn(u64, usize)), + FloatWriter(fn(f64)), +} struct OdFormat { itembytes: usize, - writer: fn(u64, usize), + writer: FormatWriter, offmarg: usize, } @@ -516,3 +543,116 @@ fn print_item_c(p: u64, _: usize) { } } } + +fn print_item_flo32(f: f64) { + print!(" {}", format_flo32(f as f32)) +} + +// formats float with 8 significant digits, eg 12345678 or -1.2345678e+12 +// always retuns a string of 14 characters +fn format_flo32(f: f32) -> String { + + if !f.is_normal() { + if f == -0.0 && f.is_sign_negative() { return format!("{:>14}", "-0") } + if f == 0.0 || !f.is_finite() { return format!("{:14}", f) } + return format!("{:14e}", f) // subnormal numbers + } + + let mut l = f.abs().log10().floor() as i32; + + let r = 10f32.powi(l); + if (f > 0.0 && r > f) || (f < 0.0 && -r < f) { + // fix precision error + l = l - 1; + } + + if l >=0 && l <= 7 { + format!("{:width$.dec$}", f, + width=14, + dec=7-l as usize) + } + else if l == -1 { + format!("{:width$.dec$}", f, + width=14, + dec=8) + } + else { + format!("{:14.7e}", f) + } +} + +#[test] +fn test_format_flo32() { + assert_eq!(format_flo32(1.0), " 1.0000000"); + assert_eq!(format_flo32(9.9999990), " 9.9999990"); + assert_eq!(format_flo32(10.0), " 10.000000"); + assert_eq!(format_flo32(99.999977), " 99.999977"); + assert_eq!(format_flo32(99.999992), " 99.999992"); + assert_eq!(format_flo32(100.0), " 100.00000"); + 
assert_eq!(format_flo32(999.99994), " 999.99994"); + assert_eq!(format_flo32(1000.0), " 1000.0000"); + assert_eq!(format_flo32(9999.9990), " 9999.9990"); + assert_eq!(format_flo32(10000.0), " 10000.000"); + assert_eq!(format_flo32(99999.992), " 99999.992"); + assert_eq!(format_flo32(100000.0), " 100000.00"); + assert_eq!(format_flo32(999999.94), " 999999.94"); + assert_eq!(format_flo32(1000000.0), " 1000000.0"); + assert_eq!(format_flo32(9999999.4), " 9999999.0"); + assert_eq!(format_flo32(10000000.0), " 10000000"); + assert_eq!(format_flo32(99999992.0), " 99999992"); + assert_eq!(format_flo32(100000000.0), " 1.0000000e8"); + assert_eq!(format_flo32(9.9999994e8), " 9.9999994e8"); + assert_eq!(format_flo32(1.0e9), " 1.0000000e9"); + assert_eq!(format_flo32(9.9999990e9), " 9.9999990e9"); + assert_eq!(format_flo32(1.0e10), " 1.0000000e10"); + + assert_eq!(format_flo32(0.1), " 0.10000000"); + assert_eq!(format_flo32(0.99999994), " 0.99999994"); + assert_eq!(format_flo32(0.010000001), " 1.0000001e-2"); + //assert_eq!(format_flo32(0.01), " 1.0000000e-2"); // 9.9999998e-3 + assert_eq!(format_flo32(0.099999994), " 9.9999994e-2"); + assert_eq!(format_flo32(0.001), " 1.0000000e-3"); + + assert_eq!(format_flo32(-1.0), " -1.0000000"); + assert_eq!(format_flo32(-9.9999990), " -9.9999990"); + assert_eq!(format_flo32(-10.0), " -10.000000"); + assert_eq!(format_flo32(-99.999977), " -99.999977"); + assert_eq!(format_flo32(-99.999992), " -99.999992"); + assert_eq!(format_flo32(-100.0), " -100.00000"); + assert_eq!(format_flo32(-999.99994), " -999.99994"); + assert_eq!(format_flo32(-1000.0), " -1000.0000"); + assert_eq!(format_flo32(-9999.9990), " -9999.9990"); + assert_eq!(format_flo32(-10000.0), " -10000.000"); + assert_eq!(format_flo32(-99999.992), " -99999.992"); + assert_eq!(format_flo32(-100000.0), " -100000.00"); + assert_eq!(format_flo32(-999999.94), " -999999.94"); + assert_eq!(format_flo32(-1000000.0), " -1000000.0"); + assert_eq!(format_flo32(-9999999.4), " -9999999.0"); + 
assert_eq!(format_flo32(-10000000.0), " -10000000"); + assert_eq!(format_flo32(-99999992.0), " -99999992"); + assert_eq!(format_flo32(-100000000.0), " -1.0000000e8"); + assert_eq!(format_flo32(-9.9999994e8), " -9.9999994e8"); + assert_eq!(format_flo32(-1.0e9), " -1.0000000e9"); + assert_eq!(format_flo32(-9.9999990e9), " -9.9999990e9"); + assert_eq!(format_flo32(-1.0e10), " -1.0000000e10"); + + assert_eq!(format_flo32(-0.1), " -0.10000000"); + assert_eq!(format_flo32(-0.99999994), " -0.99999994"); + assert_eq!(format_flo32(-0.010000001), " -1.0000001e-2"); + //assert_eq!(format_flo32(-0.01), " -1.0000000e-2"); // -9.9999998e-3 + assert_eq!(format_flo32(-0.099999994), " -9.9999994e-2"); + assert_eq!(format_flo32(-0.001), " -1.0000000e-3"); + + assert_eq!(format_flo32(3.4028233e38), " 3.4028233e38"); + assert_eq!(format_flo32(-3.4028233e38), " -3.4028233e38"); + //assert_eq!(format_flo32(-3.4028235E38), " -3.4028235e38"); // literal out of range for f32 + + assert_eq!(format_flo32(-1.1663108e-38),"-1.1663108e-38"); + assert_eq!(format_flo32(-4.701977e-38), "-4.7019771e-38"); + + assert_eq!(format_flo32(f32::NAN), " NaN"); + assert_eq!(format_flo32(f32::INFINITY), " inf"); + assert_eq!(format_flo32(f32::NEG_INFINITY), " -inf"); + assert_eq!(format_flo32(-0.0), " -0"); + assert_eq!(format_flo32(0.0), " 0"); +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 44d0e5cc4..7c1385005 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -146,7 +146,6 @@ fn test_multiple_formats() { #[test] fn test_dec() { - let input = [ 0u8, 0u8, 1u8, 0u8, @@ -167,6 +166,27 @@ fn test_dec() { } +#[test] +fn test_f32(){ + + let input : [u8; 24] = [ + 0x52, 0x06, 0x9e, 0xbf, // 0xbf9e0652 -1.2345679 + 0x4e, 0x61, 0x3c, 0x4b, // 0x4b3c614e 12345678 + 0x0f, 0x9b, 0x94, 0xfe, // 0xfe949b0f -9.876543E37 + 0x00, 0x00, 0x00, 0x80, // 0x80000000 -0.0 + 0xff, 0xff, 0xff, 0x7f, // 0x7fffffff NaN + 0x00, 0x00, 0x7f, 0x80];// 0x807f0000 -1.1663108E-38 + let expected_output = unindent(" + 0000000 
-1.2345679 12345678 -9.8765427e37 -0 + 0000020 NaN -1.1663108e-38 + 0000030 + "); + let result = new_ucmd!().arg("-f").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} // We don't support multibyte chars, so big NEIN to this /* From 104f8eb5098484ac04f8d796b1c1b42d868c3c83 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Fri, 22 Jul 2016 21:47:06 +0200 Subject: [PATCH 06/41] od: implement 64-bit floating point output --- src/od/od.rs | 91 +++++++++++++++++++++++++++++++++++++----------- tests/test_od.rs | 29 +++++++++++++-- 2 files changed, 96 insertions(+), 24 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index 18f02d15a..99e9f6726 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -18,7 +18,9 @@ use std::io::Read; use std::io::BufReader; use std::io::Write; use std::io; +use std::num::FpCategory; use std::f32; +use std::f64; use unindent::*; use byteorder::*; @@ -69,7 +71,9 @@ pub fn uumain(args: Vec) -> i32 { opts.optflag("O", "", "octal 4-byte units"); opts.optflag("s", "", "decimal 4-byte units"); - opts.optflag("f", "", "floating point IEEE-754 single precision (32-bit) units"); + opts.optflag("e", "", "floating point double precision (64-bit) units"); + opts.optflag("f", "", "floating point single precision (32-bit) units"); + opts.optflag("F", "", "floating point double precision (64-bit) units"); opts.optopt("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); @@ -157,6 +161,9 @@ pub fn uumain(args: Vec) -> i32 { let flo32 = OdFormater { writer: FormatWriter::FloatWriter(print_item_flo32), offmarg: 0 }; + let flo64 = OdFormater { + writer: FormatWriter::FloatWriter(print_item_flo64), offmarg: 0 + }; fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat { OdFormat { @@ -173,9 +180,8 @@ pub fn uumain(args: Vec) -> i32 { "b" => (1, &oct), "c" => (1, &c_char), "D" => (4, 
&dec_u), - // TODO: support floats - // "e" => (8, &flo64), - // "F" => (8, &flo64), + "e" => (8, &flo64), + "F" => (8, &flo64), "f" => (4, &flo32), "H" => (4, &hex), "X" => (4, &hex) , @@ -548,36 +554,59 @@ fn print_item_flo32(f: f64) { print!(" {}", format_flo32(f as f32)) } +fn print_item_flo64(f: f64) { + print!(" {}", format_flo64(f)) +} + // formats float with 8 significant digits, eg 12345678 or -1.2345678e+12 // always retuns a string of 14 characters fn format_flo32(f: f32) -> String { + let width: usize = 14; + let precision: usize = 8; + + if f.classify() == FpCategory::Subnormal { + // subnormal numbers will be normal as f64, so will print with a wrong precision + format!("{:width$e}", f, width = width) // subnormal numbers + } + else { + format_float(f as f64, width, precision) + } +} + +fn format_flo64(f: f64) -> String { + format_float(f, 24, 17) +} + +fn format_float(f: f64, width: usize, precision: usize) -> String { if !f.is_normal() { - if f == -0.0 && f.is_sign_negative() { return format!("{:>14}", "-0") } - if f == 0.0 || !f.is_finite() { return format!("{:14}", f) } - return format!("{:14e}", f) // subnormal numbers + if f == -0.0 && f.is_sign_negative() { return format!("{:>width$}", "-0", width = width) } + if f == 0.0 || !f.is_finite() { return format!("{:width$}", f, width = width) } + return format!("{:width$e}", f, width = width) // subnormal numbers } let mut l = f.abs().log10().floor() as i32; - let r = 10f32.powi(l); + let r = 10f64.powi(l); if (f > 0.0 && r > f) || (f < 0.0 && -r < f) { // fix precision error l = l - 1; } - if l >=0 && l <= 7 { + if l >= 0 && l <= (precision as i32 - 1) { format!("{:width$.dec$}", f, - width=14, - dec=7-l as usize) + width = width, + dec = (precision-1) - l as usize) } else if l == -1 { format!("{:width$.dec$}", f, - width=14, - dec=8) + width = width, + dec = precision) } else { - format!("{:14.7e}", f) + format!("{:width$.dec$e}", f, + width = width, + dec = precision - 1) } } @@ -597,7 +626,7 @@ 
fn test_format_flo32() { assert_eq!(format_flo32(100000.0), " 100000.00"); assert_eq!(format_flo32(999999.94), " 999999.94"); assert_eq!(format_flo32(1000000.0), " 1000000.0"); - assert_eq!(format_flo32(9999999.4), " 9999999.0"); + assert_eq!(format_flo32(9999999.0), " 9999999.0"); assert_eq!(format_flo32(10000000.0), " 10000000"); assert_eq!(format_flo32(99999992.0), " 99999992"); assert_eq!(format_flo32(100000000.0), " 1.0000000e8"); @@ -609,9 +638,9 @@ fn test_format_flo32() { assert_eq!(format_flo32(0.1), " 0.10000000"); assert_eq!(format_flo32(0.99999994), " 0.99999994"); assert_eq!(format_flo32(0.010000001), " 1.0000001e-2"); - //assert_eq!(format_flo32(0.01), " 1.0000000e-2"); // 9.9999998e-3 assert_eq!(format_flo32(0.099999994), " 9.9999994e-2"); assert_eq!(format_flo32(0.001), " 1.0000000e-3"); + assert_eq!(format_flo32(0.0099999998), " 9.9999998e-3"); assert_eq!(format_flo32(-1.0), " -1.0000000"); assert_eq!(format_flo32(-9.9999990), " -9.9999990"); @@ -627,7 +656,7 @@ fn test_format_flo32() { assert_eq!(format_flo32(-100000.0), " -100000.00"); assert_eq!(format_flo32(-999999.94), " -999999.94"); assert_eq!(format_flo32(-1000000.0), " -1000000.0"); - assert_eq!(format_flo32(-9999999.4), " -9999999.0"); + assert_eq!(format_flo32(-9999999.0), " -9999999.0"); assert_eq!(format_flo32(-10000000.0), " -10000000"); assert_eq!(format_flo32(-99999992.0), " -99999992"); assert_eq!(format_flo32(-100000000.0), " -1.0000000e8"); @@ -639,20 +668,40 @@ fn test_format_flo32() { assert_eq!(format_flo32(-0.1), " -0.10000000"); assert_eq!(format_flo32(-0.99999994), " -0.99999994"); assert_eq!(format_flo32(-0.010000001), " -1.0000001e-2"); - //assert_eq!(format_flo32(-0.01), " -1.0000000e-2"); // -9.9999998e-3 assert_eq!(format_flo32(-0.099999994), " -9.9999994e-2"); assert_eq!(format_flo32(-0.001), " -1.0000000e-3"); + assert_eq!(format_flo32(-0.0099999998), " -9.9999998e-3"); assert_eq!(format_flo32(3.4028233e38), " 3.4028233e38"); assert_eq!(format_flo32(-3.4028233e38), " 
-3.4028233e38"); - //assert_eq!(format_flo32(-3.4028235E38), " -3.4028235e38"); // literal out of range for f32 - assert_eq!(format_flo32(-1.1663108e-38),"-1.1663108e-38"); - assert_eq!(format_flo32(-4.701977e-38), "-4.7019771e-38"); + assert_eq!(format_flo32(-4.7019771e-38),"-4.7019771e-38"); + assert_eq!(format_flo32(1e-45), " 1e-45"); + assert_eq!(format_flo32(-3.402823466e+38), " -3.4028235e38"); assert_eq!(format_flo32(f32::NAN), " NaN"); assert_eq!(format_flo32(f32::INFINITY), " inf"); assert_eq!(format_flo32(f32::NEG_INFINITY), " -inf"); assert_eq!(format_flo32(-0.0), " -0"); assert_eq!(format_flo32(0.0), " 0"); } + +#[test] +fn test_format_flo64() { + assert_eq!(format_flo64(1.0), " 1.0000000000000000"); + assert_eq!(format_flo64(10.0), " 10.000000000000000"); + assert_eq!(format_flo64(1000000000000000.0), " 1000000000000000.0"); + assert_eq!(format_flo64(10000000000000000.0), " 10000000000000000"); + assert_eq!(format_flo64(100000000000000000.0), " 1.0000000000000000e17"); + + assert_eq!(format_flo64(-0.1), " -0.10000000000000001"); + assert_eq!(format_flo64(-0.01), " -1.0000000000000000e-2"); + + assert_eq!(format_flo64(-2.2250738585072014e-308),"-2.2250738585072014e-308"); + assert_eq!(format_flo64(4e-320), " 4e-320"); + assert_eq!(format_flo64(f64::NAN), " NaN"); + assert_eq!(format_flo64(f64::INFINITY), " inf"); + assert_eq!(format_flo64(f64::NEG_INFINITY), " -inf"); + assert_eq!(format_flo64(-0.0), " -0"); + assert_eq!(format_flo64(0.0), " 0"); +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 7c1385005..7854e789b 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -169,17 +169,18 @@ fn test_dec() { #[test] fn test_f32(){ - let input : [u8; 24] = [ + let input : [u8; 28] = [ 0x52, 0x06, 0x9e, 0xbf, // 0xbf9e0652 -1.2345679 0x4e, 0x61, 0x3c, 0x4b, // 0x4b3c614e 12345678 0x0f, 0x9b, 0x94, 0xfe, // 0xfe949b0f -9.876543E37 0x00, 0x00, 0x00, 0x80, // 0x80000000 -0.0 0xff, 0xff, 0xff, 0x7f, // 0x7fffffff NaN + 0xc2, 0x16, 0x01, 0x00, // 
0x000116c2 1e-40 0x00, 0x00, 0x7f, 0x80];// 0x807f0000 -1.1663108E-38 let expected_output = unindent(" 0000000 -1.2345679 12345678 -9.8765427e37 -0 - 0000020 NaN -1.1663108e-38 - 0000030 + 0000020 NaN 1e-40 -1.1663108e-38 + 0000034 "); let result = new_ucmd!().arg("-f").run_piped_stdin(&input[..]); @@ -188,6 +189,28 @@ fn test_f32(){ assert_eq!(result.stdout, expected_output); } +#[test] +fn test_f64(){ + + let input : [u8; 40] = [ + 0x27, 0x6b, 0x0a, 0x2f, 0x2a, 0xee, 0x45, 0x43, // 0x4345EE2A2F0A6B27 12345678912345678 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x0000000000000000 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x80, // 0x8010000000000000 -2.2250738585072014e-308 + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x0000000000000001 5e-324 (subnormal) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0];// 0xc000000000000000 -2 + let expected_output = unindent(" + 0000000 12345678912345678 0 + 0000020 -2.2250738585072014e-308 5e-324 + 0000040 -2.0000000000000000 + 0000050 + "); + let result = new_ucmd!().arg("-F").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + // We don't support multibyte chars, so big NEIN to this /* #[test] From f45169d37c1452695f4e1b90906ab51573abd839 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 23 Jul 2016 19:15:47 +0200 Subject: [PATCH 07/41] od: refactor: split into modules --- src/od/multifilereader.rs | 98 ++++++++++ src/od/od.rs | 367 +------------------------------------- src/od/prn_char.rs | 64 +++++++ src/od/prn_float.rs | 159 +++++++++++++++++ src/od/prn_int.rs | 39 ++++ 5 files changed, 369 insertions(+), 358 deletions(-) create mode 100644 src/od/multifilereader.rs create mode 100644 src/od/prn_char.rs create mode 100644 src/od/prn_float.rs create mode 100644 src/od/prn_int.rs diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs new file mode 100644 index 000000000..2e3d2d909 --- /dev/null +++ 
b/src/od/multifilereader.rs @@ -0,0 +1,98 @@ +use std; +use std::io; +use std::io::BufReader; +use std::fs::File; +use std::io::Write; + +#[derive(Debug)] +pub enum InputSource<'a> { + FileName(&'a str ), + Stdin +} + +// MultifileReader - concatenate all our input, file or stdin. +pub struct MultifileReader<'a> { + ni: std::slice::Iter<'a, InputSource<'a>>, + curr_file: Option>, + pub any_err: bool, +} + +impl<'b> MultifileReader<'b> { + pub fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> { + let mut mf = MultifileReader { + ni: fnames.iter(), + curr_file: None, // normally this means done; call next_file() + any_err: false, + }; + mf.next_file(); + return mf; + } + + fn next_file(&mut self) { + // loop retries with subsequent files if err - normally 'loops' once + loop { + match self.ni.next() { + None => { + self.curr_file = None; + return; + } + Some(input) => { + match *input { + InputSource::Stdin => { + self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); + return; + } + InputSource::FileName(fname) => { + match File::open(fname) { + Ok(f) => { + self.curr_file = Some(Box::new(BufReader::new(f))); + return; + } + Err(e) => { + // If any file can't be opened, + // print an error at the time that the file is needed, + // then move on the the next file. + // This matches the behavior of the original `od` + let _ = + writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e); + self.any_err = true + } + } + } + } + } + } + } + } + + // Fill buf with bytes read from the list of files + // Returns Ok() + // Handles io errors itself, thus always returns OK + // Fills the provided buffer completely, unless it has run out of input. + // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> + pub fn f_read(&mut self, buf: &mut [u8]) -> io::Result { + let mut xfrd = 0; + // while buffer we are filling is not full.. May go thru several files. 
+ 'fillloop: while xfrd < buf.len() { + match self.curr_file { + None => break, + Some(ref mut curr_file) => { + loop { + // stdin may return on 'return' (enter), even though the buffer isn't full. + xfrd += match curr_file.read(&mut buf[xfrd..]) { + Ok(0) => break, + Ok(n) => n, + Err(e) => panic!("file error: {}", e), + }; + if xfrd == buf.len() { + // transferred all that was asked for. + break 'fillloop; + } + } + } + } + self.next_file(); + } + Ok(xfrd) + } +} diff --git a/src/od/od.rs b/src/od/od.rs index 99e9f6726..f658f8b83 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -13,16 +13,18 @@ extern crate getopts; extern crate unindent; extern crate byteorder; -use std::fs::File; -use std::io::Read; -use std::io::BufReader; -use std::io::Write; -use std::io; -use std::num::FpCategory; -use std::f32; +mod multifilereader; +mod prn_int; +mod prn_char; +mod prn_float; + use std::f64; use unindent::*; use byteorder::*; +use multifilereader::*; +use prn_int::*; +use prn_char::*; +use prn_float::*; //This is available in some versions of std, but not all that we target. macro_rules! hashmap { @@ -39,12 +41,6 @@ static VERSION: &'static str = env!("CARGO_PKG_VERSION"); #[derive(Debug)] enum Radix { Decimal, Hexadecimal, Octal, Binary } -#[derive(Debug)] -enum InputSource<'a> { - FileName(&'a str ), - Stdin -} - pub fn uumain(args: Vec) -> i32 { let mut opts = getopts::Options::new(); @@ -349,92 +345,6 @@ fn print_with_radix(r: &Radix, x: usize) { } } -// MultifileReader - concatenate all our input, file or stdin. 
-struct MultifileReader<'a> { - ni: std::slice::Iter<'a, InputSource<'a>>, - curr_file: Option>, - any_err: bool, -} -impl<'b> MultifileReader<'b> { - fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> { - let mut mf = MultifileReader { - ni: fnames.iter(), - curr_file: None, // normally this means done; call next_file() - any_err: false, - }; - mf.next_file(); - return mf; - } - - fn next_file(&mut self) { - // loop retries with subsequent files if err - normally 'loops' once - loop { - match self.ni.next() { - None => { - self.curr_file = None; - return; - } - Some(input) => { - match *input { - InputSource::Stdin => { - self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); - return; - } - InputSource::FileName(fname) => { - match File::open(fname) { - Ok(f) => { - self.curr_file = Some(Box::new(BufReader::new(f))); - return; - } - Err(e) => { - // If any file can't be opened, - // print an error at the time that the file is needed, - // then move on the the next file. - // This matches the behavior of the original `od` - let _ = - writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e); - self.any_err = true - } - } - } - } - } - } - } - } - - // Fill buf with bytes read from the list of files - // Returns Ok() - // Handles io errors itself, thus always returns OK - // Fills the provided buffer completely, unless it has run out of input. - // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> - fn f_read(&mut self, buf: &mut [u8]) -> io::Result { - let mut xfrd = 0; - // while buffer we are filling is not full.. May go thru several files. - 'fillloop: while xfrd < buf.len() { - match self.curr_file { - None => break, - Some(ref mut curr_file) => { - loop { - // stdin may return on 'return' (enter), even though the buffer isn't full. 
- xfrd += match curr_file.read(&mut buf[xfrd..]) { - Ok(0) => break, - Ok(n) => n, - Err(e) => panic!("file error: {}", e), - }; - if xfrd == buf.len() { - // transferred all that was asked for. - break 'fillloop; - } - } - } - } - self.next_file(); - } - Ok(xfrd) - } -} - #[derive(Clone, Copy)] enum FormatWriter { IntWriter(fn(u64, usize)), @@ -446,262 +356,3 @@ struct OdFormat { writer: FormatWriter, offmarg: usize, } - -// TODO: use some sort of byte iterator, instead of passing bytes in u64 -fn print_item_oct(p: u64, itembytes: usize) { - let itemwidth = 3 * itembytes; - let itemspace = 4 * itembytes - itemwidth; - - print!("{:>itemspace$}{:0width$o}", - "", - p, - width = itemwidth, - itemspace = itemspace); -} - -fn print_item_hex(p: u64, itembytes: usize) { - let itemwidth = 2 * itembytes; - let itemspace = 4 * itembytes - itemwidth; - - print!("{:>itemspace$}{:0width$x}", - "", - p, - width = itemwidth, - itemspace = itemspace); -} - - -fn sign_extend(item: u64, itembytes: usize) -> i64{ - let shift = 64 - itembytes * 8; - (item << shift) as i64 >> shift -} - - -fn print_item_dec_s(p: u64, itembytes: usize) { - // sign extend - let s = sign_extend(p,itembytes); - print!("{:totalwidth$}", s, totalwidth = 4 * itembytes); -} -fn print_item_dec_u(p: u64, itembytes: usize) { - print!("{:totalwidth$}", p, totalwidth = 4 * itembytes); -} - -// TODO: multi-byte chars -// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'. 
- -static A_CHRS : [&'static str; 160] = -["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", - "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", - "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", - "can", "em", "sub", "esc", "fs", "gs", "rs", "us", - "sp", "!", "\"", "#", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "<", "=", ">", "?", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~", "del", - "80", "81", "82", "83", "84", "85", "86", "87", - "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", - "90", "91", "92", "93", "94", "95", "96", "97", - "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; - -fn print_item_a(p: u64, _: usize) { - // itembytes == 1 - let b = (p & 0xff) as u8; - print!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte - ); -} - - -static C_CHRS : [&'static str; 127] = [ -"\\0", "001", "002", "003", "004", "005", "006", "\\a", -"\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", -"020", "021", "022", "023", "024", "025", "026", "027", -"030", "031", "032", "033", "034", "035", "036", "037", - " ", "!", "\"", "#", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "<", "=", ">", "?", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~" ]; - - -fn print_item_c(p: u64, _: usize) { 
- // itembytes == 1 - let b = (p & 0xff) as usize; - - if b < C_CHRS.len() { - match C_CHRS.get(b as usize) { - Some(s) => print!("{:>4}", s), - None => print!("{:>4}", b), - } - } -} - -fn print_item_flo32(f: f64) { - print!(" {}", format_flo32(f as f32)) -} - -fn print_item_flo64(f: f64) { - print!(" {}", format_flo64(f)) -} - -// formats float with 8 significant digits, eg 12345678 or -1.2345678e+12 -// always retuns a string of 14 characters -fn format_flo32(f: f32) -> String { - let width: usize = 14; - let precision: usize = 8; - - if f.classify() == FpCategory::Subnormal { - // subnormal numbers will be normal as f64, so will print with a wrong precision - format!("{:width$e}", f, width = width) // subnormal numbers - } - else { - format_float(f as f64, width, precision) - } -} - -fn format_flo64(f: f64) -> String { - format_float(f, 24, 17) -} - -fn format_float(f: f64, width: usize, precision: usize) -> String { - - if !f.is_normal() { - if f == -0.0 && f.is_sign_negative() { return format!("{:>width$}", "-0", width = width) } - if f == 0.0 || !f.is_finite() { return format!("{:width$}", f, width = width) } - return format!("{:width$e}", f, width = width) // subnormal numbers - } - - let mut l = f.abs().log10().floor() as i32; - - let r = 10f64.powi(l); - if (f > 0.0 && r > f) || (f < 0.0 && -r < f) { - // fix precision error - l = l - 1; - } - - if l >= 0 && l <= (precision as i32 - 1) { - format!("{:width$.dec$}", f, - width = width, - dec = (precision-1) - l as usize) - } - else if l == -1 { - format!("{:width$.dec$}", f, - width = width, - dec = precision) - } - else { - format!("{:width$.dec$e}", f, - width = width, - dec = precision - 1) - } -} - -#[test] -fn test_format_flo32() { - assert_eq!(format_flo32(1.0), " 1.0000000"); - assert_eq!(format_flo32(9.9999990), " 9.9999990"); - assert_eq!(format_flo32(10.0), " 10.000000"); - assert_eq!(format_flo32(99.999977), " 99.999977"); - assert_eq!(format_flo32(99.999992), " 99.999992"); - 
assert_eq!(format_flo32(100.0), " 100.00000"); - assert_eq!(format_flo32(999.99994), " 999.99994"); - assert_eq!(format_flo32(1000.0), " 1000.0000"); - assert_eq!(format_flo32(9999.9990), " 9999.9990"); - assert_eq!(format_flo32(10000.0), " 10000.000"); - assert_eq!(format_flo32(99999.992), " 99999.992"); - assert_eq!(format_flo32(100000.0), " 100000.00"); - assert_eq!(format_flo32(999999.94), " 999999.94"); - assert_eq!(format_flo32(1000000.0), " 1000000.0"); - assert_eq!(format_flo32(9999999.0), " 9999999.0"); - assert_eq!(format_flo32(10000000.0), " 10000000"); - assert_eq!(format_flo32(99999992.0), " 99999992"); - assert_eq!(format_flo32(100000000.0), " 1.0000000e8"); - assert_eq!(format_flo32(9.9999994e8), " 9.9999994e8"); - assert_eq!(format_flo32(1.0e9), " 1.0000000e9"); - assert_eq!(format_flo32(9.9999990e9), " 9.9999990e9"); - assert_eq!(format_flo32(1.0e10), " 1.0000000e10"); - - assert_eq!(format_flo32(0.1), " 0.10000000"); - assert_eq!(format_flo32(0.99999994), " 0.99999994"); - assert_eq!(format_flo32(0.010000001), " 1.0000001e-2"); - assert_eq!(format_flo32(0.099999994), " 9.9999994e-2"); - assert_eq!(format_flo32(0.001), " 1.0000000e-3"); - assert_eq!(format_flo32(0.0099999998), " 9.9999998e-3"); - - assert_eq!(format_flo32(-1.0), " -1.0000000"); - assert_eq!(format_flo32(-9.9999990), " -9.9999990"); - assert_eq!(format_flo32(-10.0), " -10.000000"); - assert_eq!(format_flo32(-99.999977), " -99.999977"); - assert_eq!(format_flo32(-99.999992), " -99.999992"); - assert_eq!(format_flo32(-100.0), " -100.00000"); - assert_eq!(format_flo32(-999.99994), " -999.99994"); - assert_eq!(format_flo32(-1000.0), " -1000.0000"); - assert_eq!(format_flo32(-9999.9990), " -9999.9990"); - assert_eq!(format_flo32(-10000.0), " -10000.000"); - assert_eq!(format_flo32(-99999.992), " -99999.992"); - assert_eq!(format_flo32(-100000.0), " -100000.00"); - assert_eq!(format_flo32(-999999.94), " -999999.94"); - assert_eq!(format_flo32(-1000000.0), " -1000000.0"); - 
assert_eq!(format_flo32(-9999999.0), " -9999999.0"); - assert_eq!(format_flo32(-10000000.0), " -10000000"); - assert_eq!(format_flo32(-99999992.0), " -99999992"); - assert_eq!(format_flo32(-100000000.0), " -1.0000000e8"); - assert_eq!(format_flo32(-9.9999994e8), " -9.9999994e8"); - assert_eq!(format_flo32(-1.0e9), " -1.0000000e9"); - assert_eq!(format_flo32(-9.9999990e9), " -9.9999990e9"); - assert_eq!(format_flo32(-1.0e10), " -1.0000000e10"); - - assert_eq!(format_flo32(-0.1), " -0.10000000"); - assert_eq!(format_flo32(-0.99999994), " -0.99999994"); - assert_eq!(format_flo32(-0.010000001), " -1.0000001e-2"); - assert_eq!(format_flo32(-0.099999994), " -9.9999994e-2"); - assert_eq!(format_flo32(-0.001), " -1.0000000e-3"); - assert_eq!(format_flo32(-0.0099999998), " -9.9999998e-3"); - - assert_eq!(format_flo32(3.4028233e38), " 3.4028233e38"); - assert_eq!(format_flo32(-3.4028233e38), " -3.4028233e38"); - assert_eq!(format_flo32(-1.1663108e-38),"-1.1663108e-38"); - assert_eq!(format_flo32(-4.7019771e-38),"-4.7019771e-38"); - assert_eq!(format_flo32(1e-45), " 1e-45"); - - assert_eq!(format_flo32(-3.402823466e+38), " -3.4028235e38"); - assert_eq!(format_flo32(f32::NAN), " NaN"); - assert_eq!(format_flo32(f32::INFINITY), " inf"); - assert_eq!(format_flo32(f32::NEG_INFINITY), " -inf"); - assert_eq!(format_flo32(-0.0), " -0"); - assert_eq!(format_flo32(0.0), " 0"); -} - -#[test] -fn test_format_flo64() { - assert_eq!(format_flo64(1.0), " 1.0000000000000000"); - assert_eq!(format_flo64(10.0), " 10.000000000000000"); - assert_eq!(format_flo64(1000000000000000.0), " 1000000000000000.0"); - assert_eq!(format_flo64(10000000000000000.0), " 10000000000000000"); - assert_eq!(format_flo64(100000000000000000.0), " 1.0000000000000000e17"); - - assert_eq!(format_flo64(-0.1), " -0.10000000000000001"); - assert_eq!(format_flo64(-0.01), " -1.0000000000000000e-2"); - - assert_eq!(format_flo64(-2.2250738585072014e-308),"-2.2250738585072014e-308"); - assert_eq!(format_flo64(4e-320), " 
4e-320"); - assert_eq!(format_flo64(f64::NAN), " NaN"); - assert_eq!(format_flo64(f64::INFINITY), " inf"); - assert_eq!(format_flo64(f64::NEG_INFINITY), " -inf"); - assert_eq!(format_flo64(-0.0), " -0"); - assert_eq!(format_flo64(0.0), " 0"); -} diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs new file mode 100644 index 000000000..8791d51d5 --- /dev/null +++ b/src/od/prn_char.rs @@ -0,0 +1,64 @@ + +// TODO: multi-byte chars +// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'. + +static A_CHRS : [&'static str; 160] = +["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", "em", "sub", "esc", "fs", "gs", "rs", "us", + "sp", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "del", + "80", "81", "82", "83", "84", "85", "86", "87", + "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", + "90", "91", "92", "93", "94", "95", "96", "97", + "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; + +pub fn print_item_a(p: u64, _: usize) { + // itembytes == 1 + let b = (p & 0xff) as u8; + print!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte + ); +} + + +static C_CHRS : [&'static str; 127] = [ +"\\0", "001", "002", "003", "004", "005", "006", "\\a", +"\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", +"020", "021", "022", "023", "024", "025", "026", "027", 
+"030", "031", "032", "033", "034", "035", "036", "037", + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~" ]; + + +pub fn print_item_c(p: u64, _: usize) { + // itembytes == 1 + let b = (p & 0xff) as usize; + + if b < C_CHRS.len() { + match C_CHRS.get(b as usize) { + Some(s) => print!("{:>4}", s), + None => print!("{:>4}", b), + } + } +} diff --git a/src/od/prn_float.rs b/src/od/prn_float.rs new file mode 100644 index 000000000..76118e73c --- /dev/null +++ b/src/od/prn_float.rs @@ -0,0 +1,159 @@ +use std::num::FpCategory; +use std::f32; +use std::f64; + +pub fn print_item_flo32(f: f64) { + print!(" {}", format_flo32(f as f32)) +} + +pub fn print_item_flo64(f: f64) { + print!(" {}", format_flo64(f)) +} + +// formats float with 8 significant digits, eg 12345678 or -1.2345678e+12 +// always retuns a string of 14 characters +fn format_flo32(f: f32) -> String { + let width: usize = 14; + let precision: usize = 8; + + if f.classify() == FpCategory::Subnormal { + // subnormal numbers will be normal as f64, so will print with a wrong precision + format!("{:width$e}", f, width = width) // subnormal numbers + } + else { + format_float(f as f64, width, precision) + } +} + +fn format_flo64(f: f64) -> String { + format_float(f, 24, 17) +} + +fn format_float(f: f64, width: usize, precision: usize) -> String { + + if !f.is_normal() { + if f == -0.0 && f.is_sign_negative() { return format!("{:>width$}", "-0", width = width) } + if f == 0.0 || !f.is_finite() { return format!("{:width$}", f, width = width) } + return format!("{:width$e}", f, 
width = width) // subnormal numbers + } + + let mut l = f.abs().log10().floor() as i32; + + let r = 10f64.powi(l); + if (f > 0.0 && r > f) || (f < 0.0 && -r < f) { + // fix precision error + l = l - 1; + } + + if l >= 0 && l <= (precision as i32 - 1) { + format!("{:width$.dec$}", f, + width = width, + dec = (precision-1) - l as usize) + } + else if l == -1 { + format!("{:width$.dec$}", f, + width = width, + dec = precision) + } + else { + format!("{:width$.dec$e}", f, + width = width, + dec = precision - 1) + } +} + +#[test] +fn test_format_flo32() { + assert_eq!(format_flo32(1.0), " 1.0000000"); + assert_eq!(format_flo32(9.9999990), " 9.9999990"); + assert_eq!(format_flo32(10.0), " 10.000000"); + assert_eq!(format_flo32(99.999977), " 99.999977"); + assert_eq!(format_flo32(99.999992), " 99.999992"); + assert_eq!(format_flo32(100.0), " 100.00000"); + assert_eq!(format_flo32(999.99994), " 999.99994"); + assert_eq!(format_flo32(1000.0), " 1000.0000"); + assert_eq!(format_flo32(9999.9990), " 9999.9990"); + assert_eq!(format_flo32(10000.0), " 10000.000"); + assert_eq!(format_flo32(99999.992), " 99999.992"); + assert_eq!(format_flo32(100000.0), " 100000.00"); + assert_eq!(format_flo32(999999.94), " 999999.94"); + assert_eq!(format_flo32(1000000.0), " 1000000.0"); + assert_eq!(format_flo32(9999999.0), " 9999999.0"); + assert_eq!(format_flo32(10000000.0), " 10000000"); + assert_eq!(format_flo32(99999992.0), " 99999992"); + assert_eq!(format_flo32(100000000.0), " 1.0000000e8"); + assert_eq!(format_flo32(9.9999994e8), " 9.9999994e8"); + assert_eq!(format_flo32(1.0e9), " 1.0000000e9"); + assert_eq!(format_flo32(9.9999990e9), " 9.9999990e9"); + assert_eq!(format_flo32(1.0e10), " 1.0000000e10"); + + assert_eq!(format_flo32(0.1), " 0.10000000"); + assert_eq!(format_flo32(0.99999994), " 0.99999994"); + assert_eq!(format_flo32(0.010000001), " 1.0000001e-2"); + assert_eq!(format_flo32(0.099999994), " 9.9999994e-2"); + assert_eq!(format_flo32(0.001), " 1.0000000e-3"); + 
assert_eq!(format_flo32(0.0099999998), " 9.9999998e-3"); + + assert_eq!(format_flo32(-1.0), " -1.0000000"); + assert_eq!(format_flo32(-9.9999990), " -9.9999990"); + assert_eq!(format_flo32(-10.0), " -10.000000"); + assert_eq!(format_flo32(-99.999977), " -99.999977"); + assert_eq!(format_flo32(-99.999992), " -99.999992"); + assert_eq!(format_flo32(-100.0), " -100.00000"); + assert_eq!(format_flo32(-999.99994), " -999.99994"); + assert_eq!(format_flo32(-1000.0), " -1000.0000"); + assert_eq!(format_flo32(-9999.9990), " -9999.9990"); + assert_eq!(format_flo32(-10000.0), " -10000.000"); + assert_eq!(format_flo32(-99999.992), " -99999.992"); + assert_eq!(format_flo32(-100000.0), " -100000.00"); + assert_eq!(format_flo32(-999999.94), " -999999.94"); + assert_eq!(format_flo32(-1000000.0), " -1000000.0"); + assert_eq!(format_flo32(-9999999.0), " -9999999.0"); + assert_eq!(format_flo32(-10000000.0), " -10000000"); + assert_eq!(format_flo32(-99999992.0), " -99999992"); + assert_eq!(format_flo32(-100000000.0), " -1.0000000e8"); + assert_eq!(format_flo32(-9.9999994e8), " -9.9999994e8"); + assert_eq!(format_flo32(-1.0e9), " -1.0000000e9"); + assert_eq!(format_flo32(-9.9999990e9), " -9.9999990e9"); + assert_eq!(format_flo32(-1.0e10), " -1.0000000e10"); + + assert_eq!(format_flo32(-0.1), " -0.10000000"); + assert_eq!(format_flo32(-0.99999994), " -0.99999994"); + assert_eq!(format_flo32(-0.010000001), " -1.0000001e-2"); + assert_eq!(format_flo32(-0.099999994), " -9.9999994e-2"); + assert_eq!(format_flo32(-0.001), " -1.0000000e-3"); + assert_eq!(format_flo32(-0.0099999998), " -9.9999998e-3"); + + assert_eq!(format_flo32(3.4028233e38), " 3.4028233e38"); + assert_eq!(format_flo32(-3.4028233e38), " -3.4028233e38"); + assert_eq!(format_flo32(-1.1663108e-38),"-1.1663108e-38"); + assert_eq!(format_flo32(-4.7019771e-38),"-4.7019771e-38"); + assert_eq!(format_flo32(1e-45), " 1e-45"); + + assert_eq!(format_flo32(-3.402823466e+38), " -3.4028235e38"); + assert_eq!(format_flo32(f32::NAN), " 
NaN"); + assert_eq!(format_flo32(f32::INFINITY), " inf"); + assert_eq!(format_flo32(f32::NEG_INFINITY), " -inf"); + assert_eq!(format_flo32(-0.0), " -0"); + assert_eq!(format_flo32(0.0), " 0"); +} + +#[test] +fn test_format_flo64() { + assert_eq!(format_flo64(1.0), " 1.0000000000000000"); + assert_eq!(format_flo64(10.0), " 10.000000000000000"); + assert_eq!(format_flo64(1000000000000000.0), " 1000000000000000.0"); + assert_eq!(format_flo64(10000000000000000.0), " 10000000000000000"); + assert_eq!(format_flo64(100000000000000000.0), " 1.0000000000000000e17"); + + assert_eq!(format_flo64(-0.1), " -0.10000000000000001"); + assert_eq!(format_flo64(-0.01), " -1.0000000000000000e-2"); + + assert_eq!(format_flo64(-2.2250738585072014e-308),"-2.2250738585072014e-308"); + assert_eq!(format_flo64(4e-320), " 4e-320"); + assert_eq!(format_flo64(f64::NAN), " NaN"); + assert_eq!(format_flo64(f64::INFINITY), " inf"); + assert_eq!(format_flo64(f64::NEG_INFINITY), " -inf"); + assert_eq!(format_flo64(-0.0), " -0"); + assert_eq!(format_flo64(0.0), " 0"); +} diff --git a/src/od/prn_int.rs b/src/od/prn_int.rs new file mode 100644 index 000000000..47959168f --- /dev/null +++ b/src/od/prn_int.rs @@ -0,0 +1,39 @@ +// TODO: use some sort of byte iterator, instead of passing bytes in u64 +pub fn print_item_oct(p: u64, itembytes: usize) { + let itemwidth = 3 * itembytes; + let itemspace = 4 * itembytes - itemwidth; + + print!("{:>itemspace$}{:0width$o}", + "", + p, + width = itemwidth, + itemspace = itemspace); +} + +pub fn print_item_hex(p: u64, itembytes: usize) { + let itemwidth = 2 * itembytes; + let itemspace = 4 * itembytes - itemwidth; + + print!("{:>itemspace$}{:0width$x}", + "", + p, + width = itemwidth, + itemspace = itemspace); +} + + +fn sign_extend(item: u64, itembytes: usize) -> i64{ + let shift = 64 - itembytes * 8; + (item << shift) as i64 >> shift +} + + +pub fn print_item_dec_s(p: u64, itembytes: usize) { + // sign extend + let s = sign_extend(p,itembytes); + 
print!("{:totalwidth$}", s, totalwidth = 4 * itembytes); +} + +pub fn print_item_dec_u(p: u64, itembytes: usize) { + print!("{:totalwidth$}", p, totalwidth = 4 * itembytes); +} From 1b01f02e9eb204419afb930f5a6ba0c41db3d366 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 23 Jul 2016 19:51:03 +0200 Subject: [PATCH 08/41] od: remove trailing spaces in output can't find documentation confirming this behaviour besides it does not work correctly on all formats see also uutils/coreutils#922 --- src/od/od.rs | 8 -------- tests/test_od.rs | 12 ++++++------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index f658f8b83..fdb7b7dcd 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -213,7 +213,6 @@ pub fn uumain(args: Vec) -> i32 { } const LINEBYTES:usize = 16; -const WORDBYTES:usize = 2; fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 { @@ -288,13 +287,6 @@ fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat } b = nextb; } - // Add extra spaces to pad out the short, presumably last, line. - if n < LINEBYTES { - // calc # of items we did not print, must be short at least WORDBYTES to be missing any. 
- let words_short = (LINEBYTES - n) / WORDBYTES; - // XXX this is running short for -c & -a - print!("{:>width$}", "", width = (words_short) * (6 + 2)); - } print!("\n"); } addr += n; diff --git a/tests/test_od.rs b/tests/test_od.rs index 7854e789b..8836faa58 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -12,7 +12,7 @@ use self::unindent::*; // octal dump of 'abcdefghijklmnopqrstuvwxyz\n' static ALPHA_OUT: &'static str = " 0000000 061141 062143 063145 064147 065151 066153 067155 070157 - 0000020 071161 072163 073165 074167 075171 000012 + 0000020 071161 072163 073165 074167 075171 000012 0000033 "; @@ -136,8 +136,8 @@ fn test_multiple_formats() { assert_eq!(result.stdout, unindent(" 0000000 a b c d e f g h i j k l m n o p 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 - 0000020 q r s t u v w x y z \\n - 161 162 163 164 165 166 167 170 171 172 012 + 0000020 q r s t u v w x y z \\n + 161 162 163 164 165 166 167 170 171 172 012 0000033 ")); @@ -155,7 +155,7 @@ fn test_dec() { 0x00u8,0x80u8, 0x01u8,0x80u8,]; let expected_output = unindent(" - 0000000 0 1 2 3 32767 -32768 -32767 + 0000000 0 1 2 3 32767 -32768 -32767 0000016 "); let result = new_ucmd!().arg("-i").run_piped_stdin(&input[..]); @@ -179,7 +179,7 @@ fn test_f32(){ 0x00, 0x00, 0x7f, 0x80];// 0x807f0000 -1.1663108E-38 let expected_output = unindent(" 0000000 -1.2345679 12345678 -9.8765427e37 -0 - 0000020 NaN 1e-40 -1.1663108e-38 + 0000020 NaN 1e-40 -1.1663108e-38 0000034 "); let result = new_ucmd!().arg("-f").run_piped_stdin(&input[..]); @@ -201,7 +201,7 @@ fn test_f64(){ let expected_output = unindent(" 0000000 12345678912345678 0 0000020 -2.2250738585072014e-308 5e-324 - 0000040 -2.0000000000000000 + 0000040 -2.0000000000000000 0000050 "); let result = new_ucmd!().arg("-F").run_piped_stdin(&input[..]); From 167d7d3ca9cd2258c8cbd7f73c36e0cb1f9f040e Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 23 Jul 2016 21:49:43 +0200 Subject: [PATCH 09/41] od: implement -w (width) --- 
Cargo.lock | 1 + src/od/Cargo.toml | 1 + src/od/od.rs | 33 ++++++++++++++++++++++-------- tests/test_od.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32e89bfb9..ebe84879a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -667,6 +667,7 @@ dependencies = [ "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "unindent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "uucore 0.0.1", ] [[package]] diff --git a/src/od/Cargo.toml b/src/od/Cargo.toml index 555c82875..0b44a8633 100644 --- a/src/od/Cargo.toml +++ b/src/od/Cargo.toml @@ -12,6 +12,7 @@ getopts = "*" libc = "*" unindent = "*" byteorder = "*" +uucore = { path="../uucore" } [[bin]] name = "od" diff --git a/src/od/od.rs b/src/od/od.rs index fdb7b7dcd..f46ed8c64 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -13,12 +13,16 @@ extern crate getopts; extern crate unindent; extern crate byteorder; +#[macro_use] +extern crate uucore; + mod multifilereader; mod prn_int; mod prn_char; mod prn_float; -use std::f64; +use std::cmp; +use std::io::Write; use unindent::*; use byteorder::*; use multifilereader::*; @@ -73,7 +77,7 @@ pub fn uumain(args: Vec) -> i32 { opts.optopt("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); - opts.optopt("w", "width", + opts.optflagopt("w", "width", ("output BYTES bytes per output line. 
32 is implied when BYTES is not \ specified."), "BYTES"); @@ -209,23 +213,36 @@ pub fn uumain(args: Vec) -> i32 { formats.push(mkfmt(2, &oct)); // 2 byte octal is the default } - odfunc(&input_offset_base, &inputs, &formats[..]) + let mut line_bytes = match matches.opt_default("w", "32") { + None => 16, + Some(s) => { + match s.parse::() { + Ok(i) => { i } + Err(_) => { 2 } + } + } + }; + let min_bytes = formats.iter().fold(2, |max, next| cmp::max(max, next.itembytes)); + if line_bytes % min_bytes != 0 { + show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); + line_bytes = min_bytes; + } + + odfunc(line_bytes, &input_offset_base, &inputs, &formats[..]) } -const LINEBYTES:usize = 16; - -fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 { +fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 { let mut mf = MultifileReader::new(fnames); let mut addr = 0; - let bytes = &mut [b'\x00'; LINEBYTES]; + let mut bytes: Vec = vec![b'\x00'; line_bytes]; loop { // print each line data (or multi-format raster of several lines describing the same data). 
print_with_radix(input_offset_base, addr); // print offset // if printing in multiple formats offset is printed only once - match mf.f_read(bytes) { + match mf.f_read(bytes.as_mut_slice()) { Ok(0) => { print!("\n"); break; diff --git a/tests/test_od.rs b/tests/test_od.rs index 8836faa58..98d399826 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -223,3 +223,54 @@ fn mit_die_umlauten_getesten() { "0000000 U n i v e r s i t ä ** t T ü **\n0000020 b i n g e n\n0000026") } */ + +#[test] +fn test_width(){ + + let input : [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let expected_output = unindent(" + 0000000 000000 000000 + 0000004 000000 000000 + 0000010 + "); + + let result = new_ucmd!().arg("-w4").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_invalid_width(){ + + let input : [u8; 4] = [0x00, 0x00, 0x00, 0x00]; + let expected_output = unindent(" + 0000000 000000 + 0000002 000000 + 0000004 + "); + + let result = new_ucmd!().arg("-w5").run_piped_stdin(&input[..]); + + assert_eq!(result.stderr, "od: warning: invalid width 5; using 2 instead\n"); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_width_without_value(){ + + let input : [u8; 40] = [0 ; 40]; + let expected_output = unindent(" + 0000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 + 0000040 000000 000000 000000 000000 + 0000050 + "); + + let result = new_ucmd!().arg("-w").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} From e905c2ec717aa433bff238e0eba67d460dba1cc2 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 23 Jul 2016 23:18:02 +0200 Subject: [PATCH 10/41] od: do not panic on invalid user input use macros from uucore where possible --- src/od/multifilereader.rs | 5 
+++-- src/od/od.rs | 15 ++++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs index 2e3d2d909..c2bcb688e 100644 --- a/src/od/multifilereader.rs +++ b/src/od/multifilereader.rs @@ -53,8 +53,9 @@ impl<'b> MultifileReader<'b> { // print an error at the time that the file is needed, // then move on the the next file. // This matches the behavior of the original `od` - let _ = - writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e); + eprintln!("{}: '{}': {}", + executable!().split("::").next().unwrap(), // remove module + fname, e); self.any_err = true } } diff --git a/src/od/od.rs b/src/od/od.rs index f46ed8c64..707b75ee1 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -39,7 +39,6 @@ macro_rules! hashmap { }} } -static NAME: &'static str = "od"; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); #[derive(Debug)] @@ -86,7 +85,10 @@ pub fn uumain(args: Vec) -> i32 { let matches = match opts.parse(&args[1..]) { Ok(m) => m, - Err(f) => panic!("Invalid options\n{}", f) + Err(f) => { + disp_err!("{}", f); + return 1; + } }; if matches.opt_present("h") { @@ -94,18 +96,21 @@ pub fn uumain(args: Vec) -> i32 { Usage: {0} [OPTION]... [FILENAME]... 
- Displays data in various human-readable formats.", NAME)); + Displays data in various human-readable formats.", executable!())); println!("{}", opts.usage(&msg)); return 0; } if matches.opt_present("version") { - println!("{} {}", NAME, VERSION); + println!("{} {}", executable!(), VERSION); return 0; } let input_offset_base = match parse_radix(matches.opt_str("A")) { Ok(r) => r, - Err(f) => { panic!("Invalid -A/--address-radix\n{}", f) } + Err(f) => { + disp_err!("Invalid -A/--address-radix\n{}", f); + return 1; + } }; // Gather up file names - args which don't start with '-' From 36b88f268dd868d433c96ac834191a72428036a5 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 24 Jul 2016 21:51:21 +0200 Subject: [PATCH 11/41] od: enable hexadecimal output --- src/od/od.rs | 4 ++++ tests/test_od.rs | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/od/od.rs b/src/od/od.rs index 707b75ee1..8600f6e31 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -66,9 +66,13 @@ pub fn uumain(args: Vec) -> i32 { opts.optflag("I", "", "decimal 2-byte units"); opts.optflag("L", "", "decimal 2-byte units"); opts.optflag("i", "", "decimal 2-byte units"); + opts.optflag("x", "", "hexadecimal 2-byte units"); + opts.optflag("h", "", "hexadecimal 2-byte units"); opts.optflag("O", "", "octal 4-byte units"); opts.optflag("s", "", "decimal 4-byte units"); + opts.optflag("X", "", "hexadecimal 4-byte units"); + opts.optflag("H", "", "hexadecimal 4-byte units"); opts.optflag("e", "", "floating point double precision (64-bit) units"); opts.optflag("f", "", "floating point single precision (32-bit) units"); diff --git a/tests/test_od.rs b/tests/test_od.rs index 98d399826..651e85c65 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -166,6 +166,38 @@ fn test_dec() { } +#[test] +fn test_hex16(){ + + let input : [u8; 9] = [ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; + let expected_output = unindent(" + 0000000 2301 6745 ab89 efcd 00ff + 0000011 + 
"); + let result = new_ucmd!().arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_hex32(){ + + let input : [u8; 9] = [ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; + let expected_output = unindent(" + 0000000 67452301 efcdab89 000000ff + 0000011 + "); + let result = new_ucmd!().arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + #[test] fn test_f32(){ From e006a841368eb932ca1d992302b25d1502ea8036 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 24 Jul 2016 22:28:22 +0200 Subject: [PATCH 12/41] od: refactor: do all printing in odfunc --- src/od/od.rs | 63 ++++++++++++++++++++++++--------------------- src/od/prn_char.rs | 13 +++++----- src/od/prn_float.rs | 8 +++--- src/od/prn_int.rs | 20 +++++++------- 4 files changed, 55 insertions(+), 49 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index 8600f6e31..4379e1550 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -150,28 +150,28 @@ pub fn uumain(args: Vec) -> i32 { offmarg: usize, }; let oct = OdFormater { - writer: FormatWriter::IntWriter(print_item_oct), offmarg: 2 + writer: FormatWriter::IntWriter(format_item_oct), offmarg: 2 }; let hex = OdFormater { - writer: FormatWriter::IntWriter(print_item_hex), offmarg: 2 + writer: FormatWriter::IntWriter(format_item_hex), offmarg: 2 }; let dec_u = OdFormater { - writer: FormatWriter::IntWriter(print_item_dec_u), offmarg: 2 + writer: FormatWriter::IntWriter(format_item_dec_u), offmarg: 2 }; let dec_s = OdFormater { - writer: FormatWriter::IntWriter(print_item_dec_s), offmarg: 2 + writer: FormatWriter::IntWriter(format_item_dec_s), offmarg: 2 }; let a_char = OdFormater { - writer: FormatWriter::IntWriter(print_item_a), offmarg: 1 + writer: FormatWriter::IntWriter(format_item_a), offmarg: 1 }; let c_char = OdFormater { - writer: 
FormatWriter::IntWriter(print_item_c), offmarg: 1 + writer: FormatWriter::IntWriter(format_item_c), offmarg: 1 }; let flo32 = OdFormater { - writer: FormatWriter::FloatWriter(print_item_flo32), offmarg: 0 + writer: FormatWriter::FloatWriter(format_item_flo32), offmarg: 0 }; let flo64 = OdFormater { - writer: FormatWriter::FloatWriter(print_item_flo64), offmarg: 0 + writer: FormatWriter::FloatWriter(format_item_flo64), offmarg: 0 }; fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat { @@ -248,24 +248,17 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], loop { // print each line data (or multi-format raster of several lines describing the same data). - print_with_radix(input_offset_base, addr); // print offset - // if printing in multiple formats offset is printed only once - match mf.f_read(bytes.as_mut_slice()) { Ok(0) => { - print!("\n"); + print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset break; } Ok(n) => { let mut first = true; // First line of a multi-format raster. for f in formats { - if !first { - // this takes the space of the file offset on subsequent - // lines of multi-format rasters. - print!(" "); - } - first = false; - print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word + let mut output_text = String::new(); + + output_text.push_str(&format!("{:>width$}", "", width = f.offmarg));// 4 spaces after offset - we print 2 more before each word // not enough byte for a whole element, this should only happen on the last line. 
if n % f.itembytes != 0 { @@ -296,7 +289,7 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], } _ => { panic!("Invalid itembytes: {}", f.itembytes); } }; - func(p, f.itembytes); + output_text.push_str(&func(p, f.itembytes)); } FormatWriter::FloatWriter(func) => { let p: f64 = match f.itembytes { @@ -308,12 +301,23 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], } _ => { panic!("Invalid itembytes: {}", f.itembytes); } }; - func(p); + output_text.push_str(&func(p)); } } b = nextb; } - print!("\n"); + + if first { + print!("{}", print_with_radix(input_offset_base, addr)); // print offset + // if printing in multiple formats offset is printed only once + first = false; + } + else { + // this takes the space of the file offset on subsequent + // lines of multi-format rasters. + print!(" "); + } + print!("{}\n", output_text); } addr += n; } @@ -322,6 +326,7 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], } }; } + if mf.any_err { 1 } else { @@ -352,21 +357,21 @@ fn parse_radix(radix_str: Option) -> Result { } } -fn print_with_radix(r: &Radix, x: usize) { +fn print_with_radix(r: &Radix, x: usize) -> String{ // TODO(keunwoo): field widths should be based on sizeof(x), or chosen dynamically based on the // expected range of address values. Binary in particular is not great here. 
match *r { - Radix::Decimal => print!("{:07}", x), - Radix::Hexadecimal => print!("{:07X}", x), - Radix::Octal => print!("{:07o}", x), - Radix::Binary => print!("{:07b}", x) + Radix::Decimal => format!("{:07}", x), + Radix::Hexadecimal => format!("{:07X}", x), + Radix::Octal => format!("{:07o}", x), + Radix::Binary => format!("{:07b}", x) } } #[derive(Clone, Copy)] enum FormatWriter { - IntWriter(fn(u64, usize)), - FloatWriter(fn(f64)), + IntWriter(fn(u64, usize) -> String), + FloatWriter(fn(f64) -> String), } struct OdFormat { diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index 8791d51d5..b631becfe 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -24,11 +24,11 @@ static A_CHRS : [&'static str; 160] = "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; -pub fn print_item_a(p: u64, _: usize) { +pub fn format_item_a(p: u64, _: usize) -> String { // itembytes == 1 let b = (p & 0xff) as u8; - print!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte - ); + format!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte + ) } @@ -51,14 +51,15 @@ static C_CHRS : [&'static str; 127] = [ "x", "y", "z", "{", "|", "}", "~" ]; -pub fn print_item_c(p: u64, _: usize) { +pub fn format_item_c(p: u64, _: usize) -> String { // itembytes == 1 let b = (p & 0xff) as usize; if b < C_CHRS.len() { match C_CHRS.get(b as usize) { - Some(s) => print!("{:>4}", s), - None => print!("{:>4}", b), + Some(s) => format!("{:>4}", s), + None => format!("{:>4}", b), } } + else { String::new() } } diff --git a/src/od/prn_float.rs b/src/od/prn_float.rs index 76118e73c..93a4c9ee6 100644 --- a/src/od/prn_float.rs +++ b/src/od/prn_float.rs @@ -2,12 +2,12 @@ use std::num::FpCategory; use std::f32; use std::f64; -pub fn print_item_flo32(f: f64) { - print!(" {}", format_flo32(f as f32)) +pub fn format_item_flo32(f: f64) -> String { + 
format!(" {}", format_flo32(f as f32)) } -pub fn print_item_flo64(f: f64) { - print!(" {}", format_flo64(f)) +pub fn format_item_flo64(f: f64) -> String { + format!(" {}", format_flo64(f)) } // formats float with 8 significant digits, eg 12345678 or -1.2345678e+12 diff --git a/src/od/prn_int.rs b/src/od/prn_int.rs index 47959168f..24bdee939 100644 --- a/src/od/prn_int.rs +++ b/src/od/prn_int.rs @@ -1,24 +1,24 @@ // TODO: use some sort of byte iterator, instead of passing bytes in u64 -pub fn print_item_oct(p: u64, itembytes: usize) { +pub fn format_item_oct(p: u64, itembytes: usize) -> String { let itemwidth = 3 * itembytes; let itemspace = 4 * itembytes - itemwidth; - print!("{:>itemspace$}{:0width$o}", + format!("{:>itemspace$}{:0width$o}", "", p, width = itemwidth, - itemspace = itemspace); + itemspace = itemspace) } -pub fn print_item_hex(p: u64, itembytes: usize) { +pub fn format_item_hex(p: u64, itembytes: usize) -> String { let itemwidth = 2 * itembytes; let itemspace = 4 * itembytes - itemwidth; - print!("{:>itemspace$}{:0width$x}", + format!("{:>itemspace$}{:0width$x}", "", p, width = itemwidth, - itemspace = itemspace); + itemspace = itemspace) } @@ -28,12 +28,12 @@ fn sign_extend(item: u64, itembytes: usize) -> i64{ } -pub fn print_item_dec_s(p: u64, itembytes: usize) { +pub fn format_item_dec_s(p: u64, itembytes: usize) -> String { // sign extend let s = sign_extend(p,itembytes); - print!("{:totalwidth$}", s, totalwidth = 4 * itembytes); + format!("{:totalwidth$}", s, totalwidth = 4 * itembytes) } -pub fn print_item_dec_u(p: u64, itembytes: usize) { - print!("{:totalwidth$}", p, totalwidth = 4 * itembytes); +pub fn format_item_dec_u(p: u64, itembytes: usize) -> String { + format!("{:totalwidth$}", p, totalwidth = 4 * itembytes) } From 80386ef04fb17fe2c5bfb20939fe1726ba4b8704 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 25 Jul 2016 13:59:21 +0200 Subject: [PATCH 13/41] od: split odfunc() --- src/od/od.rs | 130 
++++++++++++++++++++++++++------------------------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index 4379e1550..db82177e4 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -254,71 +254,16 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], break; } Ok(n) => { - let mut first = true; // First line of a multi-format raster. - for f in formats { - let mut output_text = String::new(); - - output_text.push_str(&format!("{:>width$}", "", width = f.offmarg));// 4 spaces after offset - we print 2 more before each word - - // not enough byte for a whole element, this should only happen on the last line. - if n % f.itembytes != 0 { - let b = n / f.itembytes; - // set zero bytes in the part of the buffer that will be used, but is not filled. - for i in n..(b + 1) * f.itembytes { - bytes[i] = 0; - } + // not enough byte for a whole element, this should only happen on the last line. + if n != line_bytes { + // set zero bytes in the part of the buffer that will be used, but is not filled. 
+ for i in n..line_bytes { + bytes[i] = 0; } - - let mut b = 0; - while b < n { - let nextb = b + f.itembytes; - match f.writer { - FormatWriter::IntWriter(func) => { - let p: u64 = match f.itembytes { - 1 => { - bytes[b] as u64 - } - 2 => { - LittleEndian::read_u16(&bytes[b..nextb]) as u64 - } - 4 => { - LittleEndian::read_u32(&bytes[b..nextb]) as u64 - } - 8 => { - LittleEndian::read_u64(&bytes[b..nextb]) - } - _ => { panic!("Invalid itembytes: {}", f.itembytes); } - }; - output_text.push_str(&func(p, f.itembytes)); - } - FormatWriter::FloatWriter(func) => { - let p: f64 = match f.itembytes { - 4 => { - LittleEndian::read_f32(&bytes[b..nextb]) as f64 - } - 8 => { - LittleEndian::read_f64(&bytes[b..nextb]) - } - _ => { panic!("Invalid itembytes: {}", f.itembytes); } - }; - output_text.push_str(&func(p)); - } - } - b = nextb; - } - - if first { - print!("{}", print_with_radix(input_offset_base, addr)); // print offset - // if printing in multiple formats offset is printed only once - first = false; - } - else { - // this takes the space of the file offset on subsequent - // lines of multi-format rasters. - print!(" "); - } - print!("{}\n", output_text); } + + print_bytes(&bytes, n, &print_with_radix(input_offset_base, addr), formats); + addr += n; } Err(_) => { @@ -334,6 +279,65 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], } } +fn print_bytes(bytes: &[u8], length: usize, prefix: &str, formats: &[OdFormat]) { + let mut first = true; // First line of a multi-format raster. 
+ for f in formats { + let mut output_text = String::new(); + + output_text.push_str(&format!("{:>width$}", "", width = f.offmarg));// 4 spaces after offset - we print 2 more before each word + + let mut b = 0; + while b < length { + let nextb = b + f.itembytes; + match f.writer { + FormatWriter::IntWriter(func) => { + let p: u64 = match f.itembytes { + 1 => { + bytes[b] as u64 + } + 2 => { + LittleEndian::read_u16(&bytes[b..nextb]) as u64 + } + 4 => { + LittleEndian::read_u32(&bytes[b..nextb]) as u64 + } + 8 => { + LittleEndian::read_u64(&bytes[b..nextb]) + } + _ => { panic!("Invalid itembytes: {}", f.itembytes); } + }; + output_text.push_str(&func(p, f.itembytes)); + } + FormatWriter::FloatWriter(func) => { + let p: f64 = match f.itembytes { + 4 => { + LittleEndian::read_f32(&bytes[b..nextb]) as f64 + } + 8 => { + LittleEndian::read_f64(&bytes[b..nextb]) + } + _ => { panic!("Invalid itembytes: {}", f.itembytes); } + }; + output_text.push_str(&func(p)); + } + } + b = nextb; + } + + if first { + print!("{}", prefix); // print offset + // if printing in multiple formats offset is printed only once + first = false; + } + else { + // this takes the space of the file offset on subsequent + // lines of multi-format rasters. + print!("{:>width$}", "", width=prefix.chars().count()); + } + print!("{}\n", output_text); + } +} + // For file byte offset printed at left margin. 
fn parse_radix(radix_str: Option) -> Result { match radix_str { From 47706908232270dd4a3d65354a3122ca611d8cef Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 25 Jul 2016 14:17:45 +0200 Subject: [PATCH 14/41] od: suppress duplicates --- src/od/od.rs | 27 ++++++++++++++++++++++----- tests/test_od.rs | 24 ++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index db82177e4..29f68c637 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -237,14 +237,20 @@ pub fn uumain(args: Vec) -> i32 { line_bytes = min_bytes; } - odfunc(line_bytes, &input_offset_base, &inputs, &formats[..]) + let output_duplicates = matches.opt_present("v"); + + odfunc(line_bytes, &input_offset_base, &inputs, &formats[..], output_duplicates) } -fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 { +fn odfunc(line_bytes: usize, input_offset_base: &Radix, + fnames: &[InputSource], formats: &[OdFormat], output_duplicates: bool) -> i32 { let mut mf = MultifileReader::new(fnames); let mut addr = 0; let mut bytes: Vec = vec![b'\x00'; line_bytes]; + let mut previous_bytes = Vec::::with_capacity(line_bytes); + let mut duplicate_line = false; + loop { // print each line data (or multi-format raster of several lines describing the same data). 
@@ -261,9 +267,20 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, fnames: &[InputSource], bytes[i] = 0; } } - - print_bytes(&bytes, n, &print_with_radix(input_offset_base, addr), formats); - + + if !output_duplicates && previous_bytes == bytes && n == line_bytes { + if !duplicate_line { + duplicate_line = true; + println!("*"); + } + } + else { + duplicate_line = false; + previous_bytes.clone_from(&bytes); + + print_bytes(&bytes, n, &print_with_radix(input_offset_base, addr), formats); + } + addr += n; } Err(_) => { diff --git a/tests/test_od.rs b/tests/test_od.rs index 651e85c65..b8d2d2198 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -266,7 +266,7 @@ fn test_width(){ 0000010 "); - let result = new_ucmd!().arg("-w4").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("-w4").arg("-v").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); @@ -283,7 +283,7 @@ fn test_invalid_width(){ 0000004 "); - let result = new_ucmd!().arg("-w5").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("-w5").arg("-v").run_piped_stdin(&input[..]); assert_eq!(result.stderr, "od: warning: invalid width 5; using 2 instead\n"); assert!(result.success); @@ -306,3 +306,23 @@ fn test_width_without_value(){ assert!(result.success); assert_eq!(result.stdout, expected_output); } + +#[test] +fn test_suppress_duplicates(){ + + let input = [0u8 ; 41]; + let expected_output = unindent(" + 0000000 000000000000 + 0000 0000 + * + 0000050 000000000000 + 0000 + 0000051 + "); + + let result = new_ucmd!().arg("-w4").arg("-O").arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} From d18be30c8aac08c0a3e0b6b9e034a05f90f75d6c Mon Sep 17 00:00:00 2001 From: Vladimir Matveev Date: Sat, 30 Jul 2016 20:17:16 +0200 Subject: [PATCH 15/41] od: add byteorder_io copied from: https://github.com/netvl/immeta/blob/4460ee/src/utils.rs workaround for: 
https://github.com/BurntSushi/byteorder/issues/41 --- src/od/byteorder_io.rs | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/od/byteorder_io.rs diff --git a/src/od/byteorder_io.rs b/src/od/byteorder_io.rs new file mode 100644 index 000000000..ba49c3220 --- /dev/null +++ b/src/od/byteorder_io.rs @@ -0,0 +1,46 @@ +// from: https://github.com/netvl/immeta/blob/4460ee/src/utils.rs#L76 + +use std::io::{self, Read, BufRead, ErrorKind}; + +use byteorder::{self, ReadBytesExt, LittleEndian, BigEndian}; +use byteorder::ByteOrder as ByteOrderTrait; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ByteOrder { + Little, + Big, +} + +macro_rules! gen_byte_order_ops { + ($($read_name:ident, $write_name:ident -> $tpe:ty),+) => { + impl ByteOrder { + $( + #[inline] + pub fn $read_name(self, source: &[u8]) -> $tpe { + match self { + ByteOrder::Little => LittleEndian::$read_name(source), + ByteOrder::Big => BigEndian::$read_name(source), + } + } + + pub fn $write_name(self, target: &mut [u8], n: $tpe) { + match self { + ByteOrder::Little => LittleEndian::$write_name(target, n), + ByteOrder::Big => BigEndian::$write_name(target, n), + } + } + )+ + } + } +} + +gen_byte_order_ops! 
{ + read_u16, write_u16 -> u16, + read_u32, write_u32 -> u32, + read_u64, write_u64 -> u64, + read_i16, write_i16 -> i16, + read_i32, write_i32 -> i32, + read_i64, write_i64 -> i64, + read_f32, write_f32 -> f32, + read_f64, write_f64 -> f64 +} From f7d7beb79beaf5199e37a89990eba80ad590de55 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 30 Jul 2016 20:39:09 +0200 Subject: [PATCH 16/41] od: implement --endian --- src/od/byteorder_io.rs | 12 ++++++++---- src/od/od.rs | 32 ++++++++++++++++++++++---------- tests/test_od.rs | 37 ++++++++++++++++++++++++++++--------- 3 files changed, 58 insertions(+), 23 deletions(-) diff --git a/src/od/byteorder_io.rs b/src/od/byteorder_io.rs index ba49c3220..e72b4373b 100644 --- a/src/od/byteorder_io.rs +++ b/src/od/byteorder_io.rs @@ -1,32 +1,36 @@ -// from: https://github.com/netvl/immeta/blob/4460ee/src/utils.rs#L76 +// workaround until https://github.com/BurntSushi/byteorder/issues/41 has been fixed +// based on: https://github.com/netvl/immeta/blob/4460ee/src/utils.rs#L76 -use std::io::{self, Read, BufRead, ErrorKind}; - -use byteorder::{self, ReadBytesExt, LittleEndian, BigEndian}; +use byteorder::{NativeEndian, LittleEndian, BigEndian}; use byteorder::ByteOrder as ByteOrderTrait; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ByteOrder { Little, Big, + Native, } macro_rules! 
gen_byte_order_ops { ($($read_name:ident, $write_name:ident -> $tpe:ty),+) => { impl ByteOrder { $( + #[allow(dead_code)] #[inline] pub fn $read_name(self, source: &[u8]) -> $tpe { match self { ByteOrder::Little => LittleEndian::$read_name(source), ByteOrder::Big => BigEndian::$read_name(source), + ByteOrder::Native => NativeEndian::$read_name(source), } } + #[allow(dead_code)] pub fn $write_name(self, target: &mut [u8], n: $tpe) { match self { ByteOrder::Little => LittleEndian::$write_name(target, n), ByteOrder::Big => BigEndian::$write_name(target, n), + ByteOrder::Native => NativeEndian::$write_name(target, n), } } )+ diff --git a/src/od/od.rs b/src/od/od.rs index 29f68c637..6ee2566d9 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -17,6 +17,7 @@ extern crate byteorder; extern crate uucore; mod multifilereader; +mod byteorder_io; mod prn_int; mod prn_char; mod prn_float; @@ -24,7 +25,7 @@ mod prn_float; use std::cmp; use std::io::Write; use unindent::*; -use byteorder::*; +use byteorder_io::*; use multifilereader::*; use prn_int::*; use prn_char::*; @@ -53,6 +54,7 @@ pub fn uumain(args: Vec) -> i32 { "Skip bytes input bytes before formatting and writing.", "BYTES"); opts.optopt("N", "read-bytes", "limit dump to BYTES input bytes", "BYTES"); + opts.optopt("", "endian", "byte order to use for multi-byte formats", "big|little"); opts.optopt("S", "strings", ("output strings of at least BYTES graphic chars. 3 is assumed when \ BYTES is not specified."), @@ -117,6 +119,16 @@ pub fn uumain(args: Vec) -> i32 { } }; + let byte_order = match matches.opt_str("endian").as_ref().map(String::as_ref) { + None => { ByteOrder::Native }, + Some("little") => { ByteOrder::Little }, + Some("big") => { ByteOrder::Big }, + Some(s) => { + disp_err!("Invalid argument --endian={}", s); + return 1; + } + }; + // Gather up file names - args which don't start with '-' let stdnionly = [InputSource::Stdin]; let inputs = args[1..] 
@@ -239,10 +251,10 @@ pub fn uumain(args: Vec) -> i32 { let output_duplicates = matches.opt_present("v"); - odfunc(line_bytes, &input_offset_base, &inputs, &formats[..], output_duplicates) + odfunc(line_bytes, &input_offset_base, byte_order, &inputs, &formats[..], output_duplicates) } -fn odfunc(line_bytes: usize, input_offset_base: &Radix, +fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, fnames: &[InputSource], formats: &[OdFormat], output_duplicates: bool) -> i32 { let mut mf = MultifileReader::new(fnames); @@ -278,7 +290,7 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, duplicate_line = false; previous_bytes.clone_from(&bytes); - print_bytes(&bytes, n, &print_with_radix(input_offset_base, addr), formats); + print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr), formats); } addr += n; @@ -296,7 +308,7 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, } } -fn print_bytes(bytes: &[u8], length: usize, prefix: &str, formats: &[OdFormat]) { +fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, formats: &[OdFormat]) { let mut first = true; // First line of a multi-format raster. 
for f in formats { let mut output_text = String::new(); @@ -313,13 +325,13 @@ fn print_bytes(bytes: &[u8], length: usize, prefix: &str, formats: &[OdFormat]) bytes[b] as u64 } 2 => { - LittleEndian::read_u16(&bytes[b..nextb]) as u64 + byte_order.read_u16(&bytes[b..nextb]) as u64 } 4 => { - LittleEndian::read_u32(&bytes[b..nextb]) as u64 + byte_order.read_u32(&bytes[b..nextb]) as u64 } 8 => { - LittleEndian::read_u64(&bytes[b..nextb]) + byte_order.read_u64(&bytes[b..nextb]) } _ => { panic!("Invalid itembytes: {}", f.itembytes); } }; @@ -328,10 +340,10 @@ fn print_bytes(bytes: &[u8], length: usize, prefix: &str, formats: &[OdFormat]) FormatWriter::FloatWriter(func) => { let p: f64 = match f.itembytes { 4 => { - LittleEndian::read_f32(&bytes[b..nextb]) as f64 + byte_order.read_f32(&bytes[b..nextb]) as f64 } 8 => { - LittleEndian::read_f64(&bytes[b..nextb]) + byte_order.read_f64(&bytes[b..nextb]) } _ => { panic!("Invalid itembytes: {}", f.itembytes); } }; diff --git a/tests/test_od.rs b/tests/test_od.rs index b8d2d2198..a6dd2046f 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -35,7 +35,7 @@ fn test_file() { } } - let result = new_ucmd!().arg(file.as_os_str()).run(); + let result = new_ucmd!().arg("--endian=little").arg(file.as_os_str()).run(); assert_empty_stderr!(result); assert!(result.success); @@ -64,7 +64,7 @@ fn test_2files() { } } - let result = new_ucmd!().arg(file1.as_os_str()).arg(file2.as_os_str()).run(); + let result = new_ucmd!().arg("--endian=little").arg(file1.as_os_str()).arg(file2.as_os_str()).run(); assert_empty_stderr!(result); assert!(result.success); @@ -91,7 +91,7 @@ fn test_no_file() { fn test_from_stdin() { let input = "abcdefghijklmnopqrstuvwxyz\n"; - let result = new_ucmd!().run_piped_stdin(input.as_bytes()); + let result = new_ucmd!().arg("--endian=little").run_piped_stdin(input.as_bytes()); assert_empty_stderr!(result); assert!(result.success); @@ -117,7 +117,7 @@ fn test_from_mixed() { } } - let result = 
new_ucmd!().arg(file1.as_os_str()).arg("--").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); + let result = new_ucmd!().arg("--endian=little").arg(file1.as_os_str()).arg("--").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); assert_empty_stderr!(result); assert!(result.success); @@ -158,7 +158,7 @@ fn test_dec() { 0000000 0 1 2 3 32767 -32768 -32767 0000016 "); - let result = new_ucmd!().arg("-i").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("--endian=little").arg("-i").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); @@ -175,7 +175,7 @@ fn test_hex16(){ 0000000 2301 6745 ab89 efcd 00ff 0000011 "); - let result = new_ucmd!().arg("-x").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("--endian=little").arg("-x").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); @@ -191,7 +191,7 @@ fn test_hex32(){ 0000000 67452301 efcdab89 000000ff 0000011 "); - let result = new_ucmd!().arg("-X").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("--endian=little").arg("-X").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); @@ -214,7 +214,7 @@ fn test_f32(){ 0000020 NaN 1e-40 -1.1663108e-38 0000034 "); - let result = new_ucmd!().arg("-f").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("--endian=little").arg("-f").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); @@ -236,7 +236,7 @@ fn test_f64(){ 0000040 -2.0000000000000000 0000050 "); - let result = new_ucmd!().arg("-F").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("--endian=little").arg("-F").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); @@ -326,3 +326,22 @@ fn test_suppress_duplicates(){ assert!(result.success); assert_eq!(result.stdout, expected_output); } + +#[test] +fn test_big_endian() { + + let input : [u8; 8] = [ + 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];// 
0xc000000000000000 -2 + let expected_output = unindent(" + 0000000 -2.0000000000000000 + -2.0000000 0 + c0000000 00000000 + c000 0000 0000 0000 + 0000010 + "); + let result = new_ucmd!().arg("--endian=big").arg("-F").arg("-f").arg("-X").arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} From 45895be96dd5c0a6863db8bd255f6222d2850233 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 30 Jul 2016 23:59:10 +0200 Subject: [PATCH 17/41] od: use structs to specify possible dump formats remove extra padding too - preparing proper allignment --- src/od/formatteriteminfo.rs | 12 +++ src/od/od.rs | 118 +++++++++--------------------- src/od/prn_char.rs | 17 ++++- src/od/prn_float.rs | 14 ++++ src/od/prn_int.rs | 141 +++++++++++++++++++++++++++++++----- tests/test_od.rs | 42 +++++------ 6 files changed, 216 insertions(+), 128 deletions(-) create mode 100644 src/od/formatteriteminfo.rs diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs new file mode 100644 index 000000000..c6974a172 --- /dev/null +++ b/src/od/formatteriteminfo.rs @@ -0,0 +1,12 @@ +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum FormatWriter { + IntWriter(fn(u64, usize, usize) -> String), + FloatWriter(fn(f64) -> String), +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct FormatterItemInfo { + pub byte_size: usize, + pub print_width: usize, + pub formatter: FormatWriter, +} diff --git a/src/od/od.rs b/src/od/od.rs index 6ee2566d9..ac7984dde 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -18,6 +18,7 @@ extern crate uucore; mod multifilereader; mod byteorder_io; +mod formatteriteminfo; mod prn_int; mod prn_char; mod prn_float; @@ -30,6 +31,7 @@ use multifilereader::*; use prn_int::*; use prn_char::*; use prn_float::*; +use formatteriteminfo::*; //This is available in some versions of std, but not all that we target. macro_rules! 
hashmap { @@ -155,68 +157,29 @@ pub fn uumain(args: Vec) -> i32 { }) .collect::>(); - // At the moment, char (-a & -c)formats need the driver to set up a - // line by inserting a different # of of spaces at the start. - struct OdFormater { - writer: FormatWriter, - offmarg: usize, - }; - let oct = OdFormater { - writer: FormatWriter::IntWriter(format_item_oct), offmarg: 2 - }; - let hex = OdFormater { - writer: FormatWriter::IntWriter(format_item_hex), offmarg: 2 - }; - let dec_u = OdFormater { - writer: FormatWriter::IntWriter(format_item_dec_u), offmarg: 2 - }; - let dec_s = OdFormater { - writer: FormatWriter::IntWriter(format_item_dec_s), offmarg: 2 - }; - let a_char = OdFormater { - writer: FormatWriter::IntWriter(format_item_a), offmarg: 1 - }; - let c_char = OdFormater { - writer: FormatWriter::IntWriter(format_item_c), offmarg: 1 - }; - let flo32 = OdFormater { - writer: FormatWriter::FloatWriter(format_item_flo32), offmarg: 0 - }; - let flo64 = OdFormater { - writer: FormatWriter::FloatWriter(format_item_flo64), offmarg: 0 - }; - - fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat { - OdFormat { - itembytes: itembytes, - writer: fmtspec.writer, - offmarg: fmtspec.offmarg, - } - } - // TODO: -t fmts let known_formats = hashmap![ - "a" => (1, &a_char), - "B" => (2, &oct) , - "b" => (1, &oct), - "c" => (1, &c_char), - "D" => (4, &dec_u), - "e" => (8, &flo64), - "F" => (8, &flo64), - "f" => (4, &flo32), - "H" => (4, &hex), - "X" => (4, &hex) , - "o" => (2, &oct), - "x" => (2, &hex), - "h" => (2, &hex), + "a" => FORMAT_ITEM_A, + "B" => FORMAT_ITEM_OCT16, + "b" => FORMAT_ITEM_OCT8, + "c" => FORMAT_ITEM_C, + "D" => FORMAT_ITEM_DEC32U, + "e" => FORMAT_ITEM_F64, + "F" => FORMAT_ITEM_F64, + "f" => FORMAT_ITEM_F32, + "H" => FORMAT_ITEM_HEX32, + "X" => FORMAT_ITEM_HEX32, + "o" => FORMAT_ITEM_OCT16, + "x" => FORMAT_ITEM_HEX16, + "h" => FORMAT_ITEM_HEX16, - "I" => (2, &dec_s), - "L" => (2, &dec_s), - "i" => (2, &dec_s), + "I" => FORMAT_ITEM_DEC16S, + "L" => 
FORMAT_ITEM_DEC16S, + "i" => FORMAT_ITEM_DEC16S, - "O" => (4, &oct), - "s" => (2, &dec_u) - ]; + "O" => FORMAT_ITEM_OCT32, + "s" => FORMAT_ITEM_DEC16U + ]; let mut formats = Vec::new(); @@ -224,14 +187,13 @@ pub fn uumain(args: Vec) -> i32 { match known_formats.get(flag) { None => {} // not every option is a format Some(r) => { - let (itembytes, fmtspec) = *r; - formats.push(mkfmt(itembytes, fmtspec)) + formats.push(*r) } } } if formats.is_empty() { - formats.push(mkfmt(2, &oct)); // 2 byte octal is the default + formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default } let mut line_bytes = match matches.opt_default("w", "32") { @@ -243,7 +205,7 @@ pub fn uumain(args: Vec) -> i32 { } } }; - let min_bytes = formats.iter().fold(2, |max, next| cmp::max(max, next.itembytes)); + let min_bytes = formats.iter().fold(2, |max, next| cmp::max(max, next.byte_size)); if line_bytes % min_bytes != 0 { show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); line_bytes = min_bytes; @@ -255,7 +217,7 @@ pub fn uumain(args: Vec) -> i32 { } fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, - fnames: &[InputSource], formats: &[OdFormat], output_duplicates: bool) -> i32 { + fnames: &[InputSource], formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { let mut mf = MultifileReader::new(fnames); let mut addr = 0; @@ -308,19 +270,17 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, } } -fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, formats: &[OdFormat]) { +fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, formats: &[FormatterItemInfo]) { let mut first = true; // First line of a multi-format raster. 
for f in formats { let mut output_text = String::new(); - output_text.push_str(&format!("{:>width$}", "", width = f.offmarg));// 4 spaces after offset - we print 2 more before each word - let mut b = 0; while b < length { - let nextb = b + f.itembytes; - match f.writer { + let nextb = b + f.byte_size; + match f.formatter { FormatWriter::IntWriter(func) => { - let p: u64 = match f.itembytes { + let p: u64 = match f.byte_size { 1 => { bytes[b] as u64 } @@ -333,19 +293,19 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, 8 => { byte_order.read_u64(&bytes[b..nextb]) } - _ => { panic!("Invalid itembytes: {}", f.itembytes); } + _ => { panic!("Invalid byte_size: {}", f.byte_size); } }; - output_text.push_str(&func(p, f.itembytes)); + output_text.push_str(&func(p, f.byte_size, f.print_width)); } FormatWriter::FloatWriter(func) => { - let p: f64 = match f.itembytes { + let p: f64 = match f.byte_size { 4 => { byte_order.read_f32(&bytes[b..nextb]) as f64 } 8 => { byte_order.read_f64(&bytes[b..nextb]) } - _ => { panic!("Invalid itembytes: {}", f.itembytes); } + _ => { panic!("Invalid byte_size: {}", f.byte_size); } }; output_text.push_str(&func(p)); } @@ -400,15 +360,3 @@ fn print_with_radix(r: &Radix, x: usize) -> String{ Radix::Binary => format!("{:07b}", x) } } - -#[derive(Clone, Copy)] -enum FormatWriter { - IntWriter(fn(u64, usize) -> String), - FloatWriter(fn(f64) -> String), -} - -struct OdFormat { - itembytes: usize, - writer: FormatWriter, - offmarg: usize, -} diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index b631becfe..a9663c26a 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -1,3 +1,16 @@ +use formatteriteminfo::*; + +pub static FORMAT_ITEM_A: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 3, + formatter: FormatWriter::IntWriter(format_item_a), +}; + +pub static FORMAT_ITEM_C: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 3, + formatter: 
FormatWriter::IntWriter(format_item_c), +}; // TODO: multi-byte chars // Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'. @@ -24,7 +37,7 @@ static A_CHRS : [&'static str; 160] = "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; -pub fn format_item_a(p: u64, _: usize) -> String { +pub fn format_item_a(p: u64, _: usize, _: usize) -> String { // itembytes == 1 let b = (p & 0xff) as u8; format!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte @@ -51,7 +64,7 @@ static C_CHRS : [&'static str; 127] = [ "x", "y", "z", "{", "|", "}", "~" ]; -pub fn format_item_c(p: u64, _: usize) -> String { +pub fn format_item_c(p: u64, _: usize, _: usize) -> String { // itembytes == 1 let b = (p & 0xff) as usize; diff --git a/src/od/prn_float.rs b/src/od/prn_float.rs index 93a4c9ee6..4ecc63200 100644 --- a/src/od/prn_float.rs +++ b/src/od/prn_float.rs @@ -1,6 +1,20 @@ use std::num::FpCategory; use std::f32; use std::f64; +use formatteriteminfo::*; + +pub static FORMAT_ITEM_F32: FormatterItemInfo = FormatterItemInfo { + byte_size: 4, + print_width: 14, + formatter: FormatWriter::FloatWriter(format_item_flo32), +}; + +pub static FORMAT_ITEM_F64: FormatterItemInfo = FormatterItemInfo { + byte_size: 8, + print_width: 24, + formatter: FormatWriter::FloatWriter(format_item_flo64), +}; + pub fn format_item_flo32(f: f64) -> String { format!(" {}", format_flo32(f as f32)) diff --git a/src/od/prn_int.rs b/src/od/prn_int.rs index 24bdee939..27cdcfdde 100644 --- a/src/od/prn_int.rs +++ b/src/od/prn_int.rs @@ -1,24 +1,125 @@ -// TODO: use some sort of byte iterator, instead of passing bytes in u64 -pub fn format_item_oct(p: u64, itembytes: usize) -> String { - let itemwidth = 3 * itembytes; - let itemspace = 4 * itembytes - itemwidth; +use formatteriteminfo::*; - 
format!("{:>itemspace$}{:0width$o}", - "", +pub static FORMAT_ITEM_OCT8: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 3, + formatter: FormatWriter::IntWriter(format_item_oct), +}; + +pub static FORMAT_ITEM_OCT16: FormatterItemInfo = FormatterItemInfo { + byte_size: 2, + print_width: 6, + formatter: FormatWriter::IntWriter(format_item_oct), +}; + +pub static FORMAT_ITEM_OCT32: FormatterItemInfo = FormatterItemInfo { + byte_size: 4, + print_width: 12, + formatter: FormatWriter::IntWriter(format_item_oct), +}; + +#[allow(dead_code)] +pub static FORMAT_ITEM_OCT64: FormatterItemInfo = FormatterItemInfo { + byte_size: 8, + print_width: 24, + formatter: FormatWriter::IntWriter(format_item_oct), +}; + +#[allow(dead_code)] +pub static FORMAT_ITEM_HEX8: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 2, + formatter: FormatWriter::IntWriter(format_item_hex), +}; + +pub static FORMAT_ITEM_HEX16: FormatterItemInfo = FormatterItemInfo { + byte_size: 2, + print_width: 4, + formatter: FormatWriter::IntWriter(format_item_hex), +}; + +pub static FORMAT_ITEM_HEX32: FormatterItemInfo = FormatterItemInfo { + byte_size: 4, + print_width: 8, + formatter: FormatWriter::IntWriter(format_item_hex), +}; + +#[allow(dead_code)] +pub static FORMAT_ITEM_HEX64: FormatterItemInfo = FormatterItemInfo { + byte_size: 8, + print_width: 16, + formatter: FormatWriter::IntWriter(format_item_hex), +}; + + +#[allow(dead_code)] +pub static FORMAT_ITEM_DEC8U: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 3, + formatter: FormatWriter::IntWriter(format_item_dec_u), +}; + +pub static FORMAT_ITEM_DEC16U: FormatterItemInfo = FormatterItemInfo { + byte_size: 2, + print_width: 5, + formatter: FormatWriter::IntWriter(format_item_dec_u), +}; + +pub static FORMAT_ITEM_DEC32U: FormatterItemInfo = FormatterItemInfo { + byte_size: 4, + print_width: 10, + formatter: FormatWriter::IntWriter(format_item_dec_u), +}; + +#[allow(dead_code)] +pub static 
FORMAT_ITEM_DEC64U: FormatterItemInfo = FormatterItemInfo { + byte_size: 8, + print_width: 19, + formatter: FormatWriter::IntWriter(format_item_dec_u), +}; + + +#[allow(dead_code)] +pub static FORMAT_ITEM_DEC8S: FormatterItemInfo = FormatterItemInfo { + byte_size: 1, + print_width: 4, + formatter: FormatWriter::IntWriter(format_item_dec_s), +}; + +pub static FORMAT_ITEM_DEC16S: FormatterItemInfo = FormatterItemInfo { + byte_size: 2, + print_width: 6, + formatter: FormatWriter::IntWriter(format_item_dec_s), +}; + +#[allow(dead_code)] +pub static FORMAT_ITEM_DEC32S: FormatterItemInfo = FormatterItemInfo { + byte_size: 4, + print_width: 11, + formatter: FormatWriter::IntWriter(format_item_dec_s), +}; + +#[allow(dead_code)] +pub static FORMAT_ITEM_DEC64S: FormatterItemInfo = FormatterItemInfo { + byte_size: 8, + print_width: 20, + formatter: FormatWriter::IntWriter(format_item_dec_s), +}; + + +// TODO: use some sort of byte iterator, instead of passing bytes in u64 +pub fn format_item_oct(p: u64, _: usize, print_width: usize) -> String { + + format!(" {:0width$o}", p, - width = itemwidth, - itemspace = itemspace) + width = print_width) } -pub fn format_item_hex(p: u64, itembytes: usize) -> String { - let itemwidth = 2 * itembytes; - let itemspace = 4 * itembytes - itemwidth; +pub fn format_item_hex(p: u64, _: usize, print_width: usize) -> String { - format!("{:>itemspace$}{:0width$x}", - "", + format!(" {:0width$x}", p, - width = itemwidth, - itemspace = itemspace) + width = print_width) } @@ -28,12 +129,12 @@ fn sign_extend(item: u64, itembytes: usize) -> i64{ } -pub fn format_item_dec_s(p: u64, itembytes: usize) -> String { +pub fn format_item_dec_s(p: u64, itembytes: usize, print_width: usize) -> String { // sign extend - let s = sign_extend(p,itembytes); - format!("{:totalwidth$}", s, totalwidth = 4 * itembytes) + let s = sign_extend(p, itembytes); + format!(" {:width$}", s, width = print_width) } -pub fn format_item_dec_u(p: u64, itembytes: usize) -> String { - 
format!("{:totalwidth$}", p, totalwidth = 4 * itembytes) +pub fn format_item_dec_u(p: u64, _: usize, print_width: usize) -> String { + format!(" {:width$}", p, width = print_width) } diff --git a/tests/test_od.rs b/tests/test_od.rs index a6dd2046f..a7bfc3d5e 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -11,8 +11,8 @@ use self::unindent::*; // octal dump of 'abcdefghijklmnopqrstuvwxyz\n' static ALPHA_OUT: &'static str = " - 0000000 061141 062143 063145 064147 065151 066153 067155 070157 - 0000020 071161 072163 073165 074167 075171 000012 + 0000000 061141 062143 063145 064147 065151 066153 067155 070157 + 0000020 071161 072163 073165 074167 075171 000012 0000033 "; @@ -134,10 +134,10 @@ fn test_multiple_formats() { assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, unindent(" - 0000000 a b c d e f g h i j k l m n o p - 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 - 0000020 q r s t u v w x y z \\n - 161 162 163 164 165 166 167 170 171 172 012 + 0000000 a b c d e f g h i j k l m n o p + 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 + 0000020 q r s t u v w x y z \\n + 161 162 163 164 165 166 167 170 171 172 012 0000033 ")); @@ -155,7 +155,7 @@ fn test_dec() { 0x00u8,0x80u8, 0x01u8,0x80u8,]; let expected_output = unindent(" - 0000000 0 1 2 3 32767 -32768 -32767 + 0000000 0 1 2 3 32767 -32768 -32767 0000016 "); let result = new_ucmd!().arg("--endian=little").arg("-i").run_piped_stdin(&input[..]); @@ -172,7 +172,7 @@ fn test_hex16(){ let input : [u8; 9] = [ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; let expected_output = unindent(" - 0000000 2301 6745 ab89 efcd 00ff + 0000000 2301 6745 ab89 efcd 00ff 0000011 "); let result = new_ucmd!().arg("--endian=little").arg("-x").run_piped_stdin(&input[..]); @@ -188,7 +188,7 @@ fn test_hex32(){ let input : [u8; 9] = [ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; let expected_output = unindent(" - 0000000 67452301 efcdab89 000000ff + 
0000000 67452301 efcdab89 000000ff 0000011 "); let result = new_ucmd!().arg("--endian=little").arg("-X").run_piped_stdin(&input[..]); @@ -261,8 +261,8 @@ fn test_width(){ let input : [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; let expected_output = unindent(" - 0000000 000000 000000 - 0000004 000000 000000 + 0000000 000000 000000 + 0000004 000000 000000 0000010 "); @@ -278,8 +278,8 @@ fn test_invalid_width(){ let input : [u8; 4] = [0x00, 0x00, 0x00, 0x00]; let expected_output = unindent(" - 0000000 000000 - 0000002 000000 + 0000000 000000 + 0000002 000000 0000004 "); @@ -295,8 +295,8 @@ fn test_width_without_value(){ let input : [u8; 40] = [0 ; 40]; let expected_output = unindent(" - 0000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 - 0000040 000000 000000 000000 000000 + 0000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 + 0000040 000000 000000 000000 000000 0000050 "); @@ -312,11 +312,11 @@ fn test_suppress_duplicates(){ let input = [0u8 ; 41]; let expected_output = unindent(" - 0000000 000000000000 - 0000 0000 + 0000000 000000000000 + 0000 0000 * - 0000050 000000000000 - 0000 + 0000050 000000000000 + 0000 0000051 "); @@ -335,8 +335,8 @@ fn test_big_endian() { let expected_output = unindent(" 0000000 -2.0000000000000000 -2.0000000 0 - c0000000 00000000 - c000 0000 0000 0000 + c0000000 00000000 + c000 0000 0000 0000 0000010 "); let result = new_ucmd!().arg("--endian=big").arg("-F").arg("-f").arg("-X").arg("-x").run_piped_stdin(&input[..]); From 24fb6d66c4d82eddacbdf640c9ae98c699c0e2d0 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 31 Jul 2016 16:51:20 +0200 Subject: [PATCH 18/41] od: proper align different sized output --- src/od/formatteriteminfo.rs | 2 +- src/od/od.rs | 70 ++++++++++++++++++++++++++---- src/od/prn_char.rs | 4 +- src/od/prn_float.rs | 4 +- src/od/prn_int.rs | 40 ++++++++--------- 
tests/test_od.rs | 85 ++++++++++++++++++++++++++++++++----- 6 files changed, 161 insertions(+), 44 deletions(-) diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs index c6974a172..a6bd6f5a6 100644 --- a/src/od/formatteriteminfo.rs +++ b/src/od/formatteriteminfo.rs @@ -7,6 +7,6 @@ pub enum FormatWriter { #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct FormatterItemInfo { pub byte_size: usize, - pub print_width: usize, + pub print_width: usize, // including a space in front of the text pub formatter: FormatWriter, } diff --git a/src/od/od.rs b/src/od/od.rs index ac7984dde..a7bdec8a6 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -43,6 +43,7 @@ macro_rules! hashmap { } static VERSION: &'static str = env!("CARGO_PKG_VERSION"); +const MAX_BYTES_PER_UNIT: usize = 8; #[derive(Debug)] enum Radix { Decimal, Hexadecimal, Octal, Binary } @@ -65,6 +66,7 @@ pub fn uumain(args: Vec) -> i32 { opts.optflag("b", "", "octal bytes"); opts.optflag("c", "", "ASCII characters or backslash escapes"); opts.optflag("d", "", "unsigned decimal 2-byte units"); + opts.optflag("D", "", "unsigned decimal 4-byte units"); opts.optflag("o", "", "unsigned decimal 2-byte units"); opts.optflag("I", "", "decimal 2-byte units"); @@ -164,6 +166,7 @@ pub fn uumain(args: Vec) -> i32 { "b" => FORMAT_ITEM_OCT8, "c" => FORMAT_ITEM_C, "D" => FORMAT_ITEM_DEC32U, + "d" => FORMAT_ITEM_DEC16U, "e" => FORMAT_ITEM_F64, "F" => FORMAT_ITEM_F64, "f" => FORMAT_ITEM_F32, @@ -225,6 +228,43 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, let mut previous_bytes = Vec::::with_capacity(line_bytes); let mut duplicate_line = false; + let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); + let print_width_block = formats + .iter() + .fold(1, |max, next| { + cmp::max(max, next.print_width * (byte_size_block / next.byte_size)) + }); + + if byte_size_block > MAX_BYTES_PER_UNIT { + panic!("{}-bits types are unsupported. 
Current max={}-bits.", + 8 * byte_size_block, + 8 * MAX_BYTES_PER_UNIT); + } + + let mut spaced_formatters: Vec = formats + .iter() + .map(|f| SpacedFormatterItemInfo { frm: *f, spacing: [0; MAX_BYTES_PER_UNIT] }) + .collect(); + + // calculate proper alignment for each item + for sf in &mut spaced_formatters { + let mut byte_size = sf.frm.byte_size; + let mut items_in_block = byte_size_block / byte_size; + let thisblock_width = sf.frm.print_width * items_in_block; + let mut missing_spacing = print_width_block - thisblock_width; + + while items_in_block > 0 { + let avg_spacing: usize = missing_spacing / items_in_block; + for i in 0..items_in_block { + sf.spacing[i * byte_size] += avg_spacing; + missing_spacing -= avg_spacing; + } + // this assumes the size of all types is a power of 2 (1, 2, 4, 8, 16, ...) + items_in_block /= 2; + byte_size *= 2; + } + } + loop { // print each line data (or multi-format raster of several lines describing the same data). @@ -252,7 +292,8 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, duplicate_line = false; previous_bytes.clone_from(&bytes); - print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr), formats); + print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr), + &spaced_formatters, byte_size_block); } addr += n; @@ -270,17 +311,23 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, } } -fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, formats: &[FormatterItemInfo]) { +fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, + formats: &[SpacedFormatterItemInfo], byte_size_block: usize) { let mut first = true; // First line of a multi-format raster. 
for f in formats { let mut output_text = String::new(); let mut b = 0; while b < length { - let nextb = b + f.byte_size; - match f.formatter { + let nextb = b + f.frm.byte_size; + + output_text.push_str(&format!("{:>width$}", + "", + width = f.spacing[b % byte_size_block])); + + match f.frm.formatter { FormatWriter::IntWriter(func) => { - let p: u64 = match f.byte_size { + let p: u64 = match f.frm.byte_size { 1 => { bytes[b] as u64 } @@ -293,19 +340,19 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, 8 => { byte_order.read_u64(&bytes[b..nextb]) } - _ => { panic!("Invalid byte_size: {}", f.byte_size); } + _ => { panic!("Invalid byte_size: {}", f.frm.byte_size); } }; - output_text.push_str(&func(p, f.byte_size, f.print_width)); + output_text.push_str(&func(p, f.frm.byte_size, f.frm.print_width)); } FormatWriter::FloatWriter(func) => { - let p: f64 = match f.byte_size { + let p: f64 = match f.frm.byte_size { 4 => { byte_order.read_f32(&bytes[b..nextb]) as f64 } 8 => { byte_order.read_f64(&bytes[b..nextb]) } - _ => { panic!("Invalid byte_size: {}", f.byte_size); } + _ => { panic!("Invalid byte_size: {}", f.frm.byte_size); } }; output_text.push_str(&func(p)); } @@ -360,3 +407,8 @@ fn print_with_radix(r: &Radix, x: usize) -> String{ Radix::Binary => format!("{:07b}", x) } } + +struct SpacedFormatterItemInfo { + frm: FormatterItemInfo, + spacing: [usize; MAX_BYTES_PER_UNIT], +} diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index a9663c26a..d0811f107 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -2,13 +2,13 @@ use formatteriteminfo::*; pub static FORMAT_ITEM_A: FormatterItemInfo = FormatterItemInfo { byte_size: 1, - print_width: 3, + print_width: 4, formatter: FormatWriter::IntWriter(format_item_a), }; pub static FORMAT_ITEM_C: FormatterItemInfo = FormatterItemInfo { byte_size: 1, - print_width: 3, + print_width: 4, formatter: FormatWriter::IntWriter(format_item_c), }; diff --git a/src/od/prn_float.rs 
b/src/od/prn_float.rs index 4ecc63200..22918ccb4 100644 --- a/src/od/prn_float.rs +++ b/src/od/prn_float.rs @@ -5,13 +5,13 @@ use formatteriteminfo::*; pub static FORMAT_ITEM_F32: FormatterItemInfo = FormatterItemInfo { byte_size: 4, - print_width: 14, + print_width: 15, formatter: FormatWriter::FloatWriter(format_item_flo32), }; pub static FORMAT_ITEM_F64: FormatterItemInfo = FormatterItemInfo { byte_size: 8, - print_width: 24, + print_width: 25, formatter: FormatWriter::FloatWriter(format_item_flo64), }; diff --git a/src/od/prn_int.rs b/src/od/prn_int.rs index 27cdcfdde..7f2b1b58f 100644 --- a/src/od/prn_int.rs +++ b/src/od/prn_int.rs @@ -2,52 +2,52 @@ use formatteriteminfo::*; pub static FORMAT_ITEM_OCT8: FormatterItemInfo = FormatterItemInfo { byte_size: 1, - print_width: 3, + print_width: 4, // max: 377 formatter: FormatWriter::IntWriter(format_item_oct), }; pub static FORMAT_ITEM_OCT16: FormatterItemInfo = FormatterItemInfo { byte_size: 2, - print_width: 6, + print_width: 7, // max: 177777 formatter: FormatWriter::IntWriter(format_item_oct), }; pub static FORMAT_ITEM_OCT32: FormatterItemInfo = FormatterItemInfo { byte_size: 4, - print_width: 12, + print_width: 12, // max: 37777777777 formatter: FormatWriter::IntWriter(format_item_oct), }; #[allow(dead_code)] pub static FORMAT_ITEM_OCT64: FormatterItemInfo = FormatterItemInfo { byte_size: 8, - print_width: 24, + print_width: 23, // max: 2000000000000000000000 formatter: FormatWriter::IntWriter(format_item_oct), }; #[allow(dead_code)] pub static FORMAT_ITEM_HEX8: FormatterItemInfo = FormatterItemInfo { byte_size: 1, - print_width: 2, + print_width: 3, // max: ff formatter: FormatWriter::IntWriter(format_item_hex), }; pub static FORMAT_ITEM_HEX16: FormatterItemInfo = FormatterItemInfo { byte_size: 2, - print_width: 4, + print_width: 5, // max: ffff formatter: FormatWriter::IntWriter(format_item_hex), }; pub static FORMAT_ITEM_HEX32: FormatterItemInfo = FormatterItemInfo { byte_size: 4, - print_width: 8, + 
print_width: 9, // max: ffffffff formatter: FormatWriter::IntWriter(format_item_hex), }; #[allow(dead_code)] pub static FORMAT_ITEM_HEX64: FormatterItemInfo = FormatterItemInfo { byte_size: 8, - print_width: 16, + print_width: 17, // max: ffffffffffffffff formatter: FormatWriter::IntWriter(format_item_hex), }; @@ -55,26 +55,26 @@ pub static FORMAT_ITEM_HEX64: FormatterItemInfo = FormatterItemInfo { #[allow(dead_code)] pub static FORMAT_ITEM_DEC8U: FormatterItemInfo = FormatterItemInfo { byte_size: 1, - print_width: 3, + print_width: 4, // max: 255 formatter: FormatWriter::IntWriter(format_item_dec_u), }; pub static FORMAT_ITEM_DEC16U: FormatterItemInfo = FormatterItemInfo { byte_size: 2, - print_width: 5, + print_width: 6, // max: 65535 formatter: FormatWriter::IntWriter(format_item_dec_u), }; pub static FORMAT_ITEM_DEC32U: FormatterItemInfo = FormatterItemInfo { byte_size: 4, - print_width: 10, + print_width: 11, // max: 4294967295 formatter: FormatWriter::IntWriter(format_item_dec_u), }; #[allow(dead_code)] pub static FORMAT_ITEM_DEC64U: FormatterItemInfo = FormatterItemInfo { byte_size: 8, - print_width: 19, + print_width: 21, // max: 18446744073709551615 formatter: FormatWriter::IntWriter(format_item_dec_u), }; @@ -82,27 +82,27 @@ pub static FORMAT_ITEM_DEC64U: FormatterItemInfo = FormatterItemInfo { #[allow(dead_code)] pub static FORMAT_ITEM_DEC8S: FormatterItemInfo = FormatterItemInfo { byte_size: 1, - print_width: 4, + print_width: 5, // max: -128 formatter: FormatWriter::IntWriter(format_item_dec_s), }; pub static FORMAT_ITEM_DEC16S: FormatterItemInfo = FormatterItemInfo { byte_size: 2, - print_width: 6, + print_width: 7, // max: -32768 formatter: FormatWriter::IntWriter(format_item_dec_s), }; #[allow(dead_code)] pub static FORMAT_ITEM_DEC32S: FormatterItemInfo = FormatterItemInfo { byte_size: 4, - print_width: 11, + print_width: 12, // max: -2147483648 formatter: FormatWriter::IntWriter(format_item_dec_s), }; #[allow(dead_code)] pub static 
FORMAT_ITEM_DEC64S: FormatterItemInfo = FormatterItemInfo { byte_size: 8, - print_width: 20, + print_width: 21, // max: -9223372036854775808 formatter: FormatWriter::IntWriter(format_item_dec_s), }; @@ -112,14 +112,14 @@ pub fn format_item_oct(p: u64, _: usize, print_width: usize) -> String { format!(" {:0width$o}", p, - width = print_width) + width = print_width - 1) } pub fn format_item_hex(p: u64, _: usize, print_width: usize) -> String { format!(" {:0width$x}", p, - width = print_width) + width = print_width - 1) } @@ -132,9 +132,9 @@ fn sign_extend(item: u64, itembytes: usize) -> i64{ pub fn format_item_dec_s(p: u64, itembytes: usize, print_width: usize) -> String { // sign extend let s = sign_extend(p, itembytes); - format!(" {:width$}", s, width = print_width) + format!("{:width$}", s, width = print_width) } pub fn format_item_dec_u(p: u64, _: usize, print_width: usize) -> String { - format!(" {:width$}", p, width = print_width) + format!("{:width$}", p, width = print_width) } diff --git a/tests/test_od.rs b/tests/test_od.rs index a7bfc3d5e..1c513e28e 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -312,11 +312,11 @@ fn test_suppress_duplicates(){ let input = [0u8 ; 41]; let expected_output = unindent(" - 0000000 000000000000 - 0000 0000 + 0000000 00000000000 + 0000 0000 * - 0000050 000000000000 - 0000 + 0000050 00000000000 + 0000 0000051 "); @@ -332,16 +332,81 @@ fn test_big_endian() { let input : [u8; 8] = [ 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];// 0xc000000000000000 -2 + let expected_output = unindent(" - 0000000 -2.0000000000000000 - -2.0000000 0 - c0000000 00000000 - c000 0000 0000 0000 - 0000010 - "); + 0000000 -2.0000000000000000 + -2.0000000 0 + c0000000 00000000 + c000 0000 0000 0000 + 0000010 + "); + let result = new_ucmd!().arg("--endian=big").arg("-F").arg("-f").arg("-X").arg("-x").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, expected_output); } + +#[test] 
+#[allow(non_snake_case)] +fn test_alignment_Xxa() { + + let input : [u8; 8] = [ + 0x0A, 0x0D, 0x65, 0x66, 0x67, 0x00, 0x9e, 0x9f]; + + let expected_output = unindent(" + 0000000 66650d0a 9f9e0067 + 0d0a 6665 0067 9f9e + nl cr e f g nul 9e 9f + 0000010 + "); + + // in this case the width of the -a (8-bit) determines the alignment for the other fields + let result = new_ucmd!().arg("--endian=little").arg("-X").arg("-x").arg("-a").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +#[allow(non_snake_case)] +fn test_alignment_Fx() { + + let input : [u8; 8] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0];// 0xc000000000000000 -2 + + let expected_output = unindent(" + 0000000 -2.0000000000000000 + 0000 0000 0000 c000 + 0000010 + "); + + // in this case the width of the -F (64-bit) determines the alignment for the other field + let result = new_ucmd!().arg("--endian=little").arg("-F").arg("-x").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_maxuint(){ + + let input = [0xFFu8 ; 8]; + let expected_output = unindent(" + 0000000 37777777777 37777777777 + 177777 177777 177777 177777 + 377 377 377 377 377 377 377 377 + 4294967295 4294967295 + 65535 65535 65535 65535 + 0000010 + "); + + let result = new_ucmd!().arg("-O").arg("-o").arg("-b").arg("-D").arg("-d").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} From 1164b9e1187a95d097dd373facfb03a0cbd5563a Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 1 Aug 2016 00:03:47 +0200 Subject: [PATCH 19/41] od: fix file byte offset for non-octal types removed binary offset, added no offset. 
--- src/od/od.rs | 64 ++++++++++++++++++++++++------------------------ tests/test_od.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 32 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index a7bdec8a6..efe83cd8c 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -45,8 +45,8 @@ macro_rules! hashmap { static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; -#[derive(Debug)] -enum Radix { Decimal, Hexadecimal, Octal, Binary } +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } pub fn uumain(args: Vec) -> i32 { let mut opts = getopts::Options::new(); @@ -62,27 +62,27 @@ pub fn uumain(args: Vec) -> i32 { ("output strings of at least BYTES graphic chars. 3 is assumed when \ BYTES is not specified."), "BYTES"); - opts.optflag("a", "", "named characters, ignoring high-order bit"); - opts.optflag("b", "", "octal bytes"); - opts.optflag("c", "", "ASCII characters or backslash escapes"); - opts.optflag("d", "", "unsigned decimal 2-byte units"); - opts.optflag("D", "", "unsigned decimal 4-byte units"); - opts.optflag("o", "", "unsigned decimal 2-byte units"); + opts.optflagmulti("a", "", "named characters, ignoring high-order bit"); + opts.optflagmulti("b", "", "octal bytes"); + opts.optflagmulti("c", "", "ASCII characters or backslash escapes"); + opts.optflagmulti("d", "", "unsigned decimal 2-byte units"); + opts.optflagmulti("D", "", "unsigned decimal 4-byte units"); + opts.optflagmulti("o", "", "unsigned decimal 2-byte units"); - opts.optflag("I", "", "decimal 2-byte units"); - opts.optflag("L", "", "decimal 2-byte units"); - opts.optflag("i", "", "decimal 2-byte units"); - opts.optflag("x", "", "hexadecimal 2-byte units"); - opts.optflag("h", "", "hexadecimal 2-byte units"); + opts.optflagmulti("I", "", "decimal 2-byte units"); + opts.optflagmulti("L", "", "decimal 2-byte units"); + opts.optflagmulti("i", "", "decimal 2-byte units"); + 
opts.optflagmulti("x", "", "hexadecimal 2-byte units"); + opts.optflagmulti("h", "", "hexadecimal 2-byte units"); - opts.optflag("O", "", "octal 4-byte units"); - opts.optflag("s", "", "decimal 4-byte units"); - opts.optflag("X", "", "hexadecimal 4-byte units"); - opts.optflag("H", "", "hexadecimal 4-byte units"); + opts.optflagmulti("O", "", "octal 4-byte units"); + opts.optflagmulti("s", "", "decimal 4-byte units"); + opts.optflagmulti("X", "", "hexadecimal 4-byte units"); + opts.optflagmulti("H", "", "hexadecimal 4-byte units"); - opts.optflag("e", "", "floating point double precision (64-bit) units"); - opts.optflag("f", "", "floating point single precision (32-bit) units"); - opts.optflag("F", "", "floating point double precision (64-bit) units"); + opts.optflagmulti("e", "", "floating point double precision (64-bit) units"); + opts.optflagmulti("f", "", "floating point single precision (32-bit) units"); + opts.optflagmulti("F", "", "floating point double precision (64-bit) units"); opts.optopt("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); @@ -216,10 +216,10 @@ pub fn uumain(args: Vec) -> i32 { let output_duplicates = matches.opt_present("v"); - odfunc(line_bytes, &input_offset_base, byte_order, &inputs, &formats[..], output_duplicates) + odfunc(line_bytes, input_offset_base, byte_order, &inputs, &formats[..], output_duplicates) } -fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, +fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, fnames: &[InputSource], formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { let mut mf = MultifileReader::new(fnames); @@ -270,7 +270,9 @@ fn odfunc(line_bytes: usize, input_offset_base: &Radix, byte_order: ByteOrder, match mf.f_read(bytes.as_mut_slice()) { Ok(0) => { - print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset + if input_offset_base 
!= Radix::NoPrefix { + print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset + } break; } Ok(n) => { @@ -381,7 +383,7 @@ fn parse_radix(radix_str: Option) -> Result { Some(s) => { let st = s.into_bytes(); if st.len() != 1 { - Err("Radix must be one of [d, o, b, x]\n") + Err("Radix must be one of [d, o, n, x]\n") } else { let radix: char = *(st.get(0) .expect("byte string of length 1 lacks a 0th elem")) as char; @@ -389,22 +391,20 @@ fn parse_radix(radix_str: Option) -> Result { 'd' => Ok(Radix::Decimal), 'x' => Ok(Radix::Hexadecimal), 'o' => Ok(Radix::Octal), - 'b' => Ok(Radix::Binary), - _ => Err("Radix must be one of [d, o, b, x]\n") + 'n' => Ok(Radix::NoPrefix), + _ => Err("Radix must be one of [d, o, n, x]\n") } } } } } -fn print_with_radix(r: &Radix, x: usize) -> String{ - // TODO(keunwoo): field widths should be based on sizeof(x), or chosen dynamically based on the - // expected range of address values. Binary in particular is not great here. - match *r { +fn print_with_radix(r: Radix, x: usize) -> String{ + match r { Radix::Decimal => format!("{:07}", x), - Radix::Hexadecimal => format!("{:07X}", x), + Radix::Hexadecimal => format!("{:06X}", x), Radix::Octal => format!("{:07o}", x), - Radix::Binary => format!("{:07b}", x) + Radix::NoPrefix => String::from(""), } } diff --git a/tests/test_od.rs b/tests/test_od.rs index 1c513e28e..96196d9e5 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -410,3 +410,65 @@ fn test_maxuint(){ assert!(result.success); assert_eq!(result.stdout, expected_output); } + +#[test] +fn test_hex_offset(){ + + let input = [0u8 ; 0x1F]; + let expected_output = unindent(" + 000000 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 000010 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 00001F + "); + + let result = new_ucmd!().arg("-Ax").arg("-X").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + 
assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_dec_offset(){ + + let input = [0u8 ; 19]; + let expected_output = unindent(" + 0000000 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 0000016 00000000 + 00000000 + 0000019 + "); + + let result = new_ucmd!().arg("-Ad").arg("-X").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_no_offset(){ + + let input = [0u8 ; 31]; + const LINE: &'static str = " 00000000 00000000 00000000 00000000\n"; + let expected_output = [LINE, LINE, LINE, LINE].join(""); + + let result = new_ucmd!().arg("-An").arg("-X").arg("-X").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + +#[test] +fn test_invalid_offset(){ + + let input = [0u8 ; 4]; + + let result = new_ucmd!().arg("-Ab").run_piped_stdin(&input[..]); + + assert!(!result.success); +} From bd0424fa0cc1ad25ed5c1d5538d39d1348bbd3de Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 1 Aug 2016 16:39:58 +0200 Subject: [PATCH 20/41] od: start with multi-byte support --- src/od/formatteriteminfo.rs | 12 +++- src/od/od.rs | 6 +- src/od/prn_char.rs | 107 ++++++++++++++++++++++++++++++------ tests/test_od.rs | 21 ++++--- 4 files changed, 116 insertions(+), 30 deletions(-) diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs index a6bd6f5a6..dae0f63e9 100644 --- a/src/od/formatteriteminfo.rs +++ b/src/od/formatteriteminfo.rs @@ -1,10 +1,18 @@ -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy)] pub enum FormatWriter { IntWriter(fn(u64, usize, usize) -> String), FloatWriter(fn(f64) -> String), + MultibyteWriter(fn(&[u8]) -> String), } -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +impl Clone for FormatWriter { + #[inline] + fn clone(&self) -> Self { + *self + } +} + +#[derive(Copy, Clone)] pub struct 
FormatterItemInfo { pub byte_size: usize, pub print_width: usize, // including a space in front of the text diff --git a/src/od/od.rs b/src/od/od.rs index efe83cd8c..74728bd70 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -208,7 +208,7 @@ pub fn uumain(args: Vec) -> i32 { } } }; - let min_bytes = formats.iter().fold(2, |max, next| cmp::max(max, next.byte_size)); + let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); if line_bytes % min_bytes != 0 { show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); line_bytes = min_bytes; @@ -267,6 +267,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, loop { // print each line data (or multi-format raster of several lines describing the same data). + // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line match mf.f_read(bytes.as_mut_slice()) { Ok(0) => { @@ -358,6 +359,9 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, }; output_text.push_str(&func(p)); } + FormatWriter::MultibyteWriter(func) => { + output_text.push_str(&func(&bytes[b..length])); + } } b = nextb; } diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index d0811f107..f4d096aa8 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -1,3 +1,4 @@ +use std::str::from_utf8; use formatteriteminfo::*; pub static FORMAT_ITEM_A: FormatterItemInfo = FormatterItemInfo { @@ -9,13 +10,11 @@ pub static FORMAT_ITEM_A: FormatterItemInfo = FormatterItemInfo { pub static FORMAT_ITEM_C: FormatterItemInfo = FormatterItemInfo { byte_size: 1, print_width: 4, - formatter: FormatWriter::IntWriter(format_item_c), + formatter: FormatWriter::MultibyteWriter(format_item_c), }; -// TODO: multi-byte chars -// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'. 
-static A_CHRS : [&'static str; 160] = +static A_CHRS : [&'static str; 128] = ["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", @@ -31,21 +30,17 @@ static A_CHRS : [&'static str; 160] = "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~", "del", - "80", "81", "82", "83", "84", "85", "86", "87", - "88", "89", "8a", "8b", "8c", "8d", "8e", "8f", - "90", "91", "92", "93", "94", "95", "96", "97", - "98", "99", "9a", "9b", "9c", "9d", "9e", "9f"]; + "x", "y", "z", "{", "|", "}", "~", "del"]; -pub fn format_item_a(p: u64, _: usize, _: usize) -> String { +fn format_item_a(p: u64, _: usize, _: usize) -> String { // itembytes == 1 - let b = (p & 0xff) as u8; - format!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"?") // XXX od dose not actually do this, it just prints the byte + let b = (p & 0x7f) as u8; + format!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"??") ) } -static C_CHRS : [&'static str; 127] = [ +static C_CHRS : [&'static str; 128] = [ "\\0", "001", "002", "003", "004", "005", "006", "\\a", "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", "020", "021", "022", "023", "024", "025", "026", "027", @@ -61,18 +56,94 @@ static C_CHRS : [&'static str; 127] = [ "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~" ]; + "x", "y", "z", "{", "|", "}", "~", "177"]; -pub fn format_item_c(p: u64, _: usize, _: usize) -> String { +fn format_item_c(bytes: &[u8]) -> String { // itembytes == 1 - let b = (p & 0xff) as usize; + let b = bytes[0]; - if b < C_CHRS.len() { + if b & 0x80 == 0x00 { match C_CHRS.get(b as usize) { Some(s) => format!("{:>4}", s), None => format!("{:>4}", b), } } - else { String::new() } + else if (b & 0xc0) == 0x80 { + // second or subsequent 
octet of an utf-8 sequence + String::from(" **") + } + else if ((b & 0xe0) == 0xc0) && (bytes.len() >= 2) { + // start of a 2 octet utf-8 sequence + match from_utf8(&bytes[0..2]) { + Ok(s) => { format!("{:>4}", s) }, + Err(_) => { format!(" {:03o}", b) }, + } + } + else if ((b & 0xf0) == 0xe0) && (bytes.len() >= 3) { + // start of a 3 octet utf-8 sequence + match from_utf8(&bytes[0..3]) { + Ok(s) => { format!("{:>4}", s) }, + Err(_) => { format!(" {:03o}", b) }, + } + } + else if ((b & 0xf8) == 0xf0) && (bytes.len() >= 4) { + // start of a 4 octet utf-8 sequence + match from_utf8(&bytes[0..4]) { + Ok(s) => { format!("{:>4}", s) }, + Err(_) => { format!(" {:03o}", b) }, + } + } + else { + // invalid utf-8 + format!(" {:03o}", b) + } +} + +#[test] +fn test_format_item_a() { + assert_eq!(" nul", format_item_a(0x00, 1, 4)); + assert_eq!(" soh", format_item_a(0x01, 1, 4)); + assert_eq!(" sp", format_item_a(0x20, 1, 4)); + assert_eq!(" A", format_item_a(0x41, 1, 4)); + assert_eq!(" ~", format_item_a(0x7e, 1, 4)); + assert_eq!(" del", format_item_a(0x7f, 1, 4)); + + assert_eq!(" nul", format_item_a(0x80, 1, 4)); + assert_eq!(" A", format_item_a(0xc1, 1, 4)); + assert_eq!(" ~", format_item_a(0xfe, 1, 4)); + assert_eq!(" del", format_item_a(0xff, 1, 4)); +} + +#[test] +fn test_format_item_c() { + assert_eq!(" \\0", format_item_c(&[0x00])); + assert_eq!(" 001", format_item_c(&[0x01])); + assert_eq!(" ", format_item_c(&[0x20])); + assert_eq!(" A", format_item_c(&[0x41])); + assert_eq!(" ~", format_item_c(&[0x7e])); + assert_eq!(" 177", format_item_c(&[0x7f])); + assert_eq!(" A", format_item_c(&[0x41, 0x21])); + + assert_eq!(" **", format_item_c(&[0x80])); + assert_eq!(" **", format_item_c(&[0x9f])); + + assert_eq!(" ß", format_item_c(&[0xc3, 0x9f])); + assert_eq!(" ß", format_item_c(&[0xc3, 0x9f, 0x21])); + + assert_eq!(" \u{1000}", format_item_c(&[0xe1, 0x80, 0x80])); + assert_eq!(" \u{1000}", format_item_c(&[0xe1, 0x80, 0x80, 0x21])); + + assert_eq!(" \u{1f496}", 
format_item_c(&[0xf0, 0x9f, 0x92, 0x96])); + assert_eq!(" \u{1f496}", format_item_c(&[0xf0, 0x9f, 0x92, 0x96, 0x21])); + + assert_eq!(" 300", format_item_c(&[0xc0, 0x80])); // invalid utf-8 (MUTF-8 null) + assert_eq!(" 301", format_item_c(&[0xc1, 0xa1])); // invalid utf-8 + assert_eq!(" 303", format_item_c(&[0xc3, 0xc3])); // invalid utf-8 + assert_eq!(" 360", format_item_c(&[0xf0, 0x82, 0x82, 0xac])); // invalid utf-8 (overlong) + assert_eq!(" 360", format_item_c(&[0xf0, 0x9f, 0x92])); // invalid utf-8 (missing octet) + assert_eq!(" \u{10FFFD}", format_item_c(&[0xf4, 0x8f, 0xbf, 0xbd])); // largest valid utf-8 + assert_eq!(" 364", format_item_c(&[0xf4, 0x90, 0x00, 0x00])); // invalid utf-8 + assert_eq!(" 365", format_item_c(&[0xf5, 0x80, 0x80, 0x80])); // invalid utf-8 + assert_eq!(" 377", format_item_c(&[0xff])); // invalid utf-8 } diff --git a/tests/test_od.rs b/tests/test_od.rs index 96196d9e5..af4eca612 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -243,18 +243,21 @@ fn test_f64(){ assert_eq!(result.stdout, expected_output); } -// We don't support multibyte chars, so big NEIN to this -/* #[test] -fn mit_die_umlauten_getesten() { - let result = new_ucmd!() - .run_piped_stdin("Universität Tübingen".as_bytes()); +fn test_multibyte() { + + // TODO: replace **** with \u{1B000} + let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen ****".as_bytes()); + assert_empty_stderr!(result); assert!(result.success); - assert_eq!(result.stdout, - "0000000 U n i v e r s i t ä ** t T ü **\n0000020 b i n g e n\n0000026") + assert_eq!(result.stdout, unindent(" + 0000000 U n i v e r s i t ä ** t + 0000014 T ü ** b i n g e n * + 0000030 * * * + 0000033 + ")); } -*/ #[test] fn test_width(){ @@ -358,7 +361,7 @@ fn test_alignment_Xxa() { let expected_output = unindent(" 0000000 66650d0a 9f9e0067 0d0a 6665 0067 9f9e - nl cr e f g nul 9e 9f + nl cr e f g nul rs us 0000010 "); From c15936ad68128a7a0791b4d4e8457011460b3baa Mon Sep 17 00:00:00 2001 
From: Wim Hueskes Date: Tue, 2 Aug 2016 00:08:42 +0200 Subject: [PATCH 21/41] od: fix performance issue with large values of -w like -w100000000 or larger --- src/od/od.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index 74728bd70..827c48816 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -224,9 +224,10 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, let mut mf = MultifileReader::new(fnames); let mut addr = 0; - let mut bytes: Vec = vec![b'\x00'; line_bytes]; - let mut previous_bytes = Vec::::with_capacity(line_bytes); let mut duplicate_line = false; + let mut previous_bytes: Vec = Vec::new(); + let mut bytes: Vec = Vec::with_capacity(line_bytes); + unsafe { bytes.set_len(line_bytes); } // fast but uninitialized let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); let print_width_block = formats @@ -280,12 +281,17 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // not enough byte for a whole element, this should only happen on the last line. if n != line_bytes { // set zero bytes in the part of the buffer that will be used, but is not filled. 
- for i in n..line_bytes { + let mut max_used = n + MAX_BYTES_PER_UNIT; + if max_used > line_bytes { + max_used = line_bytes; + } + + for i in n..max_used { bytes[i] = 0; } } - if !output_duplicates && previous_bytes == bytes && n == line_bytes { + if !output_duplicates && n == line_bytes && previous_bytes == bytes { if !duplicate_line { duplicate_line = true; println!("*"); @@ -293,7 +299,10 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } else { duplicate_line = false; - previous_bytes.clone_from(&bytes); + if n == line_bytes { + // save a copy of the input unless it is the last line + previous_bytes.clone_from(&bytes); + } print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr), &spaced_formatters, byte_size_block); From 2b10cc47ff252b7e450a00b93cd198c50dd0f695 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Tue, 2 Aug 2016 22:55:23 +0200 Subject: [PATCH 22/41] od: implement Read for MultifileReader also add tests and fix error handling --- src/od/mockstream.rs | 102 ++++++++++++++++++++++++++ src/od/multifilereader.rs | 146 ++++++++++++++++++++++++++++++-------- src/od/od.rs | 22 +++--- 3 files changed, 229 insertions(+), 41 deletions(-) create mode 100644 src/od/mockstream.rs diff --git a/src/od/mockstream.rs b/src/od/mockstream.rs new file mode 100644 index 000000000..4dd02e22f --- /dev/null +++ b/src/od/mockstream.rs @@ -0,0 +1,102 @@ +// https://github.com/lazy-bitfield/rust-mockstream/pull/2 + +use std::io::{Cursor, Read, Result, Error, ErrorKind}; +use std::error::Error as errorError; + +/// `FailingMockStream` mocks a stream which will fail upon read or write +/// +/// # Examples +/// +/// ``` +/// use std::io::{Cursor, Read}; +/// +/// struct CountIo {} +/// +/// impl CountIo { +/// fn read_data(&self, r: &mut Read) -> usize { +/// let mut count: usize = 0; +/// let mut retries = 3; +/// +/// loop { +/// let mut buffer = [0; 5]; +/// match r.read(&mut buffer) { +/// Err(_) => { +/// if retries == 
0 { break; } +/// retries -= 1; +/// }, +/// Ok(0) => break, +/// Ok(n) => count += n, +/// } +/// } +/// count +/// } +/// } +/// +/// #[test] +/// fn test_io_retries() { +/// let mut c = Cursor::new(&b"1234"[..]) +/// .chain(FailingMockStream::new(ErrorKind::Other, "Failing", 3)) +/// .chain(Cursor::new(&b"5678"[..])); +/// +/// let sut = CountIo {}; +/// // this will fail unless read_data performs at least 3 retries on I/O errors +/// assert_eq!(8, sut.read_data(&mut c)); +/// } +/// ``` +#[derive(Clone)] +pub struct FailingMockStream { + kind: ErrorKind, + message: &'static str, + repeat_count: i32, +} + +impl FailingMockStream { + /// Creates a FailingMockStream + /// + /// When `read` or `write` is called, it will return an error `repeat_count` times. + /// `kind` and `message` can be specified to define the exact error. + pub fn new(kind: ErrorKind, message: &'static str, repeat_count: i32) -> FailingMockStream { + FailingMockStream { kind: kind, message: message, repeat_count: repeat_count, } + } + + fn error(&mut self) -> Result { + if self.repeat_count == 0 { + return Ok(0) + } + else { + if self.repeat_count > 0 { + self.repeat_count -= 1; + } + Err(Error::new(self.kind, self.message)) + } + } +} + +impl Read for FailingMockStream { + fn read(&mut self, _: &mut [u8]) -> Result { + self.error() + } +} + +#[test] +fn test_failing_mock_stream_read() { + let mut s = FailingMockStream::new(ErrorKind::BrokenPipe, "The dog ate the ethernet cable", 1); + let mut v = [0; 4]; + let error = s.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::BrokenPipe); + assert_eq!(error.description(), "The dog ate the ethernet cable"); + // after a single error, it will return Ok(0) + assert_eq!(s.read(v.as_mut()).unwrap(), 0); +} + +#[test] +fn test_failing_mock_stream_chain_interrupted() { + let mut c = Cursor::new(&b"abcd"[..]) + .chain(FailingMockStream::new(ErrorKind::Interrupted, "Interrupted", 5)) + .chain(Cursor::new(&b"ABCD"[..])); + + let mut v = [0; 
8]; + c.read_exact(v.as_mut()).unwrap(); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41, 0x42, 0x43, 0x44]); + assert_eq!(c.read(v.as_mut()).unwrap(), 0); +} diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs index c2bcb688e..6df298f2b 100644 --- a/src/od/multifilereader.rs +++ b/src/od/multifilereader.rs @@ -3,24 +3,26 @@ use std::io; use std::io::BufReader; use std::fs::File; use std::io::Write; +use std::vec::Vec; -#[derive(Debug)] pub enum InputSource<'a> { FileName(&'a str ), - Stdin + Stdin, + #[allow(dead_code)] + Stream(Box), } // MultifileReader - concatenate all our input, file or stdin. pub struct MultifileReader<'a> { - ni: std::slice::Iter<'a, InputSource<'a>>, + ni: Vec>, curr_file: Option>, pub any_err: bool, } impl<'b> MultifileReader<'b> { - pub fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> { + pub fn new<'a>(fnames: Vec>) -> MultifileReader<'a> { let mut mf = MultifileReader { - ni: fnames.iter(), + ni: fnames, curr_file: None, // normally this means done; call next_file() any_err: false, }; @@ -31,47 +33,50 @@ impl<'b> MultifileReader<'b> { fn next_file(&mut self) { // loop retries with subsequent files if err - normally 'loops' once loop { - match self.ni.next() { - None => { + if self.ni.len() == 0 { self.curr_file = None; return; + } + match self.ni.remove(0) { + InputSource::Stdin => { + self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); + return; } - Some(input) => { - match *input { - InputSource::Stdin => { - self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); + InputSource::FileName(fname) => { + match File::open(fname) { + Ok(f) => { + self.curr_file = Some(Box::new(BufReader::new(f))); return; } - InputSource::FileName(fname) => { - match File::open(fname) { - Ok(f) => { - self.curr_file = Some(Box::new(BufReader::new(f))); - return; - } - Err(e) => { - // If any file can't be opened, - // print an error at the time that the file is needed, - // then move on the the next file. 
- // This matches the behavior of the original `od` - eprintln!("{}: '{}': {}", - executable!().split("::").next().unwrap(), // remove module - fname, e); - self.any_err = true - } - } + Err(e) => { + // If any file can't be opened, + // print an error at the time that the file is needed, + // then move on the the next file. + // This matches the behavior of the original `od` + eprintln!("{}: '{}': {}", + executable!().split("::").next().unwrap(), // remove module + fname, e); + self.any_err = true } } } + InputSource::Stream(s) => { + self.curr_file = Some(s); + return; + } } } } +} + +impl<'b> io::Read for MultifileReader<'b> { // Fill buf with bytes read from the list of files // Returns Ok() // Handles io errors itself, thus always returns OK // Fills the provided buffer completely, unless it has run out of input. // If any call returns short (< buf.len()), all subsequent calls will return Ok<0> - pub fn f_read(&mut self, buf: &mut [u8]) -> io::Result { + fn read(&mut self, buf: &mut [u8]) -> io::Result { let mut xfrd = 0; // while buffer we are filling is not full.. May go thru several files. 'fillloop: while xfrd < buf.len() { @@ -83,7 +88,13 @@ impl<'b> MultifileReader<'b> { xfrd += match curr_file.read(&mut buf[xfrd..]) { Ok(0) => break, Ok(n) => n, - Err(e) => panic!("file error: {}", e), + Err(e) => { + eprintln!("{}: I/O: {}", + executable!().split("::").next().unwrap(), // remove module + e); + self.any_err = true; + break; + }, }; if xfrd == buf.len() { // transferred all that was asked for. 
@@ -97,3 +108,78 @@ impl<'b> MultifileReader<'b> { Ok(xfrd) } } + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read, ErrorKind}; + use mockstream::*; + + #[test] + fn test_multi_file_reader_one_read() { + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"abcd"[..])))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + let mut v = [0; 10]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41, 0x42, 0x43, 0x44, 0, 0]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_multi_file_reader_two_reads() { + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"abcd"[..])))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + let mut v = [0; 5]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 5); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x42, 0x43, 0x44, 0x64, 0x41]); // last two bytes are not overwritten + } + + #[test] + fn test_multi_file_reader_read_error() { + let c = Cursor::new(&b"1234"[..]) + .chain(FailingMockStream::new(ErrorKind::Other, "Failing", 1)) + .chain(Cursor::new(&b"5678"[..])); + let mut inputs = Vec::new(); + inputs.push(InputSource::Stream(Box::new(c))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + let mut v = [0; 5]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 5); + assert_eq!(v, [49, 50, 51, 52, 65]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [66, 67, 68, 52, 65]); // last two bytes are not overwritten + + // note: no retry on i/o error, so 5678 is missing + } + + #[test] + fn test_multi_file_reader_read_error_at_start() { + let mut inputs = Vec::new(); + 
inputs.push(InputSource::Stream(Box::new(FailingMockStream::new(ErrorKind::Other, "Failing", 1)))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"abcd"[..])))); + inputs.push(InputSource::Stream(Box::new(FailingMockStream::new(ErrorKind::Other, "Failing", 1)))); + inputs.push(InputSource::Stream(Box::new(Cursor::new(&b"ABCD"[..])))); + inputs.push(InputSource::Stream(Box::new(FailingMockStream::new(ErrorKind::Other, "Failing", 1)))); + let mut v = [0; 5]; + + let mut sut = MultifileReader::new(inputs); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 5); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x41]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x42, 0x43, 0x44, 0x64, 0x41]); // last two bytes are not overwritten + } + +} diff --git a/src/od/od.rs b/src/od/od.rs index 827c48816..c8ab51f2e 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -22,8 +22,11 @@ mod formatteriteminfo; mod prn_int; mod prn_char; mod prn_float; +#[cfg(test)] +mod mockstream; use std::cmp; +use std::io::Read; use std::io::Write; use unindent::*; use byteorder_io::*; @@ -134,8 +137,7 @@ pub fn uumain(args: Vec) -> i32 { }; // Gather up file names - args which don't start with '-' - let stdnionly = [InputSource::Stdin]; - let inputs = args[1..] + let mut inputs = args[1..] .iter() .filter_map(|w| match w as &str { "--" => Some(InputSource::Stdin), @@ -143,12 +145,10 @@ pub fn uumain(args: Vec) -> i32 { x => Some(InputSource::FileName(x)), }) .collect::>(); - // If no input files named, use stdin. - let inputs = if inputs.len() == 0 { - &stdnionly[..] - } else { - &inputs[..] - }; + if inputs.len() == 0 { + inputs.push(InputSource::Stdin); + } + // Gather up format flags, we don't use getopts becase we need keep them in order. let flags = args[1..] 
.iter() @@ -216,11 +216,11 @@ pub fn uumain(args: Vec) -> i32 { let output_duplicates = matches.opt_present("v"); - odfunc(line_bytes, input_offset_base, byte_order, &inputs, &formats[..], output_duplicates) + odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], output_duplicates) } fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, - fnames: &[InputSource], formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { + fnames: Vec, formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { let mut mf = MultifileReader::new(fnames); let mut addr = 0; @@ -270,7 +270,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // print each line data (or multi-format raster of several lines describing the same data). // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line - match mf.f_read(bytes.as_mut_slice()) { + match mf.read(bytes.as_mut_slice()) { Ok(0) => { if input_offset_base != Radix::NoPrefix { print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset From 459db47c2bbe3ca75ed7a8c3096b15977209d630 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 7 Aug 2016 01:33:23 +0200 Subject: [PATCH 23/41] od: implement --skip-bytes and --read-bytes --- src/od/multifilereader.rs | 13 ++- src/od/od.rs | 60 +++++++++-- src/od/parse_nrofbytes.rs | 129 ++++++++++++++++++++++++ src/od/partialreader.rs | 204 ++++++++++++++++++++++++++++++++++++++ tests/test_od.rs | 34 +++++++ 5 files changed, 428 insertions(+), 12 deletions(-) create mode 100644 src/od/parse_nrofbytes.rs create mode 100644 src/od/partialreader.rs diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs index 6df298f2b..6ab66cf68 100644 --- a/src/od/multifilereader.rs +++ b/src/od/multifilereader.rs @@ -16,7 +16,11 @@ pub enum InputSource<'a> { pub struct MultifileReader<'a> { ni: Vec>, curr_file: Option>, - pub any_err: bool, + any_err: bool, +} + +pub trait 
HasError { + fn has_error(&self) -> bool; } impl<'b> MultifileReader<'b> { @@ -109,6 +113,13 @@ impl<'b> io::Read for MultifileReader<'b> { } } +impl<'b> HasError for MultifileReader<'b> { + fn has_error(&self) -> bool { + self.any_err + } +} + + #[cfg(test)] mod tests { use super::*; diff --git a/src/od/od.rs b/src/od/od.rs index c8ab51f2e..2b91a0e29 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -17,11 +17,13 @@ extern crate byteorder; extern crate uucore; mod multifilereader; +mod partialreader; mod byteorder_io; mod formatteriteminfo; mod prn_int; mod prn_char; mod prn_float; +mod parse_nrofbytes; #[cfg(test)] mod mockstream; @@ -31,10 +33,12 @@ use std::io::Write; use unindent::*; use byteorder_io::*; use multifilereader::*; +use partialreader::*; use prn_int::*; use prn_char::*; use prn_float::*; use formatteriteminfo::*; +use parse_nrofbytes::*; //This is available in some versions of std, but not all that we target. macro_rules! hashmap { @@ -216,14 +220,42 @@ pub fn uumain(args: Vec) -> i32 { let output_duplicates = matches.opt_present("v"); - odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], output_duplicates) + let skip_bytes = match matches.opt_default("skip-bytes", "0") { + None => 0, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { i } + Err(_) => { + disp_err!("Invalid argument --skip-bytes={}", s); + return 1; + } + } + } + }; + let read_bytes = match matches.opt_str("read-bytes") { + None => None, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { Some(i) } + Err(_) => { + disp_err!("Invalid argument --read-bytes={}", s); + return 1; + } + } + } + }; + + odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], + output_duplicates, skip_bytes, read_bytes) } fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, - fnames: Vec, formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { + fnames: Vec, formats: &[FormatterItemInfo], output_duplicates: bool, + 
skip_bytes: usize, read_bytes: Option) -> i32 { - let mut mf = MultifileReader::new(fnames); - let mut addr = 0; + let mf = MultifileReader::new(fnames); + let mut input = PartialReader::new(mf, skip_bytes, read_bytes); + let mut addr = skip_bytes; let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); let mut bytes: Vec = Vec::with_capacity(line_bytes); @@ -270,11 +302,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // print each line data (or multi-format raster of several lines describing the same data). // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line - match mf.read(bytes.as_mut_slice()) { + match input.read(bytes.as_mut_slice()) { Ok(0) => { - if input_offset_base != Radix::NoPrefix { - print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset - } + print_final_offset(input_offset_base, addr); break; } Ok(n) => { @@ -310,13 +340,15 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, addr += n; } - Err(_) => { - break; + Err(e) => { + show_error!("{}", e); + print_final_offset(input_offset_base, addr); + return 1; } }; } - if mf.any_err { + if input.has_error() { 1 } else { 0 @@ -421,6 +453,12 @@ fn print_with_radix(r: Radix, x: usize) -> String{ } } +fn print_final_offset(r: Radix, x: usize) { + if r != Radix::NoPrefix { + print!("{}\n", print_with_radix(r, x)); + } +} + struct SpacedFormatterItemInfo { frm: FormatterItemInfo, spacing: [usize; MAX_BYTES_PER_UNIT], diff --git a/src/od/parse_nrofbytes.rs b/src/od/parse_nrofbytes.rs new file mode 100644 index 000000000..780c5f30b --- /dev/null +++ b/src/od/parse_nrofbytes.rs @@ -0,0 +1,129 @@ + +pub fn parse_number_of_bytes(s: &String) -> Result { + let mut start = 0; + let mut len = s.len(); + let mut radix = 10; + let mut multiply = 1; + + if s.starts_with("0x") || s.starts_with("0X") { + start = 2; + radix = 16; + } + else if s.starts_with("0") { + radix = 8; 
+ } + + let mut ends_with = s.chars().rev(); + match ends_with.next() { + Some('b') if radix != 16 => { + multiply = 512; + len -= 1; + }, + Some('k') | Some('K') => { + multiply = 1024; + len -= 1; + } + Some('m') | Some('M') => { + multiply = 1024*1024; + len -= 1; + } + Some('G') => { + multiply = 1024*1024*1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('T') => { + multiply = 1024*1024*1024*1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('P') => { + multiply = 1024*1024*1024*1024*1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('E') => { + multiply = 1024*1024*1024*1024*1024*1024; + len -= 1; + } + Some('B') if radix != 16 => { + len -= 2; + multiply = match ends_with.next() { + Some('k') | Some('K') => 1000, + Some('m') | Some('M') => 1000*1000, + Some('G') => 1000*1000*1000, + #[cfg(target_pointer_width = "64")] + Some('T') => 1000*1000*1000*1000, + #[cfg(target_pointer_width = "64")] + Some('P') => 1000*1000*1000*1000*1000, + #[cfg(target_pointer_width = "64")] + Some('E') => 1000*1000*1000*1000*1000*1000, + _ => return Err("parse failed"), + } + }, + _ => {}, + } + + match usize::from_str_radix(&s[start..len], radix) { + Ok(i) => Ok(i * multiply), + Err(_) => Err("parse failed"), + } +} + +#[allow(dead_code)] +fn parse_number_of_bytes_str(s: &str) -> Result { + parse_number_of_bytes(&String::from(s)) +} + +#[test] +fn test_parse_number_of_bytes() { + // normal decimal numbers + assert_eq!(0, parse_number_of_bytes_str("0").unwrap()); + assert_eq!(5, parse_number_of_bytes_str("5").unwrap()); + assert_eq!(999, parse_number_of_bytes_str("999").unwrap()); + assert_eq!(2*512, parse_number_of_bytes_str("2b").unwrap()); + assert_eq!(2*1024, parse_number_of_bytes_str("2k").unwrap()); + assert_eq!(4*1024, parse_number_of_bytes_str("4K").unwrap()); + assert_eq!(2*1048576, parse_number_of_bytes_str("2m").unwrap()); + assert_eq!(4*1048576, parse_number_of_bytes_str("4M").unwrap()); + assert_eq!(1073741824, 
parse_number_of_bytes_str("1G").unwrap()); + assert_eq!(2000, parse_number_of_bytes_str("2kB").unwrap()); + assert_eq!(4000, parse_number_of_bytes_str("4KB").unwrap()); + assert_eq!(2000000, parse_number_of_bytes_str("2mB").unwrap()); + assert_eq!(4000000, parse_number_of_bytes_str("4MB").unwrap()); + assert_eq!(2000000000, parse_number_of_bytes_str("2GB").unwrap()); + + // octal input + assert_eq!(8, parse_number_of_bytes_str("010").unwrap()); + assert_eq!(8*512, parse_number_of_bytes_str("010b").unwrap()); + assert_eq!(8*1024, parse_number_of_bytes_str("010k").unwrap()); + assert_eq!(8*1048576, parse_number_of_bytes_str("010m").unwrap()); + + // hex input + assert_eq!(15, parse_number_of_bytes_str("0xf").unwrap()); + assert_eq!(15, parse_number_of_bytes_str("0XF").unwrap()); + assert_eq!(27, parse_number_of_bytes_str("0x1b").unwrap()); + assert_eq!(16*1024, parse_number_of_bytes_str("0x10k").unwrap()); + assert_eq!(16*1048576, parse_number_of_bytes_str("0x10m").unwrap()); + + // invalid input + parse_number_of_bytes_str("").unwrap_err(); + parse_number_of_bytes_str("-1").unwrap_err(); + parse_number_of_bytes_str("1e2").unwrap_err(); + parse_number_of_bytes_str("xyz").unwrap_err(); + parse_number_of_bytes_str("b").unwrap_err(); + parse_number_of_bytes_str("1Y").unwrap_err(); + parse_number_of_bytes_str("∞").unwrap_err(); +} + +#[test] +#[cfg(target_pointer_width = "64")] +fn test_parse_number_of_bytes_64bits() { + assert_eq!(1099511627776, parse_number_of_bytes_str("1T").unwrap()); + assert_eq!(1125899906842624, parse_number_of_bytes_str("1P").unwrap()); + assert_eq!(1152921504606846976, parse_number_of_bytes_str("1E").unwrap()); + + assert_eq!(2000000000000, parse_number_of_bytes_str("2TB").unwrap()); + assert_eq!(2000000000000000, parse_number_of_bytes_str("2PB").unwrap()); + assert_eq!(2000000000000000000, parse_number_of_bytes_str("2EB").unwrap()); +} diff --git a/src/od/partialreader.rs b/src/od/partialreader.rs new file mode 100644 index 000000000..e68b267e0 
--- /dev/null +++ b/src/od/partialreader.rs @@ -0,0 +1,204 @@ +use std::cmp; +use std::io; +use std::io::Read; +use multifilereader::HasError; + +/// When a large number of bytes must be skipped, it will be read into a +/// dynamically allocated buffer. The buffer will be limited to this size. +const MAX_SKIP_BUFFER: usize = 64*1024; + +/// Wrapper for `std::io::Read` which can skip bytes at the beginning +/// of the input, and it can limit the returned bytes to a particular +/// number of bytes. +pub struct PartialReader { + inner: R, + skip: usize, + limit: Option, +} + +impl PartialReader { + /// Create a new `PartialReader` wrapping `inner`, which will skip + /// `skip` bytes, and limits the output to `limit` bytes. Set `limit` + /// to `None` if there should be no limit. + pub fn new(inner: R, skip: usize, limit: Option) -> Self { + PartialReader { + inner: inner, + skip: skip, + limit: limit, + } + } +} + +impl Read for PartialReader { + fn read(&mut self, out: &mut [u8]) -> io::Result { + if self.skip > 0 { + let buf_size = cmp::min(self.skip, MAX_SKIP_BUFFER); + let mut bytes: Vec = Vec::with_capacity(buf_size); + unsafe { bytes.set_len(buf_size); } + + while self.skip > 0 { + let skip_count = cmp::min(self.skip, buf_size); + + match self.inner.read_exact(&mut bytes[..skip_count]) { + Err(e) => return Err(e), + Ok(()) => self.skip -= skip_count, + } + } + } + match self.limit { + None => self.inner.read(out), + Some(0) => Ok(0), + Some(ref mut limit) => { + let slice = if *limit > out.len() { out } else { &mut out[0..*limit] }; + match self.inner.read(slice) { + Err(e) => Err(e), + Ok(r) => { + *limit -= r; + Ok(r) + }, + } + }, + } + } +} + +impl HasError for PartialReader { + fn has_error(&self) -> bool { + self.inner.has_error() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read, ErrorKind}; + use std::error::Error; + use mockstream::*; + + #[test] + fn test_read_without_limits() { + let mut v = [0; 10]; + let mut sut = 
PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_read_without_limits_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 0, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_skipping_bytes() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); + } + + #[test] + fn test_read_skipping_all() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 20, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::UnexpectedEof); + } + + #[test] + fn test_read_skipping_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 2, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_skipping_with_two_reads_during_skip() { + let mut v = [0; 10]; + let c = Cursor::new(&b"a"[..]) + .chain(Cursor::new(&b"bcdefgh"[..])); + let mut sut = PartialReader::new(c, 2, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); + } + + #[test] + fn test_read_skipping_huge_number() { + let mut v = [0; 10]; + // test if it does not eat all memory.... 
+ let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), usize::max_value(), None); + + sut.read(v.as_mut()).unwrap_err(); + } + + #[test] + fn test_read_limitting_all() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(0)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_read_limitting() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0]); + } + + #[test] + fn test_read_limitting_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 0, Some(6)); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_limitting_with_large_limit() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(20)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_read_limitting_with_multiple_reads() { + let mut v = [0; 3]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x61, 0x62, 0x63]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x64, 0x65, 0x66]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_read_skipping_and_limitting() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, Some(4)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 4); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0, 0, 0]); + } +} diff --git a/tests/test_od.rs b/tests/test_od.rs index af4eca612..26f3d5b60 100644 
--- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -475,3 +475,37 @@ fn test_invalid_offset(){ assert!(!result.success); } + +#[test] +fn test_skip_bytes(){ + + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("-c").arg("--skip-bytes=5").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 0000005 f g h i j k l m n o p q + 0000021 + ")); +} + +#[test] +fn test_skip_bytes_error(){ + + let input = "12345"; + let result = new_ucmd!().arg("--skip-bytes=10").run_piped_stdin(input.as_bytes()); + + assert!(!result.success); +} + +#[test] +fn test_read_bytes(){ + + let input = "abcdefghijklmnopqrstuvwxyz\n12345678"; + let result = new_ucmd!().arg("--endian=little").arg("--read-bytes=27").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); +} From 69bde1170dbbff9640fe2cf43ad443ce1f8048e0 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 7 Aug 2016 23:57:53 +0200 Subject: [PATCH 24/41] od: finish multi-byte support --- src/od/od.rs | 29 +++--- src/od/peekreader.rs | 213 +++++++++++++++++++++++++++++++++++++++++++ tests/test_od.rs | 25 ++++- 3 files changed, 251 insertions(+), 16 deletions(-) create mode 100644 src/od/peekreader.rs diff --git a/src/od/od.rs b/src/od/od.rs index 2b91a0e29..7563c751d 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -18,6 +18,7 @@ extern crate uucore; mod multifilereader; mod partialreader; +mod peekreader; mod byteorder_io; mod formatteriteminfo; mod prn_int; @@ -28,12 +29,12 @@ mod parse_nrofbytes; mod mockstream; use std::cmp; -use std::io::Read; use std::io::Write; use unindent::*; use byteorder_io::*; use multifilereader::*; use partialreader::*; +use peekreader::*; use prn_int::*; use prn_char::*; use prn_float::*; @@ -51,6 +52,7 @@ macro_rules! 
hashmap { static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; +const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes #[derive(Copy, Clone, Debug, Eq, PartialEq)] enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } @@ -254,12 +256,13 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, skip_bytes: usize, read_bytes: Option) -> i32 { let mf = MultifileReader::new(fnames); - let mut input = PartialReader::new(mf, skip_bytes, read_bytes); + let pr = PartialReader::new(mf, skip_bytes, read_bytes); + let mut input = PeekReader::new(pr); let mut addr = skip_bytes; let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); - let mut bytes: Vec = Vec::with_capacity(line_bytes); - unsafe { bytes.set_len(line_bytes); } // fast but uninitialized + let mut bytes: Vec = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE); + unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); let print_width_block = formats @@ -302,12 +305,12 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // print each line data (or multi-format raster of several lines describing the same data). // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line - match input.read(bytes.as_mut_slice()) { - Ok(0) => { + match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) { + Ok((0, _)) => { print_final_offset(input_offset_base, addr); break; } - Ok(n) => { + Ok((n, peekbytes)) => { // not enough byte for a whole element, this should only happen on the last line. if n != line_bytes { // set zero bytes in the part of the buffer that will be used, but is not filled. 
@@ -321,7 +324,10 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } } - if !output_duplicates && n == line_bytes && previous_bytes == bytes { + if !output_duplicates + && n == line_bytes + && !previous_bytes.is_empty() + && previous_bytes[..line_bytes] == bytes[..line_bytes] { if !duplicate_line { duplicate_line = true; println!("*"); @@ -334,7 +340,8 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, previous_bytes.clone_from(&bytes); } - print_bytes(byte_order, &bytes, n, &print_with_radix(input_offset_base, addr), + print_bytes(byte_order, &bytes, n, peekbytes, + &print_with_radix(input_offset_base, addr), &spaced_formatters, byte_size_block); } @@ -355,7 +362,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } } -fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, +fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str, formats: &[SpacedFormatterItemInfo], byte_size_block: usize) { let mut first = true; // First line of a multi-format raster. for f in formats { @@ -401,7 +408,7 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, prefix: &str, output_text.push_str(&func(p)); } FormatWriter::MultibyteWriter(func) => { - output_text.push_str(&func(&bytes[b..length])); + output_text.push_str(&func(&bytes[b..length+peekbytes])); } } b = nextb; diff --git a/src/od/peekreader.rs b/src/od/peekreader.rs new file mode 100644 index 000000000..10e415273 --- /dev/null +++ b/src/od/peekreader.rs @@ -0,0 +1,213 @@ +//! Contains the trait `PeekRead` and type `PeekReader` implementing it. + +use std::io; +use std::io::{Read, Write}; +use multifilereader::HasError; + +/// A trait which supplies a function to peek into a stream without +/// actually reading it. 
+/// +/// Like `std::io::Read`, it allows reading data from a stream, with +/// the additional possibility to reserve a part of the returned data +/// with the data which will be read in subsequent calls. +/// +pub trait PeekRead { + /// Reads data into a buffer. + /// + /// Fills `out` with data. The last `peek_size` bytes of `out` are + /// used for data which remains available on subsequent calls. + /// `peek_size` must be smaller than or equal to the size of `out`. + /// + /// Returns a tuple where the first number is the number of bytes + /// read from the stream, and the second number is the number of + /// bytes additionally read. Any of the numbers might be zero. + /// It can also return an error. + /// + /// A type implementing this trait will typically also implement + /// `std::io::Read`. + /// + /// # Panics + /// Might panic if `peek_size` is larger than the size of `out` + fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)>; +} + +/// Wrapper for `std::io::Read` that allows peeking into the data to be read. 
+pub struct PeekReader { + inner: R, + temp_buffer: Vec, +} + +impl PeekReader { + /// Create a new `PeekReader` wrapping `inner` + pub fn new(inner: R) -> Self { + PeekReader { + inner: inner, + temp_buffer: Vec::new(), + } + } +} + +impl PeekReader { + fn read_from_tempbuffer(&mut self, mut out: &mut [u8]) -> usize { + match out.write(self.temp_buffer.as_mut_slice()) { + Ok(n) => { + self.temp_buffer.drain(..n); + n + }, + Err(_) => 0, + } + } + + fn write_to_tempbuffer(&mut self, bytes: &[u8]) { + // if temp_buffer is not empty, data has to be inserted in front + let org_buffer: Vec<_> = self.temp_buffer.drain(..).collect(); + self.temp_buffer.write(bytes).unwrap(); + self.temp_buffer.extend(org_buffer); + } +} + +impl Read for PeekReader { + fn read(&mut self, out: &mut [u8]) -> io::Result { + let start_pos = self.read_from_tempbuffer(out); + match self.inner.read(&mut out[start_pos..]) { + Err(e) => Err(e), + Ok(n) => Ok(n + start_pos), + } + } +} + +impl PeekRead for PeekReader { + /// Reads data into a buffer. + /// + /// See `PeekRead::peek_read`. 
+ /// + /// # Panics + /// If `peek_size` is larger than the size of `out` + fn peek_read(&mut self, out: &mut [u8], peek_size: usize) -> io::Result<(usize,usize)> { + assert!(out.len() >= peek_size); + match self.read(out) { + Err(e) => Err(e), + Ok(bytes_in_buffer) => { + let unused = out.len() - bytes_in_buffer; + if peek_size <= unused { + Ok((bytes_in_buffer, 0)) + } + else { + let actual_peek_size = peek_size - unused; + let real_size = bytes_in_buffer - actual_peek_size; + self.write_to_tempbuffer(&out[real_size..bytes_in_buffer]); + Ok((real_size, actual_peek_size)) + } + }, + } + } +} + +impl HasError for PeekReader { + fn has_error(&self) -> bool { + self.inner.has_error() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read}; + + #[test] + fn test_read_normal() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefgh"[..])); + + let mut v = [0; 10]; + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_peek_read_without_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefgh"[..])); + + let mut v = [0; 10]; + assert_eq!(sut.peek_read(v.as_mut(), 0).unwrap(), (8,0)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_peek_read_and_read() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 8]; + assert_eq!(sut.read(v2.as_mut()).unwrap(), 6); + assert_eq!(v2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + } + + #[test] + fn test_peek_read_multiple_times() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut s1 = [0; 8]; + assert_eq!(sut.peek_read(s1.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(s1, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut s2 = [0; 
8]; + assert_eq!(sut.peek_read(s2.as_mut(), 4).unwrap(), (4, 2)); + assert_eq!(s2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + + let mut s3 = [0; 8]; + assert_eq!(sut.peek_read(s3.as_mut(), 4).unwrap(), (2, 0)); + assert_eq!(s3, [0x69, 0x6a, 0, 0, 0, 0, 0, 0]); + } + + #[test] + fn test_peek_read_and_read_with_small_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 2]; + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x67, 0x68]); + assert_eq!(sut.read(v2.as_mut()).unwrap(), 2); + assert_eq!(v2, [0x69, 0x6a]); + } + + #[test] + fn test_peek_read_with_smaller_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 2]; + assert_eq!(sut.peek_read(v2.as_mut(), 2).unwrap(), (0, 2)); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x65, 0x66]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x67, 0x68]); + assert_eq!(sut.peek_read(v2.as_mut(), 0).unwrap(), (2, 0)); + assert_eq!(v2, [0x69, 0x6a]); + } + + #[test] + fn test_peek_read_peek_with_larger_peek_buffer() { + let mut sut = PeekReader::new(Cursor::new(&b"abcdefghij"[..])); + + let mut v = [0; 8]; + assert_eq!(sut.peek_read(v.as_mut(), 4).unwrap(), (4, 4)); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68]); + + let mut v2 = [0; 8]; + assert_eq!(sut.peek_read(v2.as_mut(), 8).unwrap(), (0, 6)); + assert_eq!(v2, [0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0, 0]); + } +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 
26f3d5b60..e18cd9501 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -246,15 +246,14 @@ fn test_f64(){ #[test] fn test_multibyte() { - // TODO: replace **** with \u{1B000} - let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen ****".as_bytes()); + let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen \u{1B000}".as_bytes()); assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, unindent(" 0000000 U n i v e r s i t ä ** t - 0000014 T ü ** b i n g e n * - 0000030 * * * + 0000014 T ü ** b i n g e n \u{1B000} + 0000030 ** ** ** 0000033 ")); } @@ -313,11 +312,27 @@ fn test_width_without_value(){ #[test] fn test_suppress_duplicates(){ - let input = [0u8 ; 41]; + let input: [u8; 41] = [ + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 1, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0]; let expected_output = unindent(" 0000000 00000000000 0000 0000 * + 0000020 00000000001 + 0001 0000 + 0000024 00000000000 + 0000 0000 + * 0000050 00000000000 0000 0000051 From c2d61a294e5d4d9c568acbc00ad154595191af5b Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 8 Aug 2016 21:48:44 +0200 Subject: [PATCH 25/41] od: refactor: separate mod for parse_format_flags --- src/od/od.rs | 138 ++++++++++++---------------------------- src/od/parse_formats.rs | 67 +++++++++++++++++++ 2 files changed, 107 insertions(+), 98 deletions(-) create mode 100644 src/od/parse_formats.rs diff --git a/src/od/od.rs b/src/od/od.rs index 7563c751d..157f2e2dc 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -25,6 +25,7 @@ mod prn_int; mod prn_char; mod prn_float; mod parse_nrofbytes; +mod parse_formats; #[cfg(test)] mod mockstream; @@ -35,20 +36,9 @@ use byteorder_io::*; use multifilereader::*; use partialreader::*; use peekreader::*; -use prn_int::*; -use prn_char::*; -use prn_float::*; use formatteriteminfo::*; use parse_nrofbytes::*; - -//This is available in some 
versions of std, but not all that we target. -macro_rules! hashmap { - ($( $key: expr => $val: expr ),*) => {{ - let mut map = ::std::collections::HashMap::new(); - $( map.insert($key, $val); )* - map - }} -} +use parse_formats::parse_format_flags; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; @@ -155,100 +145,52 @@ pub fn uumain(args: Vec) -> i32 { inputs.push(InputSource::Stdin); } - // Gather up format flags, we don't use getopts becase we need keep them in order. - let flags = args[1..] - .iter() - .filter_map(|w| match w as &str { - "--" => None, - o if o.starts_with("-") => Some(&o[1..]), - _ => None, - }) - .collect::>(); + let formats = parse_format_flags(&args); -// TODO: -t fmts - let known_formats = hashmap![ - "a" => FORMAT_ITEM_A, - "B" => FORMAT_ITEM_OCT16, - "b" => FORMAT_ITEM_OCT8, - "c" => FORMAT_ITEM_C, - "D" => FORMAT_ITEM_DEC32U, - "d" => FORMAT_ITEM_DEC16U, - "e" => FORMAT_ITEM_F64, - "F" => FORMAT_ITEM_F64, - "f" => FORMAT_ITEM_F32, - "H" => FORMAT_ITEM_HEX32, - "X" => FORMAT_ITEM_HEX32, - "o" => FORMAT_ITEM_OCT16, - "x" => FORMAT_ITEM_HEX16, - "h" => FORMAT_ITEM_HEX16, + let mut line_bytes = match matches.opt_default("w", "32") { + None => 16, + Some(s) => { + match s.parse::() { + Ok(i) => { i } + Err(_) => { 2 } + } + } + }; + let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); + if line_bytes % min_bytes != 0 { + show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); + line_bytes = min_bytes; + } - "I" => FORMAT_ITEM_DEC16S, - "L" => FORMAT_ITEM_DEC16S, - "i" => FORMAT_ITEM_DEC16S, + let output_duplicates = matches.opt_present("v"); - "O" => FORMAT_ITEM_OCT32, - "s" => FORMAT_ITEM_DEC16U - ]; - - let mut formats = Vec::new(); - - for flag in flags.iter() { - match known_formats.get(flag) { - None => {} // not every option is a format - Some(r) => { - formats.push(*r) + let skip_bytes = match matches.opt_default("skip-bytes", "0") { + None 
=> 0, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { i } + Err(_) => { + disp_err!("Invalid argument --skip-bytes={}", s); + return 1; } } } - - if formats.is_empty() { - formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default + }; + let read_bytes = match matches.opt_str("read-bytes") { + None => None, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { Some(i) } + Err(_) => { + disp_err!("Invalid argument --read-bytes={}", s); + return 1; + } + } } + }; - let mut line_bytes = match matches.opt_default("w", "32") { - None => 16, - Some(s) => { - match s.parse::() { - Ok(i) => { i } - Err(_) => { 2 } - } - } - }; - let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); - if line_bytes % min_bytes != 0 { - show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); - line_bytes = min_bytes; - } - - let output_duplicates = matches.opt_present("v"); - - let skip_bytes = match matches.opt_default("skip-bytes", "0") { - None => 0, - Some(s) => { - match parse_number_of_bytes(&s) { - Ok(i) => { i } - Err(_) => { - disp_err!("Invalid argument --skip-bytes={}", s); - return 1; - } - } - } - }; - let read_bytes = match matches.opt_str("read-bytes") { - None => None, - Some(s) => { - match parse_number_of_bytes(&s) { - Ok(i) => { Some(i) } - Err(_) => { - disp_err!("Invalid argument --read-bytes={}", s); - return 1; - } - } - } - }; - - odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], - output_duplicates, skip_bytes, read_bytes) + odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], + output_duplicates, skip_bytes, read_bytes) } fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs new file mode 100644 index 000000000..cf9718575 --- /dev/null +++ b/src/od/parse_formats.rs @@ -0,0 +1,67 @@ +use formatteriteminfo::FormatterItemInfo; +use prn_int::*; +use prn_char::*; 
+use prn_float::*; + +//This is available in some versions of std, but not all that we target. +macro_rules! hashmap { + ($( $key: expr => $val: expr ),*) => {{ + let mut map = ::std::collections::HashMap::new(); + $( map.insert($key, $val); )* + map + }} +} + +pub fn parse_format_flags(args: &Vec) -> Vec { + // Gather up format flags, we don't use getopts becase we need keep them in order. + let flags = args[1..] + .iter() + .filter_map(|w| match w as &str { + "--" => None, + o if o.starts_with("-") => Some(&o[1..]), + _ => None, + }) + .collect::>(); + + // TODO: -t fmts + let known_formats = hashmap![ + "a" => FORMAT_ITEM_A, + "B" => FORMAT_ITEM_OCT16, + "b" => FORMAT_ITEM_OCT8, + "c" => FORMAT_ITEM_C, + "D" => FORMAT_ITEM_DEC32U, + "d" => FORMAT_ITEM_DEC16U, + "e" => FORMAT_ITEM_F64, + "F" => FORMAT_ITEM_F64, + "f" => FORMAT_ITEM_F32, + "H" => FORMAT_ITEM_HEX32, + "X" => FORMAT_ITEM_HEX32, + "o" => FORMAT_ITEM_OCT16, + "x" => FORMAT_ITEM_HEX16, + "h" => FORMAT_ITEM_HEX16, + + "I" => FORMAT_ITEM_DEC16S, + "L" => FORMAT_ITEM_DEC16S, + "i" => FORMAT_ITEM_DEC16S, + + "O" => FORMAT_ITEM_OCT32, + "s" => FORMAT_ITEM_DEC16U + ]; + + let mut formats = Vec::new(); + + for flag in flags.iter() { + match known_formats.get(flag) { + None => {} // not every option is a format + Some(r) => { + formats.push(*r) + } + } + } + + if formats.is_empty() { + formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default + } + + formats +} From d15604b2e4cec6d84da6bb7b12b149be5e2e82b6 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Wed, 10 Aug 2016 00:57:04 +0200 Subject: [PATCH 26/41] od: fix and add tests for simple format args --- src/od/formatteriteminfo.rs | 38 +++++++++- src/od/od.rs | 11 +-- src/od/parse_formats.rs | 144 +++++++++++++++++++++++++++--------- tests/test_od.rs | 2 +- 4 files changed, 153 insertions(+), 42 deletions(-) diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs index dae0f63e9..034eece53 100644 --- a/src/od/formatteriteminfo.rs +++ 
b/src/od/formatteriteminfo.rs @@ -1,4 +1,6 @@ -#[derive(Copy)] +use std::fmt; + +#[derive(Copy, Eq)] pub enum FormatWriter { IntWriter(fn(u64, usize, usize) -> String), FloatWriter(fn(f64) -> String), @@ -12,7 +14,39 @@ impl Clone for FormatWriter { } } -#[derive(Copy, Clone)] +impl PartialEq for FormatWriter { + fn eq(&self, other: &FormatWriter) -> bool { + use formatteriteminfo::FormatWriter::*; + + match (self, other) { + (&IntWriter(ref a), &IntWriter(ref b)) => a == b, + (&FloatWriter(ref a), &FloatWriter(ref b)) => a == b, + (&MultibyteWriter(ref a), &MultibyteWriter(ref b)) => *a as usize == *b as usize, + _ => false, + } + } +} + +impl fmt::Debug for FormatWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &FormatWriter::IntWriter(ref p) => { + try!(f.write_str("IntWriter:")); + fmt::Pointer::fmt(p, f) + }, + &FormatWriter::FloatWriter(ref p) => { + try!(f.write_str("FloatWriter:")); + fmt::Pointer::fmt(p, f) + }, + &FormatWriter::MultibyteWriter(ref p) => { + try!(f.write_str("MultibyteWriter:")); + fmt::Pointer::fmt(&(*p as *const ()), f) + }, + } + } + } + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct FormatterItemInfo { pub byte_size: usize, pub print_width: usize, // including a space in front of the text diff --git a/src/od/od.rs b/src/od/od.rs index 157f2e2dc..d98f8eaac 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -66,16 +66,17 @@ pub fn uumain(args: Vec) -> i32 { opts.optflagmulti("c", "", "ASCII characters or backslash escapes"); opts.optflagmulti("d", "", "unsigned decimal 2-byte units"); opts.optflagmulti("D", "", "unsigned decimal 4-byte units"); - opts.optflagmulti("o", "", "unsigned decimal 2-byte units"); + opts.optflagmulti("o", "", "octal 2-byte units"); - opts.optflagmulti("I", "", "decimal 2-byte units"); - opts.optflagmulti("L", "", "decimal 2-byte units"); - opts.optflagmulti("i", "", "decimal 2-byte units"); + opts.optflagmulti("I", "", "decimal 8-byte units"); + opts.optflagmulti("L", "", 
"decimal 8-byte units"); + opts.optflagmulti("i", "", "decimal 4-byte units"); + opts.optflagmulti("l", "", "decimal 8-byte units"); opts.optflagmulti("x", "", "hexadecimal 2-byte units"); opts.optflagmulti("h", "", "hexadecimal 2-byte units"); opts.optflagmulti("O", "", "octal 4-byte units"); - opts.optflagmulti("s", "", "decimal 4-byte units"); + opts.optflagmulti("s", "", "decimal 2-byte units"); opts.optflagmulti("X", "", "hexadecimal 4-byte units"); opts.optflagmulti("H", "", "hexadecimal 4-byte units"); diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs index cf9718575..00ddce0ee 100644 --- a/src/od/parse_formats.rs +++ b/src/od/parse_formats.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use formatteriteminfo::FormatterItemInfo; use prn_int::*; use prn_char::*; @@ -12,49 +13,69 @@ macro_rules! hashmap { }} } +/// Parses format flags from commandline +/// +/// getopts, docopt, clap don't seem suitable to parse the commandline +/// arguments used for formats. In particular arguments can appear +/// multiple times and the order they appear in, is significant. +/// +/// arguments like -f, -o, -x can appear separate or combined: -fox +/// it can also be mixed with non format related flags like -v: -fvox +/// arguments with parameters like -w16 can only appear at the end: -fvoxw16 +/// parameters of -t/--format specify 1 or more formats. +/// if -- appears on the commandline, parsing should stop. pub fn parse_format_flags(args: &Vec) -> Vec { // Gather up format flags, we don't use getopts becase we need keep them in order. - let flags = args[1..] 
- .iter() - .filter_map(|w| match w as &str { - "--" => None, - o if o.starts_with("-") => Some(&o[1..]), - _ => None, - }) - .collect::>(); // TODO: -t fmts let known_formats = hashmap![ - "a" => FORMAT_ITEM_A, - "B" => FORMAT_ITEM_OCT16, - "b" => FORMAT_ITEM_OCT8, - "c" => FORMAT_ITEM_C, - "D" => FORMAT_ITEM_DEC32U, - "d" => FORMAT_ITEM_DEC16U, - "e" => FORMAT_ITEM_F64, - "F" => FORMAT_ITEM_F64, - "f" => FORMAT_ITEM_F32, - "H" => FORMAT_ITEM_HEX32, - "X" => FORMAT_ITEM_HEX32, - "o" => FORMAT_ITEM_OCT16, - "x" => FORMAT_ITEM_HEX16, - "h" => FORMAT_ITEM_HEX16, - - "I" => FORMAT_ITEM_DEC16S, - "L" => FORMAT_ITEM_DEC16S, - "i" => FORMAT_ITEM_DEC16S, - - "O" => FORMAT_ITEM_OCT32, - "s" => FORMAT_ITEM_DEC16U + 'a' => FORMAT_ITEM_A, + 'B' => FORMAT_ITEM_OCT16, + 'b' => FORMAT_ITEM_OCT8, + 'c' => FORMAT_ITEM_C, + 'D' => FORMAT_ITEM_DEC32U, + 'd' => FORMAT_ITEM_DEC16U, + 'e' => FORMAT_ITEM_F64, + 'F' => FORMAT_ITEM_F64, + 'f' => FORMAT_ITEM_F32, + 'H' => FORMAT_ITEM_HEX32, + 'h' => FORMAT_ITEM_HEX16, + 'i' => FORMAT_ITEM_DEC32S, + 'I' => FORMAT_ITEM_DEC64S, + 'L' => FORMAT_ITEM_DEC64S, + 'l' => FORMAT_ITEM_DEC64S, + 'O' => FORMAT_ITEM_OCT32, + 'o' => FORMAT_ITEM_OCT16, + 's' => FORMAT_ITEM_DEC16S, + 'X' => FORMAT_ITEM_HEX32, + 'x' => FORMAT_ITEM_HEX16 ]; + let ignored_arg_opts: HashSet<_> = ['A', 'j', 'N', 'S', 'w'].iter().cloned().collect(); + let mut formats = Vec::new(); - for flag in flags.iter() { - match known_formats.get(flag) { - None => {} // not every option is a format - Some(r) => { - formats.push(*r) + // args[0] is the name of the binary + let mut arg_iter = args.iter().skip(1); + + while let Some(arg) = arg_iter.next() { + if arg.starts_with("--") { + if arg.len() == 2 { + break; + } + } + else if arg.starts_with("-") { + let mut flags = arg.chars().skip(1); + while let Some(c) = flags.next() { + if ignored_arg_opts.contains(&c) { + break; + } + match known_formats.get(&c) { + None => {} // not every option is a format + Some(r) => { + formats.push(*r) + } 
+ } } } } @@ -65,3 +86,58 @@ pub fn parse_format_flags(args: &Vec) -> Vec { formats } + +#[allow(dead_code)] +pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Vec { + let args = args_str.iter().map(|s| s.to_string()).collect(); + parse_format_flags(&args) +} + +#[test] +fn test_no_options() { + assert_eq!(parse_format_flags_str( + &vec!("od")), + vec!(FORMAT_ITEM_OCT16)); +} + +#[test] +fn test_one_option() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-F")), + vec!(FORMAT_ITEM_F64)); +} + +#[test] +fn test_two_separate_options() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-F", "-x")), + vec!(FORMAT_ITEM_F64, FORMAT_ITEM_HEX16)); +} + +#[test] +fn test_two_combined_options() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-Fx")), + vec!(FORMAT_ITEM_F64, FORMAT_ITEM_HEX16)); +} + +#[test] +fn test_ignore_non_format_parameters() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-d", "-Ax")), + vec!(FORMAT_ITEM_DEC16U)); +} + +#[test] +fn test_ignore_separate_parameters() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-I", "-A", "x")), + vec!(FORMAT_ITEM_DEC64S)); +} + +#[test] +fn test_ignore_trailing_vals() { + assert_eq!(parse_format_flags_str( + &vec!("od", "-D", "--", "-x")), + vec!(FORMAT_ITEM_DEC32U)); +} diff --git a/tests/test_od.rs b/tests/test_od.rs index e18cd9501..5a7bd0699 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -158,7 +158,7 @@ fn test_dec() { 0000000 0 1 2 3 32767 -32768 -32767 0000016 "); - let result = new_ucmd!().arg("--endian=little").arg("-i").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("--endian=little").arg("-s").run_piped_stdin(&input[..]); assert_empty_stderr!(result); assert!(result.success); From cea4297fdfe7f86cda06aa9de16f8748c04ba370 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Thu, 11 Aug 2016 13:45:39 +0200 Subject: [PATCH 27/41] od: implement --format / -t --- src/od/od.rs | 10 +- src/od/parse_formats.rs | 376 ++++++++++++++++++++++++++++++++++++++-- 
src/od/prn_int.rs | 10 +- tests/test_od.rs | 7 +- 4 files changed, 372 insertions(+), 31 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index d98f8eaac..99056985d 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -84,7 +84,7 @@ pub fn uumain(args: Vec) -> i32 { opts.optflagmulti("f", "", "floating point single precision (32-bit) units"); opts.optflagmulti("F", "", "floating point double precision (64-bit) units"); - opts.optopt("t", "format", "select output format or formats", "TYPE"); + opts.optmulti("t", "format", "select output format or formats", "TYPE"); opts.optflag("v", "output-duplicates", "do not use * to mark line suppression"); opts.optflagopt("w", "width", ("output BYTES bytes per output line. 32 is implied when BYTES is not \ @@ -146,7 +146,13 @@ pub fn uumain(args: Vec) -> i32 { inputs.push(InputSource::Stdin); } - let formats = parse_format_flags(&args); + let formats = match parse_format_flags(&args) { + Ok(f) => f, + Err(e) => { + disp_err!("{}", e); + return 1; + } + }; let mut line_bytes = match matches.opt_default("w", "32") { None => 16, diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs index 00ddce0ee..5682e0ac2 100644 --- a/src/od/parse_formats.rs +++ b/src/od/parse_formats.rs @@ -24,10 +24,8 @@ macro_rules! hashmap { /// arguments with parameters like -w16 can only appear at the end: -fvoxw16 /// parameters of -t/--format specify 1 or more formats. /// if -- appears on the commandline, parsing should stop. -pub fn parse_format_flags(args: &Vec) -> Vec { - // Gather up format flags, we don't use getopts becase we need keep them in order. 
+pub fn parse_format_flags(args: &Vec) -> Result, String> { - // TODO: -t fmts let known_formats = hashmap![ 'a' => FORMAT_ITEM_A, 'B' => FORMAT_ITEM_OCT16, @@ -57,38 +55,240 @@ pub fn parse_format_flags(args: &Vec) -> Vec { // args[0] is the name of the binary let mut arg_iter = args.iter().skip(1); + let mut expect_type_string = false; while let Some(arg) = arg_iter.next() { - if arg.starts_with("--") { + if expect_type_string { + match parse_type_string(arg) { + Ok(v) => formats.extend(v.into_iter()), + Err(e) => return Err(e), + } + expect_type_string = false; + } + else if arg.starts_with("--") { if arg.len() == 2 { break; } + if arg.starts_with("--format=") { + let params: String = arg.chars().skip_while(|c| *c != '=').skip(1).collect(); + match parse_type_string(&params) { + Ok(v) => formats.extend(v.into_iter()), + Err(e) => return Err(e), + } + } + if arg == "--format" { + expect_type_string = true; + } } else if arg.starts_with("-") { let mut flags = arg.chars().skip(1); + let mut format_spec = String::new(); while let Some(c) = flags.next() { - if ignored_arg_opts.contains(&c) { + if expect_type_string { + format_spec.push(c); + } + else if ignored_arg_opts.contains(&c) { break; } - match known_formats.get(&c) { - None => {} // not every option is a format - Some(r) => { - formats.push(*r) + else if c=='t' { + expect_type_string = true; + } + else { + match known_formats.get(&c) { + None => {} // not every option is a format + Some(r) => { + formats.push(*r) + } } } } + if !format_spec.is_empty() { + match parse_type_string(&format_spec) { + Ok(v) => formats.extend(v.into_iter()), + Err(e) => return Err(e), + } + expect_type_string = false; + } } } + if expect_type_string { + return Err(format!("missing format specification after '--format' / '-t'")); + } if formats.is_empty() { formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default } - formats + Ok(formats) +} + +#[derive(PartialEq, Eq, Debug)] +enum ParseState { + ExpectSize, // expect optional 
size character like L for long. + ExpectDecimal, // expect optional additional digits, like for 16. + ExpectDump, // expect optional 'z'. + Finished // no more characters may appear. +} + +fn parse_type_string(params: &String) -> Result, String> { + + let type_chars: HashSet<_> = ['a', 'c'].iter().cloned().collect(); + let type_ints: HashSet<_> = ['d', 'o', 'u', 'x'].iter().cloned().collect(); + let type_floats: HashSet<_> = ['f'].iter().cloned().collect(); + let type_all: HashSet<_> = + type_chars.iter() + .chain(type_ints.iter()) + .chain(type_floats.iter()) + .collect(); + + let mut formats = Vec::new(); + + // first split a type string into parts refering a single type + let mut type_parts = Vec::new(); + let mut s = String::new(); + for c in params.chars() { + if type_all.contains(&c) { + if !s.is_empty() { + type_parts.push(s); + s = String::new(); + } + s.push(c); + } + else { + if s.is_empty() { + return Err(format!("unexpected char '{}' in format specification '{}'", c, params)); + } + s.push(c); + } + } + if !s.is_empty() { + type_parts.push(s); + } + + for format_type in type_parts.iter() { + let mut chars=format_type.chars(); + + let type_char = chars.next().unwrap(); + + let mut parse_state = ParseState::ExpectSize; + let mut decimal_size = String::new(); + let mut byte_size = 0u8; + let mut show_ascii_dump = false; + + if type_chars.contains(&type_char) { + parse_state = ParseState::ExpectDump; + } + + loop { + match chars.next() { + None => break, + Some('z') if parse_state != ParseState::Finished => { + show_ascii_dump = true; + parse_state = ParseState::Finished; + }, + Some(d) if d.is_digit(10) + && (parse_state == ParseState::ExpectSize || parse_state == ParseState::ExpectDecimal) => { + decimal_size.push(d); + parse_state = ParseState::ExpectDecimal; + }, + + Some('C') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { + byte_size = 1; + parse_state = ParseState::ExpectDump; + }, + Some('S') if 
type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { + byte_size = 2; + parse_state = ParseState::ExpectDump; + }, + Some('I') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { + byte_size = 4; + parse_state = ParseState::ExpectDump; + }, + Some('L') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { + byte_size = 8; + parse_state = ParseState::ExpectDump; + }, + + Some('F') if type_char == 'f' && parse_state == ParseState::ExpectSize => { + byte_size = 4; + parse_state = ParseState::ExpectDump; + }, + Some('D') if type_char == 'f' && parse_state == ParseState::ExpectSize => { + byte_size = 8; + parse_state = ParseState::ExpectDump; + }, + // Some('L') if type_char == 'f' => byte_size = 16, // TODO support f128 + + Some(c) => { + return Err(format!("unexpected char '{}' in format specification '{}'", c, format_type)); + } + } + } + + if !decimal_size.is_empty() { + byte_size=match decimal_size.parse() { + Err(_) => return Err(format!("invalid number '{}' in format specification '{}'", decimal_size, format_type)), + Ok(n) => n, + } + } + + match type_char { + 'a' => formats.push(FORMAT_ITEM_A), + 'c' => formats.push(FORMAT_ITEM_C), + 'd' => { + formats.push(match byte_size { + 1 => FORMAT_ITEM_DEC8S, + 2 => FORMAT_ITEM_DEC16S, + 4|0 => FORMAT_ITEM_DEC32S, + 8 => FORMAT_ITEM_DEC64S, + _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), + }); + }, + 'o' => { + formats.push(match byte_size { + 1 => FORMAT_ITEM_OCT8, + 2 => FORMAT_ITEM_OCT16, + 4|0 => FORMAT_ITEM_OCT32, + 8 => FORMAT_ITEM_OCT64, + _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), + }); + }, + 'u' => { + formats.push(match byte_size { + 1 => FORMAT_ITEM_DEC8U, + 2 => FORMAT_ITEM_DEC16U, + 4|0 => FORMAT_ITEM_DEC32U, + 8 => FORMAT_ITEM_DEC64U, + _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, 
format_type)), + }); + }, + 'x' => { + formats.push(match byte_size { + 1 => FORMAT_ITEM_HEX8, + 2 => FORMAT_ITEM_HEX16, + 4|0 => FORMAT_ITEM_HEX32, + 8 => FORMAT_ITEM_HEX64, + _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), + }); + }, + 'f' => { + formats.push(match byte_size { + 4|0 => FORMAT_ITEM_F32, + 8 => FORMAT_ITEM_F64, + _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), + }); + }, + _ => unreachable!(), + } + + if show_ascii_dump { /*TODO*/ } + } + + Ok(formats) } #[allow(dead_code)] -pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Vec { +pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Result, String> { let args = args_str.iter().map(|s| s.to_string()).collect(); parse_format_flags(&args) } @@ -96,48 +296,188 @@ pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Vec Date: Fri, 12 Aug 2016 17:51:24 +0200 Subject: [PATCH 28/41] od: implement ascii dump --- src/od/od.rs | 44 +++++++++------ src/od/parse_formats.rs | 121 ++++++++++++++++++++++++---------------- src/od/prn_char.rs | 23 ++++++++ tests/test_od.rs | 21 +++++++ 4 files changed, 144 insertions(+), 65 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index 99056985d..c7aeed168 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -38,7 +38,8 @@ use partialreader::*; use peekreader::*; use formatteriteminfo::*; use parse_nrofbytes::*; -use parse_formats::parse_format_flags; +use parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; +use prn_char::format_ascii_dump; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; @@ -163,7 +164,7 @@ pub fn uumain(args: Vec) -> i32 { } } }; - let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); + let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); if line_bytes % min_bytes != 0 { 
show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); line_bytes = min_bytes; @@ -201,7 +202,7 @@ pub fn uumain(args: Vec) -> i32 { } fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, - fnames: Vec, formats: &[FormatterItemInfo], output_duplicates: bool, + fnames: Vec, formats: &[ParsedFormatterItemInfo], output_duplicates: bool, skip_bytes: usize, read_bytes: Option) -> i32 { let mf = MultifileReader::new(fnames); @@ -213,12 +214,13 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, let mut bytes: Vec = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE); unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized - let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size)); + let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); let print_width_block = formats .iter() .fold(1, |max, next| { - cmp::max(max, next.print_width * (byte_size_block / next.byte_size)) + cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size)) }); + let print_width_line = print_width_block * (line_bytes / byte_size_block); if byte_size_block > MAX_BYTES_PER_UNIT { panic!("{}-bits types are unsupported. 
Current max={}-bits.", @@ -233,9 +235,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // calculate proper alignment for each item for sf in &mut spaced_formatters { - let mut byte_size = sf.frm.byte_size; + let mut byte_size = sf.frm.formatter_item_info.byte_size; let mut items_in_block = byte_size_block / byte_size; - let thisblock_width = sf.frm.print_width * items_in_block; + let thisblock_width = sf.frm.formatter_item_info.print_width * items_in_block; let mut missing_spacing = print_width_block - thisblock_width; while items_in_block > 0 { @@ -291,7 +293,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, print_bytes(byte_order, &bytes, n, peekbytes, &print_with_radix(input_offset_base, addr), - &spaced_formatters, byte_size_block); + &spaced_formatters, byte_size_block, print_width_line); } addr += n; @@ -312,22 +314,22 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str, - formats: &[SpacedFormatterItemInfo], byte_size_block: usize) { + formats: &[SpacedFormatterItemInfo], byte_size_block: usize, print_width_line: usize) { let mut first = true; // First line of a multi-format raster. 
for f in formats { let mut output_text = String::new(); let mut b = 0; while b < length { - let nextb = b + f.frm.byte_size; + let nextb = b + f.frm.formatter_item_info.byte_size; output_text.push_str(&format!("{:>width$}", "", width = f.spacing[b % byte_size_block])); - match f.frm.formatter { + match f.frm.formatter_item_info.formatter { FormatWriter::IntWriter(func) => { - let p: u64 = match f.frm.byte_size { + let p: u64 = match f.frm.formatter_item_info.byte_size { 1 => { bytes[b] as u64 } @@ -340,19 +342,19 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us 8 => { byte_order.read_u64(&bytes[b..nextb]) } - _ => { panic!("Invalid byte_size: {}", f.frm.byte_size); } + _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); } }; - output_text.push_str(&func(p, f.frm.byte_size, f.frm.print_width)); + output_text.push_str(&func(p, f.frm.formatter_item_info.byte_size, f.frm.formatter_item_info.print_width)); } FormatWriter::FloatWriter(func) => { - let p: f64 = match f.frm.byte_size { + let p: f64 = match f.frm.formatter_item_info.byte_size { 4 => { byte_order.read_f32(&bytes[b..nextb]) as f64 } 8 => { byte_order.read_f64(&bytes[b..nextb]) } - _ => { panic!("Invalid byte_size: {}", f.frm.byte_size); } + _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); } }; output_text.push_str(&func(p)); } @@ -363,6 +365,14 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us b = nextb; } + if f.frm.add_ascii_dump { + let missing_spacing = print_width_line.saturating_sub(output_text.chars().count()); + output_text.push_str(&format!("{:>width$} {}", + "", + format_ascii_dump(&bytes[..length]), + width=missing_spacing)); + } + if first { print!("{}", prefix); // print offset // if printing in multiple formats offset is printed only once @@ -416,6 +426,6 @@ fn print_final_offset(r: Radix, x: usize) { } struct SpacedFormatterItemInfo { - frm: FormatterItemInfo, + frm: 
ParsedFormatterItemInfo, spacing: [usize; MAX_BYTES_PER_UNIT], } diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs index 5682e0ac2..3394493e7 100644 --- a/src/od/parse_formats.rs +++ b/src/od/parse_formats.rs @@ -13,6 +13,22 @@ macro_rules! hashmap { }} } +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct ParsedFormatterItemInfo { + pub formatter_item_info: FormatterItemInfo, + pub add_ascii_dump: bool, +} + +impl ParsedFormatterItemInfo { + pub fn new(formatter_item_info: FormatterItemInfo, add_ascii_dump: bool) -> ParsedFormatterItemInfo { + ParsedFormatterItemInfo { + formatter_item_info: formatter_item_info, + add_ascii_dump: add_ascii_dump, + } + } +} + + /// Parses format flags from commandline /// /// getopts, docopt, clap don't seem suitable to parse the commandline @@ -24,7 +40,7 @@ macro_rules! hashmap { /// arguments with parameters like -w16 can only appear at the end: -fvoxw16 /// parameters of -t/--format specify 1 or more formats. /// if -- appears on the commandline, parsing should stop. -pub fn parse_format_flags(args: &Vec) -> Result, String> { +pub fn parse_format_flags(args: &Vec) -> Result, String> { let known_formats = hashmap![ 'a' => FORMAT_ITEM_A, @@ -97,7 +113,7 @@ pub fn parse_format_flags(args: &Vec) -> Result, match known_formats.get(&c) { None => {} // not every option is a format Some(r) => { - formats.push(*r) + formats.push(ParsedFormatterItemInfo::new(*r, false)) } } } @@ -116,7 +132,7 @@ pub fn parse_format_flags(args: &Vec) -> Result, } if formats.is_empty() { - formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default + formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_OCT16, false)); // 2 byte octal is the default } Ok(formats) @@ -130,7 +146,7 @@ enum ParseState { Finished // no more characters may appear. 
} -fn parse_type_string(params: &String) -> Result, String> { +fn parse_type_string(params: &String) -> Result, String> { let type_chars: HashSet<_> = ['a', 'c'].iter().cloned().collect(); let type_ints: HashSet<_> = ['d', 'o', 'u', 'x'].iter().cloned().collect(); @@ -233,50 +249,50 @@ fn parse_type_string(params: &String) -> Result, String> } match type_char { - 'a' => formats.push(FORMAT_ITEM_A), - 'c' => formats.push(FORMAT_ITEM_C), + 'a' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_A, show_ascii_dump)), + 'c' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, show_ascii_dump)), 'd' => { - formats.push(match byte_size { + formats.push(ParsedFormatterItemInfo::new(match byte_size { 1 => FORMAT_ITEM_DEC8S, 2 => FORMAT_ITEM_DEC16S, 4|0 => FORMAT_ITEM_DEC32S, 8 => FORMAT_ITEM_DEC64S, _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }); + }, show_ascii_dump)); }, 'o' => { - formats.push(match byte_size { + formats.push(ParsedFormatterItemInfo::new(match byte_size { 1 => FORMAT_ITEM_OCT8, 2 => FORMAT_ITEM_OCT16, 4|0 => FORMAT_ITEM_OCT32, 8 => FORMAT_ITEM_OCT64, _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }); + }, show_ascii_dump)); }, 'u' => { - formats.push(match byte_size { + formats.push(ParsedFormatterItemInfo::new(match byte_size { 1 => FORMAT_ITEM_DEC8U, 2 => FORMAT_ITEM_DEC16U, 4|0 => FORMAT_ITEM_DEC32U, 8 => FORMAT_ITEM_DEC64U, _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }); + }, show_ascii_dump)); }, 'x' => { - formats.push(match byte_size { + formats.push(ParsedFormatterItemInfo::new(match byte_size { 1 => FORMAT_ITEM_HEX8, 2 => FORMAT_ITEM_HEX16, 4|0 => FORMAT_ITEM_HEX32, 8 => FORMAT_ITEM_HEX64, _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }); + }, show_ascii_dump)); }, 'f' => { - formats.push(match byte_size { + 
formats.push(ParsedFormatterItemInfo::new(match byte_size { 4|0 => FORMAT_ITEM_F32, 8 => FORMAT_ITEM_F64, _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }); + }, show_ascii_dump)); }, _ => unreachable!(), } @@ -287,12 +303,22 @@ fn parse_type_string(params: &String) -> Result, String> Ok(formats) } -#[allow(dead_code)] +#[cfg(test)] pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Result, String> { let args = args_str.iter().map(|s| s.to_string()).collect(); - parse_format_flags(&args) + match parse_format_flags(&args) { + Err(e) => Err(e), + Ok(v) => { + // tests using this function asume add_ascii_dump is not set + Ok(v.into_iter() + .inspect(|f| assert!(!f.add_ascii_dump)) + .map(|f| f.formatter_item_info) + .collect()) + }, + } } + #[test] fn test_no_options() { assert_eq!(parse_format_flags_str( @@ -372,9 +398,9 @@ fn test_long_format_a() { #[test] fn test_long_format_cz() { - assert_eq!(parse_format_flags_str( - &vec!("od", "--format=cz")).unwrap(), - vec!(FORMAT_ITEM_C)); // TODO 'z' + assert_eq!(parse_format_flags( + &vec!("od".to_string(), "--format=cz".to_string())).unwrap(), + vec!(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, true))); } #[test] @@ -448,36 +474,35 @@ fn test_format_next_arg_invalid() { parse_format_flags_str(&vec!("od", "-t")).unwrap_err(); } - #[test] fn test_mixed_formats() { - assert_eq!(parse_format_flags_str( + assert_eq!(parse_format_flags( &vec!( - "od", - "--skip-bytes=2", - "-vItu1z", - "-N", - "1000", - "-xt", - "acdx1", - "--format=u2c", - "--format", - "f", - "-xAx", - "--", - "-h", - "--format=f8")).unwrap(), + "od".to_string(), + "--skip-bytes=2".to_string(), + "-vItu1z".to_string(), + "-N".to_string(), + "1000".to_string(), + "-xt".to_string(), + "acdx1".to_string(), + "--format=u2c".to_string(), + "--format".to_string(), + "f".to_string(), + "-xAx".to_string(), + "--".to_string(), + "-h".to_string(), + "--format=f8".to_string())).unwrap(), vec!( - 
FORMAT_ITEM_DEC64S, // I - FORMAT_ITEM_DEC8U, // tu1z - FORMAT_ITEM_HEX16, // x - FORMAT_ITEM_A, // ta - FORMAT_ITEM_C, // tc - FORMAT_ITEM_DEC32S, // td - FORMAT_ITEM_HEX8, // tx1 - FORMAT_ITEM_DEC16U, // tu2 - FORMAT_ITEM_C, // tc - FORMAT_ITEM_F32, // tf - FORMAT_ITEM_HEX16, // x + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC64S, false), // I + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC8U, true), // tu1z + ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false), // x + ParsedFormatterItemInfo::new(FORMAT_ITEM_A, false), // ta + ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false), // tc + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC32S, false), // td + ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX8, false), // tx1 + ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC16U, false), // tu2 + ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false), // tc + ParsedFormatterItemInfo::new(FORMAT_ITEM_F32, false), // tf + ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false), // x )); } diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index f4d096aa8..9686c9160 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -100,6 +100,23 @@ fn format_item_c(bytes: &[u8]) -> String { } } +pub fn format_ascii_dump(bytes: &[u8]) -> String { + let mut result = String::new(); + + result.push('>'); + for c in bytes.iter() { + if *c >= 0x20 && *c <= 0x7e { + result.push_str(C_CHRS[*c as usize]); + } + else { + result.push('.'); + } + } + result.push('<'); + + result +} + #[test] fn test_format_item_a() { assert_eq!(" nul", format_item_a(0x00, 1, 4)); @@ -147,3 +164,9 @@ fn test_format_item_c() { assert_eq!(" 365", format_item_c(&[0xf5, 0x80, 0x80, 0x80])); // invalid utf-8 assert_eq!(" 377", format_item_c(&[0xff])); // invalid utf-8 } + +#[test] +fn test_format_ascii_dump() { + assert_eq!(">.<", format_ascii_dump(&[0x00])); + assert_eq!(">. 
A~.<", format_ascii_dump(&[0x1f, 0x20, 0x41, 0x7e, 0x7f])); +} diff --git a/tests/test_od.rs b/tests/test_od.rs index d78dd316c..09fd79837 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -527,3 +527,24 @@ fn test_read_bytes(){ assert!(result.success); assert_eq!(result.stdout, unindent(ALPHA_OUT)); } + +#[test] +fn test_ascii_dump(){ + + let input : [u8; 22] = [ + 0x00, 0x01, 0x0a, 0x0d, 0x10, 0x1f, 0x20, 0x61, 0x62, 0x63, 0x7d, + 0x7e, 0x7f, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xff]; + let result = new_ucmd!().arg("-tx1zacz").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000000 00 01 0a 0d 10 1f 20 61 62 63 7d 7e 7f 80 90 a0 >...... abc}~....< + nul soh nl cr dle us sp a b c } ~ del nul dle sp + \0 001 \n \r 020 037 a b c } ~ 177 ** ** ** >...... abc}~....< + 0000020 b0 c0 d0 e0 f0 ff >......< + 0 @ P ` p del + ** 300 320 340 360 377 >......< + 0000026 + ")); +} From 9e33c3a48c167fe3b7e1d6357df392943745421e Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 13 Aug 2016 22:51:21 +0200 Subject: [PATCH 29/41] od: fix command-line parsing of file names --- src/od/od.rs | 7 +++---- tests/fixtures/od/-f | 1 + tests/fixtures/od/c | 1 + tests/fixtures/od/x | 1 + tests/test_od.rs | 18 +++++++++++++++++- 5 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 tests/fixtures/od/-f create mode 100644 tests/fixtures/od/c create mode 100644 tests/fixtures/od/x diff --git a/src/od/od.rs b/src/od/od.rs index c7aeed168..42546a638 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -134,12 +134,11 @@ pub fn uumain(args: Vec) -> i32 { } }; - // Gather up file names - args which don't start with '-' - let mut inputs = args[1..] 
+ // Gather up file names + let mut inputs = matches.free .iter() .filter_map(|w| match w as &str { - "--" => Some(InputSource::Stdin), - o if o.starts_with("-") => None, + "-" => Some(InputSource::Stdin), x => Some(InputSource::FileName(x)), }) .collect::>(); diff --git a/tests/fixtures/od/-f b/tests/fixtures/od/-f new file mode 100644 index 000000000..370c31180 --- /dev/null +++ b/tests/fixtures/od/-f @@ -0,0 +1 @@ +minus lowercase f diff --git a/tests/fixtures/od/c b/tests/fixtures/od/c new file mode 100644 index 000000000..109c7e9b0 --- /dev/null +++ b/tests/fixtures/od/c @@ -0,0 +1 @@ +lowercase c diff --git a/tests/fixtures/od/x b/tests/fixtures/od/x new file mode 100644 index 000000000..584b8c5f2 --- /dev/null +++ b/tests/fixtures/od/x @@ -0,0 +1 @@ +lowercase x diff --git a/tests/test_od.rs b/tests/test_od.rs index 09fd79837..58567053c 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -117,7 +117,7 @@ fn test_from_mixed() { } } - let result = new_ucmd!().arg("--endian=little").arg(file1.as_os_str()).arg("--").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); + let result = new_ucmd!().arg("--endian=little").arg(file1.as_os_str()).arg("-").arg(file3.as_os_str()).run_piped_stdin(data2.as_bytes()); assert_empty_stderr!(result); assert!(result.success); @@ -548,3 +548,19 @@ fn test_ascii_dump(){ 0000026 ")); } + +#[test] +fn test_filename_parsing(){ + // files "a" and "x" both exists, but are no filenames in the commandline below + // "-f" must be treated as a filename, it contains the text: minus lowercase f + // so "-f" should not be interpreted as a formatting option. 
+ let result = new_ucmd!().arg("--format").arg("a").arg("-A").arg("x").arg("--").arg("-f").run(); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 000000 m i n u s sp l o w e r c a s e sp + 000010 f nl + 000012 + ")); +} From 26ec46835c898d104984cd16648ce70ac5b2588f Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Tue, 16 Aug 2016 00:37:33 +0200 Subject: [PATCH 30/41] od: implement +size to skip bytes --- src/od/od.rs | 48 ++++---- src/od/parse_inputs.rs | 268 +++++++++++++++++++++++++++++++++++++++++ tests/test_od.rs | 27 +++++ 3 files changed, 322 insertions(+), 21 deletions(-) create mode 100644 src/od/parse_inputs.rs diff --git a/src/od/od.rs b/src/od/od.rs index 42546a638..b4f128f06 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -26,6 +26,7 @@ mod prn_char; mod prn_float; mod parse_nrofbytes; mod parse_formats; +mod parse_inputs; #[cfg(test)] mod mockstream; @@ -37,9 +38,10 @@ use multifilereader::*; use partialreader::*; use peekreader::*; use formatteriteminfo::*; -use parse_nrofbytes::*; +use parse_nrofbytes::parse_number_of_bytes; use parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; use prn_char::format_ascii_dump; +use parse_inputs::{parse_inputs, CommandLineInputs}; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; @@ -134,17 +136,33 @@ pub fn uumain(args: Vec) -> i32 { } }; - // Gather up file names - let mut inputs = matches.free + let mut skip_bytes = match matches.opt_default("skip-bytes", "0") { + None => 0, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { i } + Err(_) => { + disp_err!("Invalid argument --skip-bytes={}", s); + return 1; + } + } + } + }; + + let input_strings = match parse_inputs(&matches) { + CommandLineInputs::FileNames(v) => v, + CommandLineInputs::FileAndOffset((f, s, _)) => { + skip_bytes = s; + vec!{f} + }, + }; + let inputs = input_strings .iter() - .filter_map(|w| match w as &str { - "-" => 
Some(InputSource::Stdin), - x => Some(InputSource::FileName(x)), + .map(|w| match w as &str { + "-" => InputSource::Stdin, + x => InputSource::FileName(x), }) .collect::>(); - if inputs.len() == 0 { - inputs.push(InputSource::Stdin); - } let formats = match parse_format_flags(&args) { Ok(f) => f, @@ -171,18 +189,6 @@ pub fn uumain(args: Vec) -> i32 { let output_duplicates = matches.opt_present("v"); - let skip_bytes = match matches.opt_default("skip-bytes", "0") { - None => 0, - Some(s) => { - match parse_number_of_bytes(&s) { - Ok(i) => { i } - Err(_) => { - disp_err!("Invalid argument --skip-bytes={}", s); - return 1; - } - } - } - }; let read_bytes = match matches.opt_str("read-bytes") { None => None, Some(s) => { diff --git a/src/od/parse_inputs.rs b/src/od/parse_inputs.rs new file mode 100644 index 000000000..4db4826e2 --- /dev/null +++ b/src/od/parse_inputs.rs @@ -0,0 +1,268 @@ +use getopts::Matches; + +/// Abstraction for getopts +pub trait CommandLineOpts { + /// returns all commandline parameters which do not belong to an option. + fn inputs(&self) -> Vec; + /// tests if any of the specified options is present. + fn opts_present(&self, &[&str]) -> bool; +} + +/// Implementation for `getopts` +impl CommandLineOpts for Matches { + fn inputs(&self) -> Vec { + self.free.clone() + } + fn opts_present(&self, opts: &[&str]) -> bool { + self.opts_present(&opts.iter().map(|s| s.to_string()).collect::>()) + } +} + +/// Contains the Input filename(s) with an optional offset. +/// +/// `FileNames` is used for one or more file inputs ("-" = stdin) +/// `FileAndOffset` is used for a single file input, with an offset +/// and an optional label. Offset and label are specified in bytes. +/// `FileAndOffset` will be only used if an offset is specified, +/// but it might be 0. +#[derive(PartialEq, Debug)] +pub enum CommandLineInputs { + FileNames(Vec), + FileAndOffset((String, usize, Option)), +} + + +/// Interprets the commandline inputs of od. 
+/// +/// Returns either an unspecified number of filenames. +/// Or it will return a single filename, with an offset and optional label. +/// Offset and label are specified in bytes. +/// '-' is used as filename if stdin is meant. This is also returned if +/// there is no input, as stdin is the default input. +pub fn parse_inputs(matches: &CommandLineOpts) -> CommandLineInputs { + + let mut input_strings: Vec = matches.inputs(); + + // test if commandline contains: [file] + if input_strings.len() == 1 || input_strings.len() == 2 { + // if any of the options -A, -j, -N, -t, -v or -w are present there is no offset + if !matches.opts_present(&["A", "j", "N", "t", "v", "w"]) { + // test if the last input can be parsed as an offset. + let offset=parse_offset_operand(&input_strings[input_strings.len()-1]); + match offset { + Ok(n) => { + // if there is just 1 input (stdin), an offset must start with '+' + if input_strings.len() == 1 && input_strings[0].starts_with("+") { + return CommandLineInputs::FileAndOffset(("-".to_string(), n, None)); + } + if input_strings.len() == 2 { + return CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, None)); + } + } + _ => { + // if it cannot be parsed, it is considered a filename + } + } + } + } + + if input_strings.len() == 0 { + input_strings.push("-".to_string()); + } + CommandLineInputs::FileNames(input_strings) +} + +/// parses format used by offset and label on the commandline +pub fn parse_offset_operand(s: &String) -> Result { + let mut start = 0; + let mut len = s.len(); + let mut radix = 8; + let mut multiply = 1; + + if s.starts_with("+") { + start += 1; + } + + if s[start..len].starts_with("0x") || s[start..len].starts_with("0X") { + start += 2; + radix = 16; + } + else { + if s[start..len].ends_with("b") { + len -= 1; + multiply = 512; + } + if s[start..len].ends_with(".") { + len -= 1; + radix = 10; + } + } + match usize::from_str_radix(&s[start..len], radix) { + Ok(i) => Ok(i * multiply), + Err(_) => 
Err("parse failed"), + } +} + + +#[cfg(test)] +mod tests { + use super::*; + + /// A mock for the commandline options type + /// + /// `inputs` are all commandline parameters which do not belong to an option. + /// `option_names` are the names of the options on the commandline. + struct MockOptions<'a> { + inputs: Vec, + option_names: Vec<&'a str>, + } + + impl<'a> MockOptions<'a> { + fn new(inputs: Vec<&'a str>, option_names: Vec<&'a str>) -> MockOptions<'a> { + MockOptions { + inputs: inputs.iter().map(|s| s.to_string()).collect::>(), + option_names: option_names, + } + } + } + + impl<'a> CommandLineOpts for MockOptions<'a> { + fn inputs(&self) -> Vec { + self.inputs.clone() + } + fn opts_present(&self, opts: &[&str]) -> bool { + for expected in opts.iter() { + for actual in self.option_names.iter() { + if *expected==*actual { + return true; + } + } + } + false + } + } + + #[test] + fn test_parse_inputs_normal() { + + assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), + parse_inputs(&MockOptions::new( + vec!{}, + vec!{}))); + + assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"-"}, + vec!{}))); + + assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"file1"}, + vec!{}))); + + assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "file2".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"file1", "file2"}, + vec!{}))); + + assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string(), "file1".to_string(), "file2".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"-", "file1", "file2"}, + vec!{}))); + } + + #[test] + fn test_parse_inputs_with_offset() { + // offset is found without filename, so stdin will be used. + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!{"+10"}, + vec!{}))); + + // offset must start with "+" if no input is specified. 
+ assert_eq!(CommandLineInputs::FileNames(vec!{"10".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"10"}, + vec!{""}))); + + // offset is not valid, so it is considered a filename. + assert_eq!(CommandLineInputs::FileNames(vec!{"+10a".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"+10a"}, + vec!{""}))); + + // if -j is included in the commandline, there cannot be an offset. + assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"+10"}, + vec!{"j"}))); + + // if -v is included in the commandline, there cannot be an offset. + assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"+10"}, + vec!{"o", "v"}))); + + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!{"file1", "+10"}, + vec!{}))); + + // offset does not need to start with "+" if a filename is included. + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!{"file1", "10"}, + vec!{}))); + + assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "+10a".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"file1", "+10a"}, + vec!{""}))); + + assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "+10".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"file1", "+10"}, + vec!{"j"}))); + + // offset must be last on the commandline + assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string(), "file1".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"+10", "file1"}, + vec!{""}))); + } + + fn parse_offset_operand_str(s: &str) -> Result { + parse_offset_operand(&String::from(s)) + } + + #[test] + fn test_parse_offset_operand_invalid() { + parse_offset_operand_str("").unwrap_err(); + parse_offset_operand_str("a").unwrap_err(); + parse_offset_operand_str("+").unwrap_err(); + parse_offset_operand_str("+b").unwrap_err(); + 
parse_offset_operand_str("0x1.").unwrap_err(); + parse_offset_operand_str("0x1.b").unwrap_err(); + parse_offset_operand_str("-").unwrap_err(); + parse_offset_operand_str("-1").unwrap_err(); + parse_offset_operand_str("1e10").unwrap_err(); + } + + #[test] + fn test_parse_offset_operand() { + assert_eq!(8, parse_offset_operand_str("10").unwrap()); // default octal + assert_eq!(0, parse_offset_operand_str("0").unwrap()); + assert_eq!(8, parse_offset_operand_str("+10").unwrap()); // optional leading '+' + assert_eq!(16, parse_offset_operand_str("0x10").unwrap()); // hex + assert_eq!(16, parse_offset_operand_str("0X10").unwrap()); // hex + assert_eq!(16, parse_offset_operand_str("+0X10").unwrap()); // hex + assert_eq!(10, parse_offset_operand_str("10.").unwrap()); // decimal + assert_eq!(10, parse_offset_operand_str("+10.").unwrap()); // decimal + assert_eq!(4096, parse_offset_operand_str("10b").unwrap()); // b suffix = *512 + assert_eq!(4096, parse_offset_operand_str("+10b").unwrap()); // b suffix = *512 + assert_eq!(5120, parse_offset_operand_str("10.b").unwrap()); // b suffix = *512 + assert_eq!(5120, parse_offset_operand_str("+10.b").unwrap()); // b suffix = *512 + assert_eq!(267, parse_offset_operand_str("0x10b").unwrap()); // hex + } + +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 58567053c..5aa0a1185 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -564,3 +564,30 @@ fn test_filename_parsing(){ 000012 ")); } + +#[test] +fn test_stdin_offset(){ + + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("-c").arg("+5").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 0000005 f g h i j k l m n o p q + 0000021 + ")); +} + +#[test] +fn test_file_offset(){ + + let result = new_ucmd!().arg("-c").arg("--").arg("-f").arg("10").run(); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000010 w e r c a s e f 
\n + 0000022 + ")); +} From 2f12b06ba14d75e84972a9a40c0658916a7c59da Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Thu, 18 Aug 2016 22:17:03 +0200 Subject: [PATCH 31/41] od: implement --traditional --- src/od/od.rs | 50 ++++++++++----- src/od/parse_inputs.rs | 137 +++++++++++++++++++++++++++++++++++------ tests/fixtures/od/0 | 1 + tests/test_od.rs | 68 ++++++++++++++++++++ 4 files changed, 221 insertions(+), 35 deletions(-) create mode 100644 tests/fixtures/od/0 diff --git a/src/od/od.rs b/src/od/od.rs index b4f128f06..c8cb5a30b 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -95,6 +95,7 @@ pub fn uumain(args: Vec) -> i32 { "BYTES"); opts.optflag("h", "help", "display this help and exit."); opts.optflag("", "version", "output version information and exit."); + opts.optflag("", "traditional", "compatibility mode with one input, offset and label."); let matches = match opts.parse(&args[1..]) { Ok(m) => m, @@ -149,12 +150,19 @@ pub fn uumain(args: Vec) -> i32 { } }; + let mut label: Option = None; + let input_strings = match parse_inputs(&matches) { - CommandLineInputs::FileNames(v) => v, - CommandLineInputs::FileAndOffset((f, s, _)) => { + Ok(CommandLineInputs::FileNames(v)) => v, + Ok(CommandLineInputs::FileAndOffset((f, s, l))) => { skip_bytes = s; + label = l; vec!{f} }, + Err(e) => { + disp_err!("Invalid inputs: {}", e); + return 1; + } }; let inputs = input_strings .iter() @@ -203,12 +211,13 @@ pub fn uumain(args: Vec) -> i32 { }; odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], - output_duplicates, skip_bytes, read_bytes) + output_duplicates, skip_bytes, read_bytes, label) } +// TODO: refactor, too many arguments fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, fnames: Vec, formats: &[ParsedFormatterItemInfo], output_duplicates: bool, - skip_bytes: usize, read_bytes: Option) -> i32 { + skip_bytes: usize, read_bytes: Option, mut label: Option) -> i32 { let mf = MultifileReader::new(fnames); let pr = 
PartialReader::new(mf, skip_bytes, read_bytes); @@ -263,7 +272,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) { Ok((0, _)) => { - print_final_offset(input_offset_base, addr); + print_final_offset(input_offset_base, addr, label); break; } Ok((n, peekbytes)) => { @@ -297,15 +306,18 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } print_bytes(byte_order, &bytes, n, peekbytes, - &print_with_radix(input_offset_base, addr), + &print_with_radix(input_offset_base, addr, label), &spaced_formatters, byte_size_block, print_width_line); } addr += n; + if let Some(l) = label { + label = Some(l + n); + } } Err(e) => { show_error!("{}", e); - print_final_offset(input_offset_base, addr); + print_final_offset(input_offset_base, addr, label); return 1; } }; @@ -415,18 +427,24 @@ fn parse_radix(radix_str: Option) -> Result { } } -fn print_with_radix(r: Radix, x: usize) -> String{ - match r { - Radix::Decimal => format!("{:07}", x), - Radix::Hexadecimal => format!("{:06X}", x), - Radix::Octal => format!("{:07o}", x), - Radix::NoPrefix => String::from(""), +fn print_with_radix(r: Radix, x: usize, label: Option) -> String{ + match (r, label) { + (Radix::Decimal, None) => format!("{:07}", x), + (Radix::Decimal, Some(l)) => format!("{:07} ({:07})", x, l), + (Radix::Hexadecimal, None) => format!("{:06X}", x), + (Radix::Hexadecimal, Some(l)) => format!("{:06X} ({:06X})", x, l), + (Radix::Octal, None) => format!("{:07o}", x), + (Radix::Octal, Some(l)) => format!("{:07o} ({:07o})", x, l), + (Radix::NoPrefix, None) => String::from(""), + (Radix::NoPrefix, Some(l)) => format!("({:07o})", l), } } -fn print_final_offset(r: Radix, x: usize) { - if r != Radix::NoPrefix { - print!("{}\n", print_with_radix(r, x)); +/// Prints the byte offset followed by a newline, or nothing at all if +/// both `Radix::NoPrefix` was set and no label (--traditional) is used. 
+fn print_final_offset(r: Radix, x: usize, label: Option) { + if r != Radix::NoPrefix || label.is_some() { + print!("{}\n", print_with_radix(r, x, label)); } } diff --git a/src/od/parse_inputs.rs b/src/od/parse_inputs.rs index 4db4826e2..e87a85a06 100644 --- a/src/od/parse_inputs.rs +++ b/src/od/parse_inputs.rs @@ -39,11 +39,16 @@ pub enum CommandLineInputs { /// Offset and label are specified in bytes. /// '-' is used as filename if stdin is meant. This is also returned if /// there is no input, as stdin is the default input. -pub fn parse_inputs(matches: &CommandLineOpts) -> CommandLineInputs { +pub fn parse_inputs(matches: &CommandLineOpts) -> Result { let mut input_strings: Vec = matches.inputs(); + if matches.opts_present(&["traditional"]) { + return parse_inputs_traditional(input_strings); + } + // test if commandline contains: [file] + // fall-through if no (valid) offset is found if input_strings.len() == 1 || input_strings.len() == 2 { // if any of the options -A, -j, -N, -t, -v or -w are present there is no offset if !matches.opts_present(&["A", "j", "N", "t", "v", "w"]) { @@ -53,10 +58,10 @@ pub fn parse_inputs(matches: &CommandLineOpts) -> CommandLineInputs { Ok(n) => { // if there is just 1 input (stdin), an offset must start with '+' if input_strings.len() == 1 && input_strings[0].starts_with("+") { - return CommandLineInputs::FileAndOffset(("-".to_string(), n, None)); + return Ok(CommandLineInputs::FileAndOffset(("-".to_string(), n, None))); } if input_strings.len() == 2 { - return CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, None)); + return Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, None))); } } _ => { @@ -69,7 +74,47 @@ pub fn parse_inputs(matches: &CommandLineOpts) -> CommandLineInputs { if input_strings.len() == 0 { input_strings.push("-".to_string()); } - CommandLineInputs::FileNames(input_strings) + Ok(CommandLineInputs::FileNames(input_strings)) +} + +/// interprets inputs when --traditional is on the 
commandline +/// +/// normally returns CommandLineInputs::FileAndOffset, but if no offset is found, +/// it returns CommandLineInputs::FileNames (also to differentiate from the offset==0) +pub fn parse_inputs_traditional(input_strings: Vec) -> Result { + match input_strings.len() { + 0 => { + Ok(CommandLineInputs::FileNames(vec!{"-".to_string()})) + } + 1 => { + let offset0=parse_offset_operand(&input_strings[0]); + Ok(match offset0 { + Ok(n) => CommandLineInputs::FileAndOffset(("-".to_string(), n, None)), + _ => CommandLineInputs::FileNames(input_strings), + }) + } + 2 => { + let offset0=parse_offset_operand(&input_strings[0]); + let offset1=parse_offset_operand(&input_strings[1]); + match (offset0, offset1) { + (Ok(n), Ok(m)) => Ok(CommandLineInputs::FileAndOffset(("-".to_string(), n, Some(m)))), + (_, Ok(m)) => Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), m, None))), + _ => Err(format!("invalid offset: {}", input_strings[1])), + } + } + 3 => { + let offset=parse_offset_operand(&input_strings[1]); + let label=parse_offset_operand(&input_strings[2]); + match (offset, label) { + (Ok(n), Ok(m)) => Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, Some(m)))), + (Err(_), _) => Err(format!("invalid offset: {}", input_strings[1])), + (_, Err(_)) => Err(format!("invalid label: {}", input_strings[2])), + } + } + _ => { + Err(format!("too many inputs after --traditional: {}", input_strings[3])) + } + } } /// parses format used by offset and label on the commandline @@ -148,27 +193,27 @@ mod tests { assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), parse_inputs(&MockOptions::new( vec!{}, - vec!{}))); + vec!{})).unwrap()); assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), parse_inputs(&MockOptions::new( vec!{"-"}, - vec!{}))); + vec!{})).unwrap()); assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string()}), parse_inputs(&MockOptions::new( vec!{"file1"}, - vec!{}))); + vec!{})).unwrap()); 
assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "file2".to_string()}), parse_inputs(&MockOptions::new( vec!{"file1", "file2"}, - vec!{}))); + vec!{})).unwrap()); assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string(), "file1".to_string(), "file2".to_string()}), parse_inputs(&MockOptions::new( vec!{"-", "file1", "file2"}, - vec!{}))); + vec!{})).unwrap()); } #[test] @@ -177,58 +222,112 @@ mod tests { assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), parse_inputs(&MockOptions::new( vec!{"+10"}, - vec!{}))); + vec!{})).unwrap()); // offset must start with "+" if no input is specified. assert_eq!(CommandLineInputs::FileNames(vec!{"10".to_string()}), parse_inputs(&MockOptions::new( vec!{"10"}, - vec!{""}))); + vec!{""})).unwrap()); // offset is not valid, so it is considered a filename. assert_eq!(CommandLineInputs::FileNames(vec!{"+10a".to_string()}), parse_inputs(&MockOptions::new( vec!{"+10a"}, - vec!{""}))); + vec!{""})).unwrap()); // if -j is included in the commandline, there cannot be an offset. assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string()}), parse_inputs(&MockOptions::new( vec!{"+10"}, - vec!{"j"}))); + vec!{"j"})).unwrap()); // if -v is included in the commandline, there cannot be an offset. assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string()}), parse_inputs(&MockOptions::new( vec!{"+10"}, - vec!{"o", "v"}))); + vec!{"o", "v"})).unwrap()); assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), parse_inputs(&MockOptions::new( vec!{"file1", "+10"}, - vec!{}))); + vec!{})).unwrap()); // offset does not need to start with "+" if a filename is included. 
assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), parse_inputs(&MockOptions::new( vec!{"file1", "10"}, - vec!{}))); + vec!{})).unwrap()); assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "+10a".to_string()}), parse_inputs(&MockOptions::new( vec!{"file1", "+10a"}, - vec!{""}))); + vec!{""})).unwrap()); assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "+10".to_string()}), parse_inputs(&MockOptions::new( vec!{"file1", "+10"}, - vec!{"j"}))); + vec!{"j"})).unwrap()); // offset must be last on the commandline assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string(), "file1".to_string()}), parse_inputs(&MockOptions::new( vec!{"+10", "file1"}, - vec!{""}))); + vec!{""})).unwrap()); + } + + #[test] + fn test_parse_inputs_traditional() { + + // it should not return FileAndOffset to signal no offset was entered on the commandline. + assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), + parse_inputs(&MockOptions::new( + vec!{}, + vec!{"traditional"})).unwrap()); + + assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string()}), + parse_inputs(&MockOptions::new( + vec!{"file1"}, + vec!{"traditional"})).unwrap()); + + // offset does not need to start with a + + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!{"10"}, + vec!{"traditional"})).unwrap()); + + // valid offset and valid label + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, Some(8))), + parse_inputs(&MockOptions::new( + vec!{"10", "10"}, + vec!{"traditional"})).unwrap()); + + assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), + parse_inputs(&MockOptions::new( + vec!{"file1", "10"}, + vec!{"traditional"})).unwrap()); + + // only one file is allowed, it must be the first + parse_inputs(&MockOptions::new( + vec!{"10", "file1"}, + vec!{"traditional"})).unwrap_err(); + + 
assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, Some(8))), + parse_inputs(&MockOptions::new( + vec!{"file1", "10", "10"}, + vec!{"traditional"})).unwrap()); + + parse_inputs(&MockOptions::new( + vec!{"10", "file1", "10"}, + vec!{"traditional"})).unwrap_err(); + + parse_inputs(&MockOptions::new( + vec!{"10", "10", "file1"}, + vec!{"traditional"})).unwrap_err(); + + parse_inputs(&MockOptions::new( + vec!{"10", "10", "10", "10"}, + vec!{"traditional"})).unwrap_err(); } fn parse_offset_operand_str(s: &str) -> Result { diff --git a/tests/fixtures/od/0 b/tests/fixtures/od/0 new file mode 100644 index 000000000..26af6a865 --- /dev/null +++ b/tests/fixtures/od/0 @@ -0,0 +1 @@ +zero diff --git a/tests/test_od.rs b/tests/test_od.rs index 5aa0a1185..adddacbe9 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -591,3 +591,71 @@ fn test_file_offset(){ 0000022 ")); } + +#[test] +fn test_traditional(){ + // note gnu od does not align both lines + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("--traditional").arg("-a").arg("-c").arg("-").arg("10").arg("0").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000010 (0000000) i j k l m n o p q + i j k l m n o p q + 0000021 (0000011) + ")); +} + +#[test] +fn test_traditional_with_skip_bytes_override(){ + // --skip-bytes is ignored in this case + let input = "abcdefghijklmnop"; + let result = new_ucmd!().arg("--traditional").arg("--skip-bytes=10").arg("-c").arg("0").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000000 a b c d e f g h i j k l m n o p + 0000020 + ")); +} + +#[test] +fn test_traditional_with_skip_bytes_non_override(){ + // no offset specified in the traditional way, so --skip-bytes is used + let input = "abcdefghijklmnop"; + let result = 
new_ucmd!().arg("--traditional").arg("--skip-bytes=10").arg("-c").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + 0000012 k l m n o p + 0000020 + ")); +} + +#[test] +fn test_traditional_error(){ + // file "0" exists - don't fail on that, but --traditional only accepts a single input + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("--traditional").arg("0").arg("0").arg("0").arg("0").run_piped_stdin(input.as_bytes()); + + assert!(!result.success); +} + +#[test] +fn test_traditional_only_label(){ + let input = "abcdefghijklmnopqrstuvwxyz"; + let result = new_ucmd!().arg("-An").arg("--traditional").arg("-a").arg("-c").arg("-").arg("10").arg("0x10").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(r" + (0000020) i j k l m n o p q r s t u v w x + i j k l m n o p q r s t u v w x + (0000040) y z + y z + (0000042) + ")); +} From d705dc46ce7e4be80637311cd59b9ed5e212f737 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Fri, 19 Aug 2016 22:43:52 +0200 Subject: [PATCH 32/41] od: improve/extend --help text --- Cargo.lock | 1 - src/od/Cargo.toml | 1 - src/od/od.rs | 53 ++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ebe84879a..0a8b0fe86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -666,7 +666,6 @@ dependencies = [ "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "unindent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "uucore 0.0.1", ] diff --git a/src/od/Cargo.toml b/src/od/Cargo.toml index 0b44a8633..c6b86e354 100644 --- a/src/od/Cargo.toml +++ b/src/od/Cargo.toml @@ -10,7 +10,6 @@ path = "od.rs" [dependencies] 
getopts = "*" libc = "*" -unindent = "*" byteorder = "*" uucore = { path="../uucore" } diff --git a/src/od/od.rs b/src/od/od.rs index c8cb5a30b..7cbd8a101 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -10,7 +10,6 @@ */ extern crate getopts; -extern crate unindent; extern crate byteorder; #[macro_use] @@ -32,7 +31,6 @@ mod mockstream; use std::cmp; use std::io::Write; -use unindent::*; use byteorder_io::*; use multifilereader::*; use partialreader::*; @@ -50,6 +48,50 @@ const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes #[derive(Copy, Clone, Debug, Eq, PartialEq)] enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } +static USAGE: &'static str = +r#"Usage: + od [OPTION]... [--] [FILENAME]... + od [-abcdDefFhHiIlLoOsxX] [FILENAME] [[+][0x]OFFSET[.][b]] + od --traditional [OPTION]... [FILENAME] [[+][0x]OFFSET[.][b] [[+][0x]LABEL[.][b]]] + +Displays data in various human-readable formats. If multiple formats are +specified, the output will contain all formats in the order they appear on the +commandline. Each format will be printed on a new line. Only the line +containing the first format will be prefixed with the offset. + +If no filename is specified, or it is "-", stdin will be used. After a "--", no +more options will be recognised. This allows for filenames starting with a "-". + +If a filename is a valid number which can be used as an offset in the second +form, you can force it to be recognised as a filename if you include an option +like "-j0", which is only valid in the first form. + +RADIX is one of o,d,x,n for octal, decimal, hexadecimal or none. + +BYTES is decimal by default, octal if prefixed with a "0", or hexadecimal if +prefixed with "0x". The suffixes b, KB, K, MB, M, GB, G, will multiply the +number with 512, 1000, 1024, 1000^2, 1024^2, 1000^3, 1024^3, 1000^2, 1024^2. + +OFFSET and LABEL are octal by default, hexadecimal if prefixed with "0x" or +decimal if a "." suffix is added. The "b" suffix will multiply with 512. 
+ +TYPE contains one or more format specifications consisting of: + a for printable 7-bits ASCII + c for utf-8 characters or octal for undefined characters + d[SIZE] for signed decimal + f[SIZE] for floating point + o[SIZE] for octal + u[SIZE] for unsigned decimal + x[SIZE] for hexadecimal +SIZE is the number of bytes which can be the number 1, 2, 4, 8 or 16, + or C, I, S, L for 1, 2, 4, 8 bytes for integer types, + or F, D, L for 4, 8, 16 bytes for floating point. +Any type specification can have a "z" suffic, which will add a ASCII dump at + the end of the line. + +If an error occurred, a diagnostic message will be printed to stderr, and the +exitcode will be non-zero."#; + pub fn uumain(args: Vec) -> i32 { let mut opts = getopts::Options::new(); @@ -106,12 +148,7 @@ pub fn uumain(args: Vec) -> i32 { }; if matches.opt_present("h") { - let msg = unindent(&format!(" - Usage: - {0} [OPTION]... [FILENAME]... - - Displays data in various human-readable formats.", executable!())); - println!("{}", opts.usage(&msg)); + println!("{}", opts.usage(&USAGE)); return 0; } if matches.opt_present("version") { From f2db897c476a07da8a483f68d7895fcc4091d46d Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 20 Aug 2016 21:59:40 +0200 Subject: [PATCH 33/41] od: refactor: reduce arguments of odfunc Pass the input stream itself instead of the parameters required to open it. Create InputOffset to handle functionality required for the byte offset. 
--- src/od/inputoffset.rs | 129 ++++++++++++++++++++++++++++++++++++++++++ src/od/od.rs | 113 ++++++++++++------------------------ 2 files changed, 166 insertions(+), 76 deletions(-) create mode 100644 src/od/inputoffset.rs diff --git a/src/od/inputoffset.rs b/src/od/inputoffset.rs new file mode 100644 index 000000000..44e5106b7 --- /dev/null +++ b/src/od/inputoffset.rs @@ -0,0 +1,129 @@ + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } + +/// provides the byte offset printed at the left margin +pub struct InputOffset { + /// The radix to print the byte offset. NoPrefix will not print a byte offset. + radix: Radix, + /// The current position. Initialize at `new`, increase using `increase_position`. + byte_pos: usize, + /// An optional label printed in parentheses, typically different from `byte_pos`, + /// but will increase with the same value if `byte_pos` in increased. + label: Option, +} + +impl InputOffset { + /// creates a new `InputOffset` using the provided values. + pub fn new(radix: Radix, byte_pos: usize, label: Option) -> InputOffset { + InputOffset { + radix: radix, + byte_pos: byte_pos, + label: label, + } + } + + /// Increase `byte_pos` and `label` if a label is used. 
+ pub fn increase_position(&mut self, n: usize) { + self.byte_pos += n; + if let Some(l) = self.label { + self.label = Some(l + n); + } + } + + /// set `self.radix` to the value provided by the --address-radix commandline option + pub fn parse_radix_from_commandline(&mut self, radix_str: Option) -> Result<(), &'static str> { + match radix_str { + None => self.radix = Radix::Octal, + Some(s) => { + let st = s.into_bytes(); + if st.len() != 1 { + return Err("Radix must be one of [d, o, n, x]\n") + } else { + let radix: char = *(st.get(0) + .expect("byte string of length 1 lacks a 0th elem")) as char; + match radix { + 'd' => self.radix = Radix::Decimal, + 'x' => self.radix = Radix::Hexadecimal, + 'o' => self.radix = Radix::Octal, + 'n' => self.radix = Radix::NoPrefix, + _ => return Err("Radix must be one of [d, o, n, x]\n") + } + } + } + } + Ok(()) + } + + /// returns a string with the current byte offset + pub fn format_byte_offset(&self) -> String { + match (self.radix, self.label) { + (Radix::Decimal, None) => format!("{:07}", self.byte_pos), + (Radix::Decimal, Some(l)) => format!("{:07} ({:07})", self.byte_pos, l), + (Radix::Hexadecimal, None) => format!("{:06X}", self.byte_pos), + (Radix::Hexadecimal, Some(l)) => format!("{:06X} ({:06X})", self.byte_pos, l), + (Radix::Octal, None) => format!("{:07o}", self.byte_pos), + (Radix::Octal, Some(l)) => format!("{:07o} ({:07o})", self.byte_pos, l), + (Radix::NoPrefix, None) => String::from(""), + (Radix::NoPrefix, Some(l)) => format!("({:07o})", l), + } + } + + /// Prints the byte offset followed by a newline, or nothing at all if + /// both `Radix::NoPrefix` was set and no label (--traditional) is used. 
+ pub fn print_final_offset(&self) { + if self.radix != Radix::NoPrefix || self.label.is_some() { + print!("{}\n", self.format_byte_offset()); + } + } +} + +#[test] +fn test_input_offset() { + let mut sut = InputOffset::new(Radix::Hexadecimal, 10, None); + assert_eq!("00000A", &sut.format_byte_offset()); + sut.increase_position(10); + assert_eq!("000014", &sut.format_byte_offset()); + + // note normally the radix will not change after initialisation + sut.parse_radix_from_commandline(Some("d".to_string())).unwrap(); + assert_eq!("0000020", &sut.format_byte_offset()); + + sut.parse_radix_from_commandline(Some("x".to_string())).unwrap(); + assert_eq!("000014", &sut.format_byte_offset()); + + sut.parse_radix_from_commandline(Some("o".to_string())).unwrap(); + assert_eq!("0000024", &sut.format_byte_offset()); + + sut.parse_radix_from_commandline(Some("n".to_string())).unwrap(); + assert_eq!("", &sut.format_byte_offset()); + + sut.increase_position(10); + sut.parse_radix_from_commandline(None).unwrap(); + assert_eq!("0000036", &sut.format_byte_offset()); +} + +#[test] +fn test_input_offset_with_label() { + let mut sut = InputOffset::new(Radix::Hexadecimal, 10, Some(20)); + assert_eq!("00000A (000014)", &sut.format_byte_offset()); + sut.increase_position(10); + assert_eq!("000014 (00001E)", &sut.format_byte_offset()); + + // note normally the radix will not change after initialisation + sut.parse_radix_from_commandline(Some("d".to_string())).unwrap(); + assert_eq!("0000020 (0000030)", &sut.format_byte_offset()); + + sut.parse_radix_from_commandline(Some("x".to_string())).unwrap(); + assert_eq!("000014 (00001E)", &sut.format_byte_offset()); + + sut.parse_radix_from_commandline(Some("o".to_string())).unwrap(); + assert_eq!("0000024 (0000036)", &sut.format_byte_offset()); + + sut.parse_radix_from_commandline(Some("n".to_string())).unwrap(); + assert_eq!("(0000036)", &sut.format_byte_offset()); + + sut.increase_position(10); + sut.parse_radix_from_commandline(None).unwrap(); 
+ assert_eq!("0000036 (0000050)", &sut.format_byte_offset()); +} diff --git a/src/od/od.rs b/src/od/od.rs index 7cbd8a101..625ecef74 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -26,6 +26,7 @@ mod prn_float; mod parse_nrofbytes; mod parse_formats; mod parse_inputs; +mod inputoffset; #[cfg(test)] mod mockstream; @@ -40,14 +41,12 @@ use parse_nrofbytes::parse_number_of_bytes; use parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; use prn_char::format_ascii_dump; use parse_inputs::{parse_inputs, CommandLineInputs}; +use inputoffset::{InputOffset, Radix}; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum Radix { Decimal, Hexadecimal, Octal, NoPrefix } - static USAGE: &'static str = r#"Usage: od [OPTION]... [--] [FILENAME]... @@ -156,14 +155,6 @@ pub fn uumain(args: Vec) -> i32 { return 0; } - let input_offset_base = match parse_radix(matches.opt_str("A")) { - Ok(r) => r, - Err(f) => { - disp_err!("Invalid -A/--address-radix\n{}", f); - return 1; - } - }; - let byte_order = match matches.opt_str("endian").as_ref().map(String::as_ref) { None => { ByteOrder::Native }, Some("little") => { ByteOrder::Little }, @@ -201,13 +192,6 @@ pub fn uumain(args: Vec) -> i32 { return 1; } }; - let inputs = input_strings - .iter() - .map(|w| match w as &str { - "-" => InputSource::Stdin, - x => InputSource::FileName(x), - }) - .collect::>(); let formats = match parse_format_flags(&args) { Ok(f) => f, @@ -247,19 +231,23 @@ pub fn uumain(args: Vec) -> i32 { } }; - odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], - output_duplicates, skip_bytes, read_bytes, label) + let mut input = open_input_peek_reader(&input_strings, skip_bytes, read_bytes); + + let mut input_offset = InputOffset::new(Radix::Octal, skip_bytes, label); + if let Err(e) = input_offset.parse_radix_from_commandline(matches.opt_str("A")) 
{ + disp_err!("Invalid -A/--address-radix\n{}", e); + return 1; + } + + odfunc(&mut input, &mut input_offset, line_bytes, byte_order, &formats[..], + output_duplicates) } // TODO: refactor, too many arguments -fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, - fnames: Vec, formats: &[ParsedFormatterItemInfo], output_duplicates: bool, - skip_bytes: usize, read_bytes: Option, mut label: Option) -> i32 { +fn odfunc(input: &mut I, input_offset: &mut InputOffset, line_bytes: usize, byte_order: ByteOrder, + formats: &[ParsedFormatterItemInfo], output_duplicates: bool) -> i32 + where I : PeekRead+HasError { - let mf = MultifileReader::new(fnames); - let pr = PartialReader::new(mf, skip_bytes, read_bytes); - let mut input = PeekReader::new(pr); - let mut addr = skip_bytes; let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); let mut bytes: Vec = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE); @@ -305,11 +293,10 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, loop { // print each line data (or multi-format raster of several lines describing the same data). 
- // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) { Ok((0, _)) => { - print_final_offset(input_offset_base, addr, label); + input_offset.print_final_offset(); break; } Ok((n, peekbytes)) => { @@ -343,18 +330,15 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, } print_bytes(byte_order, &bytes, n, peekbytes, - &print_with_radix(input_offset_base, addr, label), + &input_offset.format_byte_offset(), &spaced_formatters, byte_size_block, print_width_line); } - addr += n; - if let Some(l) = label { - label = Some(l + n); - } + input_offset.increase_position(n); } Err(e) => { show_error!("{}", e); - print_final_offset(input_offset_base, addr, label); + input_offset.print_final_offset(); return 1; } }; @@ -441,48 +425,25 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us } } -// For file byte offset printed at left margin. -fn parse_radix(radix_str: Option) -> Result { - match radix_str { - None => Ok(Radix::Octal), - Some(s) => { - let st = s.into_bytes(); - if st.len() != 1 { - Err("Radix must be one of [d, o, n, x]\n") - } else { - let radix: char = *(st.get(0) - .expect("byte string of length 1 lacks a 0th elem")) as char; - match radix { - 'd' => Ok(Radix::Decimal), - 'x' => Ok(Radix::Hexadecimal), - 'o' => Ok(Radix::Octal), - 'n' => Ok(Radix::NoPrefix), - _ => Err("Radix must be one of [d, o, n, x]\n") - } - } - } - } -} +/// returns a reader implementing `PeekRead+Read+HasError` providing the combined input +/// +/// `skip_bytes` is the number of bytes skipped from the input +/// `read_bytes` is an optinal limit to the number of bytes to read +fn open_input_peek_reader<'a>(input_strings: &'a Vec, skip_bytes: usize, + read_bytes: Option) -> PeekReader>> { + // should return "impl PeekRead+Read+HasError" when supported in (stable) rust + let inputs = input_strings + .iter() + .map(|w| match w as 
&str { + "-" => InputSource::Stdin, + x => InputSource::FileName(x), + }) + .collect::>(); -fn print_with_radix(r: Radix, x: usize, label: Option) -> String{ - match (r, label) { - (Radix::Decimal, None) => format!("{:07}", x), - (Radix::Decimal, Some(l)) => format!("{:07} ({:07})", x, l), - (Radix::Hexadecimal, None) => format!("{:06X}", x), - (Radix::Hexadecimal, Some(l)) => format!("{:06X} ({:06X})", x, l), - (Radix::Octal, None) => format!("{:07o}", x), - (Radix::Octal, Some(l)) => format!("{:07o} ({:07o})", x, l), - (Radix::NoPrefix, None) => String::from(""), - (Radix::NoPrefix, Some(l)) => format!("({:07o})", l), - } -} - -/// Prints the byte offset followed by a newline, or nothing at all if -/// both `Radix::NoPrefix` was set and no label (--traditional) is used. -fn print_final_offset(r: Radix, x: usize, label: Option) { - if r != Radix::NoPrefix || label.is_some() { - print!("{}\n", print_with_radix(r, x, label)); - } + let mf = MultifileReader::new(inputs); + let pr = PartialReader::new(mf, skip_bytes, read_bytes); + let input = PeekReader::new(pr); + input } struct SpacedFormatterItemInfo { From 283a29fd2c993eb108dcaee50e0f6da4d5e8f5ad Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 21 Aug 2016 02:03:48 +0200 Subject: [PATCH 34/41] od: refactor IntWriter, do not require parameters use macros to define a function for each format so byte_size and print_width do not have to be provided by the caller. 
--- src/od/formatteriteminfo.rs | 2 +- src/od/od.rs | 2 +- src/od/prn_char.rs | 22 ++-- src/od/prn_int.rs | 247 +++++++++++++++++++----------------- 4 files changed, 146 insertions(+), 127 deletions(-) diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs index 034eece53..5118571fe 100644 --- a/src/od/formatteriteminfo.rs +++ b/src/od/formatteriteminfo.rs @@ -2,7 +2,7 @@ use std::fmt; #[derive(Copy, Eq)] pub enum FormatWriter { - IntWriter(fn(u64, usize, usize) -> String), + IntWriter(fn(u64) -> String), FloatWriter(fn(f64) -> String), MultibyteWriter(fn(&[u8]) -> String), } diff --git a/src/od/od.rs b/src/od/od.rs index 625ecef74..0b32b48cb 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -382,7 +382,7 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us } _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); } }; - output_text.push_str(&func(p, f.frm.formatter_item_info.byte_size, f.frm.formatter_item_info.print_width)); + output_text.push_str(&func(p)); } FormatWriter::FloatWriter(func) => { let p: f64 = match f.frm.formatter_item_info.byte_size { diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index 9686c9160..381ec5ace 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -32,7 +32,7 @@ static A_CHRS : [&'static str; 128] = "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "del"]; -fn format_item_a(p: u64, _: usize, _: usize) -> String { +fn format_item_a(p: u64) -> String { // itembytes == 1 let b = (p & 0x7f) as u8; format!("{:>4}", A_CHRS.get(b as usize).unwrap_or(&"??") @@ -119,17 +119,17 @@ pub fn format_ascii_dump(bytes: &[u8]) -> String { #[test] fn test_format_item_a() { - assert_eq!(" nul", format_item_a(0x00, 1, 4)); - assert_eq!(" soh", format_item_a(0x01, 1, 4)); - assert_eq!(" sp", format_item_a(0x20, 1, 4)); - assert_eq!(" A", format_item_a(0x41, 1, 4)); - assert_eq!(" ~", format_item_a(0x7e, 1, 4)); - assert_eq!(" del", format_item_a(0x7f, 
1, 4)); + assert_eq!(" nul", format_item_a(0x00)); + assert_eq!(" soh", format_item_a(0x01)); + assert_eq!(" sp", format_item_a(0x20)); + assert_eq!(" A", format_item_a(0x41)); + assert_eq!(" ~", format_item_a(0x7e)); + assert_eq!(" del", format_item_a(0x7f)); - assert_eq!(" nul", format_item_a(0x80, 1, 4)); - assert_eq!(" A", format_item_a(0xc1, 1, 4)); - assert_eq!(" ~", format_item_a(0xfe, 1, 4)); - assert_eq!(" del", format_item_a(0xff, 1, 4)); + assert_eq!(" nul", format_item_a(0x80)); + assert_eq!(" A", format_item_a(0xc1)); + assert_eq!(" ~", format_item_a(0xfe)); + assert_eq!(" del", format_item_a(0xff)); } #[test] diff --git a/src/od/prn_int.rs b/src/od/prn_int.rs index da3fb1fb3..9dd1fe58c 100644 --- a/src/od/prn_int.rs +++ b/src/od/prn_int.rs @@ -1,132 +1,151 @@ use formatteriteminfo::*; -pub static FORMAT_ITEM_OCT8: FormatterItemInfo = FormatterItemInfo { - byte_size: 1, - print_width: 4, // max: 377 - formatter: FormatWriter::IntWriter(format_item_oct), -}; +/// format string to print octal using `int_writer_unsigned` +macro_rules! OCT { () => { " {:0width$o}" }} +/// format string to print hexadecimal using `int_writer_unsigned` +macro_rules! HEX { () => { " {:0width$x}" }} +/// format string to print decimal using `int_writer_unsigned` or `int_writer_signed` +macro_rules! DEC { () => { " {:width$}" }} -pub static FORMAT_ITEM_OCT16: FormatterItemInfo = FormatterItemInfo { - byte_size: 2, - print_width: 7, // max: 177777 - formatter: FormatWriter::IntWriter(format_item_oct), -}; +/// defines a static struct of type `FormatterItemInfo` called `$NAME` +/// +/// Used to format unsigned integer types with help of a function called `$function` +/// `$byte_size` is the size of the type, `$print_width` is the maximum width in +/// human-readable format. `$format_str` is one of OCT, HEX or DEC +macro_rules! 
int_writer_unsigned { + ($NAME:ident, $byte_size:expr, $print_width:expr, $function:ident, $format_str:expr) => { + fn $function(p: u64) -> String { + format!($format_str, + p, + width = $print_width - 1) + } -pub static FORMAT_ITEM_OCT32: FormatterItemInfo = FormatterItemInfo { - byte_size: 4, - print_width: 12, // max: 37777777777 - formatter: FormatWriter::IntWriter(format_item_oct), -}; - -pub static FORMAT_ITEM_OCT64: FormatterItemInfo = FormatterItemInfo { - byte_size: 8, - print_width: 23, // max: 1777777777777777777777 - formatter: FormatWriter::IntWriter(format_item_oct), -}; - -pub static FORMAT_ITEM_HEX8: FormatterItemInfo = FormatterItemInfo { - byte_size: 1, - print_width: 3, // max: ff - formatter: FormatWriter::IntWriter(format_item_hex), -}; - -pub static FORMAT_ITEM_HEX16: FormatterItemInfo = FormatterItemInfo { - byte_size: 2, - print_width: 5, // max: ffff - formatter: FormatWriter::IntWriter(format_item_hex), -}; - -pub static FORMAT_ITEM_HEX32: FormatterItemInfo = FormatterItemInfo { - byte_size: 4, - print_width: 9, // max: ffffffff - formatter: FormatWriter::IntWriter(format_item_hex), -}; - -pub static FORMAT_ITEM_HEX64: FormatterItemInfo = FormatterItemInfo { - byte_size: 8, - print_width: 17, // max: ffffffffffffffff - formatter: FormatWriter::IntWriter(format_item_hex), -}; - - -pub static FORMAT_ITEM_DEC8U: FormatterItemInfo = FormatterItemInfo { - byte_size: 1, - print_width: 4, // max: 255 - formatter: FormatWriter::IntWriter(format_item_dec_u), -}; - -pub static FORMAT_ITEM_DEC16U: FormatterItemInfo = FormatterItemInfo { - byte_size: 2, - print_width: 6, // max: 65535 - formatter: FormatWriter::IntWriter(format_item_dec_u), -}; - -pub static FORMAT_ITEM_DEC32U: FormatterItemInfo = FormatterItemInfo { - byte_size: 4, - print_width: 11, // max: 4294967295 - formatter: FormatWriter::IntWriter(format_item_dec_u), -}; - -pub static FORMAT_ITEM_DEC64U: FormatterItemInfo = FormatterItemInfo { - byte_size: 8, - print_width: 21, // max: 
18446744073709551615 - formatter: FormatWriter::IntWriter(format_item_dec_u), -}; - - -pub static FORMAT_ITEM_DEC8S: FormatterItemInfo = FormatterItemInfo { - byte_size: 1, - print_width: 5, // max: -128 - formatter: FormatWriter::IntWriter(format_item_dec_s), -}; - -pub static FORMAT_ITEM_DEC16S: FormatterItemInfo = FormatterItemInfo { - byte_size: 2, - print_width: 7, // max: -32768 - formatter: FormatWriter::IntWriter(format_item_dec_s), -}; - -pub static FORMAT_ITEM_DEC32S: FormatterItemInfo = FormatterItemInfo { - byte_size: 4, - print_width: 12, // max: -2147483648 - formatter: FormatWriter::IntWriter(format_item_dec_s), -}; - -pub static FORMAT_ITEM_DEC64S: FormatterItemInfo = FormatterItemInfo { - byte_size: 8, - print_width: 21, // max: -9223372036854775808 - formatter: FormatWriter::IntWriter(format_item_dec_s), -}; - - -// TODO: use some sort of byte iterator, instead of passing bytes in u64 -pub fn format_item_oct(p: u64, _: usize, print_width: usize) -> String { - - format!(" {:0width$o}", - p, - width = print_width - 1) + pub static $NAME: FormatterItemInfo = FormatterItemInfo { + byte_size: $byte_size, + print_width: $print_width, + formatter: FormatWriter::IntWriter($function), + }; + } } -pub fn format_item_hex(p: u64, _: usize, print_width: usize) -> String { +/// defines a static struct of type `FormatterItemInfo` called `$NAME` +/// +/// Used to format signed integer types with help of a function called `$function` +/// `$byte_size` is the size of the type, `$print_width` is the maximum width in +/// human-readable format. `$format_str` should be DEC +macro_rules! 
int_writer_signed { + ($NAME:ident, $byte_size:expr, $print_width:expr, $function:ident, $format_str:expr) => { + fn $function(p: u64) -> String { + let s = sign_extend(p, $byte_size); + format!($format_str, + s, + width = $print_width - 1) + } - format!(" {:0width$x}", - p, - width = print_width - 1) + pub static $NAME: FormatterItemInfo = FormatterItemInfo { + byte_size: $byte_size, + print_width: $print_width, + formatter: FormatWriter::IntWriter($function), + }; + } } - +/// Extends a signed number in `item` of `itembytes` bytes into a (signed) i64 fn sign_extend(item: u64, itembytes: usize) -> i64{ let shift = 64 - itembytes * 8; (item << shift) as i64 >> shift } -pub fn format_item_dec_s(p: u64, itembytes: usize, print_width: usize) -> String { - // sign extend - let s = sign_extend(p, itembytes); - format!("{:width$}", s, width = print_width) +int_writer_unsigned!(FORMAT_ITEM_OCT8, 1, 4, format_item_oct8, OCT!()); // max: 377 +int_writer_unsigned!(FORMAT_ITEM_OCT16, 2, 7, format_item_oct16, OCT!()); // max: 177777 +int_writer_unsigned!(FORMAT_ITEM_OCT32, 4, 12, format_item_oct32, OCT!()); // max: 37777777777 +int_writer_unsigned!(FORMAT_ITEM_OCT64, 8, 23, format_item_oct64, OCT!()); // max: 1777777777777777777777 + +int_writer_unsigned!(FORMAT_ITEM_HEX8, 1, 3, format_item_hex8, HEX!()); // max: ff +int_writer_unsigned!(FORMAT_ITEM_HEX16, 2, 5, format_item_hex16, HEX!()); // max: ffff +int_writer_unsigned!(FORMAT_ITEM_HEX32, 4, 9, format_item_hex32, HEX!()); // max: ffffffff +int_writer_unsigned!(FORMAT_ITEM_HEX64, 8, 17, format_item_hex64, HEX!()); // max: ffffffffffffffff + +int_writer_unsigned!(FORMAT_ITEM_DEC8U, 1, 4, format_item_dec_u8, DEC!()); // max: 255 +int_writer_unsigned!(FORMAT_ITEM_DEC16U, 2, 6, format_item_dec_u16, DEC!()); // max: 65535 +int_writer_unsigned!(FORMAT_ITEM_DEC32U, 4, 11, format_item_dec_u32, DEC!()); // max: 4294967295 +int_writer_unsigned!(FORMAT_ITEM_DEC64U, 8, 21, format_item_dec_u64, DEC!()); // max: 18446744073709551615 + 
+int_writer_signed!(FORMAT_ITEM_DEC8S, 1, 5, format_item_dec_s8, DEC!()); // max: -128 +int_writer_signed!(FORMAT_ITEM_DEC16S, 2, 7, format_item_dec_s16, DEC!()); // max: -32768 +int_writer_signed!(FORMAT_ITEM_DEC32S, 4, 12, format_item_dec_s32, DEC!()); // max: -2147483648 +int_writer_signed!(FORMAT_ITEM_DEC64S, 8, 21, format_item_dec_s64, DEC!()); // max: -9223372036854775808 + +#[test] +fn test_sign_extend() { + assert_eq!(0xffffffffffffff80u64 as i64, sign_extend(0x0000000000000080, 1)); + assert_eq!(0xffffffffffff8000u64 as i64, sign_extend(0x0000000000008000, 2)); + assert_eq!(0xffffffffff800000u64 as i64, sign_extend(0x0000000000800000, 3)); + assert_eq!(0xffffffff80000000u64 as i64, sign_extend(0x0000000080000000, 4)); + assert_eq!(0xffffff8000000000u64 as i64, sign_extend(0x0000008000000000, 5)); + assert_eq!(0xffff800000000000u64 as i64, sign_extend(0x0000800000000000, 6)); + assert_eq!(0xff80000000000000u64 as i64, sign_extend(0x0080000000000000, 7)); + assert_eq!(0x8000000000000000u64 as i64, sign_extend(0x8000000000000000, 8)); + + assert_eq!(0x000000000000007f, sign_extend(0x000000000000007f, 1)); + assert_eq!(0x0000000000007fff, sign_extend(0x0000000000007fff, 2)); + assert_eq!(0x00000000007fffff, sign_extend(0x00000000007fffff, 3)); + assert_eq!(0x000000007fffffff, sign_extend(0x000000007fffffff, 4)); + assert_eq!(0x0000007fffffffff, sign_extend(0x0000007fffffffff, 5)); + assert_eq!(0x00007fffffffffff, sign_extend(0x00007fffffffffff, 6)); + assert_eq!(0x007fffffffffffff, sign_extend(0x007fffffffffffff, 7)); + assert_eq!(0x7fffffffffffffff, sign_extend(0x7fffffffffffffff, 8)); } -pub fn format_item_dec_u(p: u64, _: usize, print_width: usize) -> String { - format!("{:width$}", p, width = print_width) +#[test] +fn test_format_item_oct() { + assert_eq!(" 000", format_item_oct8(0)); + assert_eq!(" 377", format_item_oct8(0xff)); + assert_eq!(" 000000", format_item_oct16(0)); + assert_eq!(" 177777", format_item_oct16(0xffff)); + assert_eq!(" 00000000000", 
format_item_oct32(0)); + assert_eq!(" 37777777777", format_item_oct32(0xffffffff)); + assert_eq!(" 0000000000000000000000", format_item_oct64(0)); + assert_eq!(" 1777777777777777777777", format_item_oct64(0xffffffffffffffff)); +} + +#[test] +fn test_format_item_hex() { + assert_eq!(" 00", format_item_hex8(0)); + assert_eq!(" ff", format_item_hex8(0xff)); + assert_eq!(" 0000", format_item_hex16(0)); + assert_eq!(" ffff", format_item_hex16(0xffff)); + assert_eq!(" 00000000", format_item_hex32(0)); + assert_eq!(" ffffffff", format_item_hex32(0xffffffff)); + assert_eq!(" 0000000000000000", format_item_hex64(0)); + assert_eq!(" ffffffffffffffff", format_item_hex64(0xffffffffffffffff)); +} + +#[test] +fn test_format_item_dec_u() { + assert_eq!(" 0", format_item_dec_u8(0)); + assert_eq!(" 255", format_item_dec_u8(0xff)); + assert_eq!(" 0", format_item_dec_u16(0)); + assert_eq!(" 65535", format_item_dec_u16(0xffff)); + assert_eq!(" 0", format_item_dec_u32(0)); + assert_eq!(" 4294967295", format_item_dec_u32(0xffffffff)); + assert_eq!(" 0", format_item_dec_u64(0)); + assert_eq!(" 18446744073709551615", format_item_dec_u64(0xffffffffffffffff)); +} + +#[test] +fn test_format_item_dec_s() { + assert_eq!(" 0", format_item_dec_s8(0)); + assert_eq!(" 127", format_item_dec_s8(0x7f)); + assert_eq!(" -128", format_item_dec_s8(0x80)); + assert_eq!(" 0", format_item_dec_s16(0)); + assert_eq!(" 32767", format_item_dec_s16(0x7fff)); + assert_eq!(" -32768", format_item_dec_s16(0x8000)); + assert_eq!(" 0", format_item_dec_s32(0)); + assert_eq!(" 2147483647", format_item_dec_s32(0x7fffffff)); + assert_eq!(" -2147483648", format_item_dec_s32(0x80000000)); + assert_eq!(" 0", format_item_dec_s64(0)); + assert_eq!(" 9223372036854775807", format_item_dec_s64(0x7fffffffffffffff)); + assert_eq!(" -9223372036854775808", format_item_dec_s64(0x8000000000000000)); } From 83a1ff404fe4f78898539ae2a782c8795c937b94 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Mon, 22 Aug 2016 00:37:11 +0200 Subject: 
[PATCH 35/41] od: refactor: create InputDecoder to convert input It reads from the input and provides data conversion functions. --- src/od/inputdecoder.rs | 182 +++++++++++++++++++++++++++++++++++++++++ src/od/od.rs | 88 ++++++++------------ 2 files changed, 215 insertions(+), 55 deletions(-) create mode 100644 src/od/inputdecoder.rs diff --git a/src/od/inputdecoder.rs b/src/od/inputdecoder.rs new file mode 100644 index 000000000..01143d24b --- /dev/null +++ b/src/od/inputdecoder.rs @@ -0,0 +1,182 @@ +use std::io; +use byteorder_io::ByteOrder; +use multifilereader::HasError; +use peekreader::PeekRead; + +/// Processes an input and provides access to the data read in various formats +/// +/// Currently only useful if the input implements `PeekRead`. +pub struct InputDecoder<'a, I> where I: 'a { + /// The input from which data is read + input: &'a mut I, + + /// A memory buffer, its size is set in `new`. + data: Vec, + /// The number of bytes in the buffer reserved for the peek data from `PeekRead`. + reserved_peek_length: usize, + + /// The number of (valid) bytes in the buffer. + used_normal_length: usize, + /// The number of peek bytes in the buffer. + used_peek_length: usize, + + /// Byte order used to read data from the buffer. + byte_order: ByteOrder, +} + +impl<'a, I> InputDecoder<'a, I> { + /// Creates a new `InputDecoder` with an allocated buffer of `normal_length`+`peek_length` bytes. + /// `byte_order` determines how to read multibyte formats from the buffer.
+ pub fn new(input: &mut I, normal_length: usize, peek_length: usize, byte_order: ByteOrder) -> InputDecoder { + + let mut bytes: Vec = Vec::with_capacity(normal_length+peek_length); + unsafe { bytes.set_len(normal_length+peek_length); } // fast but uninitialized + + InputDecoder { + input: input, + data: bytes, + reserved_peek_length: peek_length, + used_normal_length: 0, + used_peek_length: 0, + byte_order: byte_order, + } + } +} + + +impl<'a, I> InputDecoder<'a, I> where I : PeekRead { + /// calls `peek_read` on the internal stream to (re)fill the buffer. Returns a + /// MemoryDecoder providing access to the result or returns an i/o error. + pub fn peek_read(&mut self) -> io::Result { + match self.input.peek_read(self.data.as_mut_slice(), self.reserved_peek_length) { + Ok((n, p)) => { + self.used_normal_length = n; + self.used_peek_length = p; + Ok(MemoryDecoder { + data: &mut self.data, + used_normal_length: self.used_normal_length, + used_peek_length: self.used_peek_length, + byte_order: self.byte_order, + }) + }, + Err(e) => Err(e), + } + + } +} + +impl<'a, I> HasError for InputDecoder<'a, I> where I : HasError { + /// calls has_error on the internal stream. + fn has_error(&self) -> bool { + self.input.has_error() + } +} + +/// Provides access to the internal data in various formats +pub struct MemoryDecoder<'a> { + /// A reference to the parents' data + data: &'a mut Vec, + /// The number of (valid) bytes in the buffer. + used_normal_length: usize, + /// The number of peek bytes in the buffer. + used_peek_length: usize, + /// Byte order used to read data from the buffer. + byte_order: ByteOrder, +} + +impl<'a> MemoryDecoder<'a> { + /// Set a part of the internal buffer to zero. + /// access to the whole buffer is possible, not just to the valid data. + pub fn zero_out_buffer(&mut self, start:usize, end:usize) { + for i in start..end { + self.data[i] = 0; + } + } + + /// Returns the current length of the buffer. (ie. how much valid data it contains.) 
+ pub fn length(&self) -> usize { + self.used_normal_length + } + + /// Creates a clone of the internal buffer. The clone only contains the valid data. + pub fn clone_buffer(&self, other: &mut Vec) { + other.clone_from(&self.data); + other.resize(self.used_normal_length, 0); + } + + /// Returns a slice to the internal buffer starting at `start`. + pub fn get_buffer(&self, start: usize) -> &[u8] { + &self.data[start..self.used_normal_length] + } + + /// Returns a slice to the internal buffer including the peek data starting at `start`. + pub fn get_full_buffer(&self, start: usize) -> &[u8] { + &self.data[start..self.used_normal_length+self.used_peek_length] + } + + /// Returns a u8/u16/u32/u64 from the internal buffer at position `start`. + pub fn read_uint(&self, start: usize, byte_size: usize) -> u64 { + match byte_size { + 1 => self.data[start] as u64, + 2 => self.byte_order.read_u16(&self.data[start..start + 2]) as u64, + 4 => self.byte_order.read_u32(&self.data[start..start + 4]) as u64, + 8 => self.byte_order.read_u64(&self.data[start..start + 8]), + _ => panic!("Invalid byte_size: {}", byte_size), + } + } + + /// Returns an f32/f64 from the internal buffer at position `start`.
+ pub fn read_float(&self, start: usize, byte_size: usize) -> f64 { + match byte_size { + 4 => self.byte_order.read_f32(&self.data[start..start + 4]) as f64, + 8 => self.byte_order.read_f64(&self.data[start..start + 8]), + _ => panic!("Invalid byte_size: {}", byte_size), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + use peekreader::PeekReader; + use byteorder_io::ByteOrder; + + #[test] + fn smoke_test() { + let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0xff, 0xff]; + let mut input=PeekReader::new(Cursor::new(&data)); + let mut sut=InputDecoder::new(&mut input, 8, 2, ByteOrder::Little); + + match sut.peek_read() { + Ok(mut mem) => { + assert_eq!(8, mem.length()); + + assert_eq!(-2.0, mem.read_float(0, 8)); + assert_eq!(-2.0, mem.read_float(4, 4)); + assert_eq!(0xc000000000000000, mem.read_uint(0, 8)); + assert_eq!(0xc0000000, mem.read_uint(4, 4)); + assert_eq!(0xc000, mem.read_uint(6, 2)); + assert_eq!(0xc0, mem.read_uint(7, 1)); + assert_eq!(&[0, 0xc0], mem.get_buffer(6)); + assert_eq!(&[0, 0xc0, 0xff, 0xff], mem.get_full_buffer(6)); + + let mut copy: Vec = Vec::new(); + mem.clone_buffer(&mut copy); + assert_eq!(vec!{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0}, copy); + + mem.zero_out_buffer(7, 8); + assert_eq!(&[0, 0, 0xff, 0xff], mem.get_full_buffer(6)); + } + Err(e) => { assert!(false, e); } + } + + match sut.peek_read() { + Ok(mem) => { + assert_eq!(2, mem.length()); + assert_eq!(0xffff, mem.read_uint(0, 2)); + } + Err(e) => { assert!(false, e); } + } + } +} diff --git a/src/od/od.rs b/src/od/od.rs index 0b32b48cb..8270e74a3 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -27,6 +27,7 @@ mod parse_nrofbytes; mod parse_formats; mod parse_inputs; mod inputoffset; +mod inputdecoder; #[cfg(test)] mod mockstream; @@ -42,6 +43,7 @@ use parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; use prn_char::format_ascii_dump; use parse_inputs::{parse_inputs, CommandLineInputs}; use inputoffset::{InputOffset, 
Radix}; +use inputdecoder::{InputDecoder,MemoryDecoder}; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); const MAX_BYTES_PER_UNIT: usize = 8; @@ -233,25 +235,25 @@ pub fn uumain(args: Vec) -> i32 { let mut input = open_input_peek_reader(&input_strings, skip_bytes, read_bytes); + let mut input_decoder = InputDecoder::new(&mut input, line_bytes, PEEK_BUFFER_SIZE, byte_order); + let mut input_offset = InputOffset::new(Radix::Octal, skip_bytes, label); if let Err(e) = input_offset.parse_radix_from_commandline(matches.opt_str("A")) { disp_err!("Invalid -A/--address-radix\n{}", e); return 1; } - odfunc(&mut input, &mut input_offset, line_bytes, byte_order, &formats[..], + odfunc(&mut input_decoder, &mut input_offset, line_bytes, &formats[..], output_duplicates) } // TODO: refactor, too many arguments -fn odfunc(input: &mut I, input_offset: &mut InputOffset, line_bytes: usize, byte_order: ByteOrder, +fn odfunc(input_decoder: &mut InputDecoder, input_offset: &mut InputOffset, line_bytes: usize, formats: &[ParsedFormatterItemInfo], output_duplicates: bool) -> i32 where I : PeekRead+HasError { let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); - let mut bytes: Vec = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE); - unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); let print_width_block = formats @@ -294,29 +296,29 @@ fn odfunc(input: &mut I, input_offset: &mut InputOffset, line_bytes: usize, b loop { // print each line data (or multi-format raster of several lines describing the same data). 
- match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) { - Ok((0, _)) => { - input_offset.print_final_offset(); - break; - } - Ok((n, peekbytes)) => { + match input_decoder.peek_read() { + Ok(mut memory_decoder) => { + let length=memory_decoder.length(); + + if length == 0 { + input_offset.print_final_offset(); + break; + } + // not enough byte for a whole element, this should only happen on the last line. - if n != line_bytes { + if length != line_bytes { // set zero bytes in the part of the buffer that will be used, but is not filled. - let mut max_used = n + MAX_BYTES_PER_UNIT; + let mut max_used = length + MAX_BYTES_PER_UNIT; if max_used > line_bytes { max_used = line_bytes; } - for i in n..max_used { - bytes[i] = 0; - } + memory_decoder.zero_out_buffer(length, max_used); } if !output_duplicates - && n == line_bytes - && !previous_bytes.is_empty() - && previous_bytes[..line_bytes] == bytes[..line_bytes] { + && length == line_bytes + && memory_decoder.get_buffer(0) == &previous_bytes[..] 
{ if !duplicate_line { duplicate_line = true; println!("*"); @@ -324,17 +326,16 @@ fn odfunc(input: &mut I, input_offset: &mut InputOffset, line_bytes: usize, b } else { duplicate_line = false; - if n == line_bytes { + if length == line_bytes { // save a copy of the input unless it is the last line - previous_bytes.clone_from(&bytes); + memory_decoder.clone_buffer(&mut previous_bytes); } - print_bytes(byte_order, &bytes, n, peekbytes, - &input_offset.format_byte_offset(), + print_bytes(&input_offset.format_byte_offset(), &memory_decoder, &spaced_formatters, byte_size_block, print_width_line); } - input_offset.increase_position(n); + input_offset.increase_position(length); } Err(e) => { show_error!("{}", e); @@ -344,70 +345,47 @@ fn odfunc(input: &mut I, input_offset: &mut InputOffset, line_bytes: usize, b }; } - if input.has_error() { + if input_decoder.has_error() { 1 } else { 0 } } -fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str, +fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, formats: &[SpacedFormatterItemInfo], byte_size_block: usize, print_width_line: usize) { let mut first = true; // First line of a multi-format raster. 
for f in formats { let mut output_text = String::new(); let mut b = 0; - while b < length { - let nextb = b + f.frm.formatter_item_info.byte_size; - + while b < input_decoder.length() { output_text.push_str(&format!("{:>width$}", "", width = f.spacing[b % byte_size_block])); match f.frm.formatter_item_info.formatter { FormatWriter::IntWriter(func) => { - let p: u64 = match f.frm.formatter_item_info.byte_size { - 1 => { - bytes[b] as u64 - } - 2 => { - byte_order.read_u16(&bytes[b..nextb]) as u64 - } - 4 => { - byte_order.read_u32(&bytes[b..nextb]) as u64 - } - 8 => { - byte_order.read_u64(&bytes[b..nextb]) - } - _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); } - }; + let p = input_decoder.read_uint(b, f.frm.formatter_item_info.byte_size); output_text.push_str(&func(p)); } FormatWriter::FloatWriter(func) => { - let p: f64 = match f.frm.formatter_item_info.byte_size { - 4 => { - byte_order.read_f32(&bytes[b..nextb]) as f64 - } - 8 => { - byte_order.read_f64(&bytes[b..nextb]) - } - _ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); } - }; + let p = input_decoder.read_float(b, f.frm.formatter_item_info.byte_size); output_text.push_str(&func(p)); } FormatWriter::MultibyteWriter(func) => { - output_text.push_str(&func(&bytes[b..length+peekbytes])); + output_text.push_str(&func(input_decoder.get_full_buffer(b))); } } - b = nextb; + + b += f.frm.formatter_item_info.byte_size; } if f.frm.add_ascii_dump { let missing_spacing = print_width_line.saturating_sub(output_text.chars().count()); output_text.push_str(&format!("{:>width$} {}", "", - format_ascii_dump(&bytes[..length]), + format_ascii_dump(input_decoder.get_buffer(0)), width=missing_spacing)); } From 2c24911d9c0a7c82c5371cd76eda6a8f9ab2694f Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Tue, 23 Aug 2016 18:11:47 +0200 Subject: [PATCH 36/41] od: refactor: create struct for formatting info also properly document the alignment algorithm. 
--- src/od/od.rs | 96 +++++------------ src/od/output_info.rs | 244 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+), 68 deletions(-) create mode 100644 src/od/output_info.rs diff --git a/src/od/od.rs b/src/od/od.rs index 8270e74a3..31a2c932e 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -28,6 +28,7 @@ mod parse_formats; mod parse_inputs; mod inputoffset; mod inputdecoder; +mod output_info; #[cfg(test)] mod mockstream; @@ -39,14 +40,14 @@ use partialreader::*; use peekreader::*; use formatteriteminfo::*; use parse_nrofbytes::parse_number_of_bytes; -use parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; +use parse_formats::parse_format_flags; use prn_char::format_ascii_dump; use parse_inputs::{parse_inputs, CommandLineInputs}; use inputoffset::{InputOffset, Radix}; use inputdecoder::{InputDecoder,MemoryDecoder}; +use output_info::OutputInfo; static VERSION: &'static str = env!("CARGO_PKG_VERSION"); -const MAX_BYTES_PER_UNIT: usize = 8; const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes static USAGE: &'static str = @@ -93,6 +94,8 @@ Any type specification can have a "z" suffic, which will add a ASCII dump at If an error occurred, a diagnostic message will be printed to stderr, and the exitcode will be non-zero."#; +/// parses and validates commandline parameters, prepares data structures, +/// opens the input and calls `odfunc` to process the input. 
pub fn uumain(args: Vec) -> i32 { let mut opts = getopts::Options::new(); @@ -233,65 +236,27 @@ pub fn uumain(args: Vec) -> i32 { } }; - let mut input = open_input_peek_reader(&input_strings, skip_bytes, read_bytes); - - let mut input_decoder = InputDecoder::new(&mut input, line_bytes, PEEK_BUFFER_SIZE, byte_order); - let mut input_offset = InputOffset::new(Radix::Octal, skip_bytes, label); if let Err(e) = input_offset.parse_radix_from_commandline(matches.opt_str("A")) { disp_err!("Invalid -A/--address-radix\n{}", e); return 1; } - odfunc(&mut input_decoder, &mut input_offset, line_bytes, &formats[..], - output_duplicates) + let mut input = open_input_peek_reader(&input_strings, skip_bytes, read_bytes); + let mut input_decoder = InputDecoder::new(&mut input, line_bytes, PEEK_BUFFER_SIZE, byte_order); + + let output_info = OutputInfo::new(line_bytes, &formats[..], output_duplicates); + + odfunc(&mut input_offset, &mut input_decoder, &output_info) } -// TODO: refactor, too many arguments -fn odfunc(input_decoder: &mut InputDecoder, input_offset: &mut InputOffset, line_bytes: usize, - formats: &[ParsedFormatterItemInfo], output_duplicates: bool) -> i32 +/// Loops through the input line by line, calling print_bytes to take care of the output. +fn odfunc(input_offset: &mut InputOffset, input_decoder: &mut InputDecoder, + output_info: &OutputInfo) -> i32 where I : PeekRead+HasError { - let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); - - let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); - let print_width_block = formats - .iter() - .fold(1, |max, next| { - cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size)) - }); - let print_width_line = print_width_block * (line_bytes / byte_size_block); - - if byte_size_block > MAX_BYTES_PER_UNIT { - panic!("{}-bits types are unsupported. 
Current max={}-bits.", - 8 * byte_size_block, - 8 * MAX_BYTES_PER_UNIT); - } - - let mut spaced_formatters: Vec = formats - .iter() - .map(|f| SpacedFormatterItemInfo { frm: *f, spacing: [0; MAX_BYTES_PER_UNIT] }) - .collect(); - - // calculate proper alignment for each item - for sf in &mut spaced_formatters { - let mut byte_size = sf.frm.formatter_item_info.byte_size; - let mut items_in_block = byte_size_block / byte_size; - let thisblock_width = sf.frm.formatter_item_info.print_width * items_in_block; - let mut missing_spacing = print_width_block - thisblock_width; - - while items_in_block > 0 { - let avg_spacing: usize = missing_spacing / items_in_block; - for i in 0..items_in_block { - sf.spacing[i * byte_size] += avg_spacing; - missing_spacing -= avg_spacing; - } - // this assumes the size of all types is a power of 2 (1, 2, 4, 8, 16, ...) - items_in_block /= 2; - byte_size *= 2; - } - } + let line_bytes = output_info.byte_size_line; loop { // print each line data (or multi-format raster of several lines describing the same data). @@ -308,7 +273,7 @@ fn odfunc(input_decoder: &mut InputDecoder, input_offset: &mut InputOffset // not enough byte for a whole element, this should only happen on the last line. if length != line_bytes { // set zero bytes in the part of the buffer that will be used, but is not filled. - let mut max_used = length + MAX_BYTES_PER_UNIT; + let mut max_used = length + output_info.byte_size_block; if max_used > line_bytes { max_used = line_bytes; } @@ -316,7 +281,7 @@ fn odfunc(input_decoder: &mut InputDecoder, input_offset: &mut InputOffset memory_decoder.zero_out_buffer(length, max_used); } - if !output_duplicates + if !output_info.output_duplicates && length == line_bytes && memory_decoder.get_buffer(0) == &previous_bytes[..] 
{ if !duplicate_line { @@ -332,7 +297,7 @@ fn odfunc(input_decoder: &mut InputDecoder, input_offset: &mut InputOffset } print_bytes(&input_offset.format_byte_offset(), &memory_decoder, - &spaced_formatters, byte_size_block, print_width_line); + &output_info); } input_offset.increase_position(length); @@ -352,25 +317,25 @@ fn odfunc(input_decoder: &mut InputDecoder, input_offset: &mut InputOffset } } -fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, - formats: &[SpacedFormatterItemInfo], byte_size_block: usize, print_width_line: usize) { +/// Outputs a single line of input, into one or more lines human readable output. +fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, output_info: &OutputInfo) { let mut first = true; // First line of a multi-format raster. - for f in formats { + for f in output_info.spaced_formatters_iter() { let mut output_text = String::new(); let mut b = 0; while b < input_decoder.length() { output_text.push_str(&format!("{:>width$}", "", - width = f.spacing[b % byte_size_block])); + width = f.spacing[b % output_info.byte_size_block])); - match f.frm.formatter_item_info.formatter { + match f.formatter_item_info.formatter { FormatWriter::IntWriter(func) => { - let p = input_decoder.read_uint(b, f.frm.formatter_item_info.byte_size); + let p = input_decoder.read_uint(b, f.formatter_item_info.byte_size); output_text.push_str(&func(p)); } FormatWriter::FloatWriter(func) => { - let p = input_decoder.read_float(b, f.frm.formatter_item_info.byte_size); + let p = input_decoder.read_float(b, f.formatter_item_info.byte_size); output_text.push_str(&func(p)); } FormatWriter::MultibyteWriter(func) => { @@ -378,11 +343,11 @@ fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, } } - b += f.frm.formatter_item_info.byte_size; + b += f.formatter_item_info.byte_size; } - if f.frm.add_ascii_dump { - let missing_spacing = print_width_line.saturating_sub(output_text.chars().count()); + if f.add_ascii_dump { + let missing_spacing = 
output_info.print_width_line.saturating_sub(output_text.chars().count()); output_text.push_str(&format!("{:>width$} {}", "", format_ascii_dump(input_decoder.get_buffer(0)), @@ -423,8 +388,3 @@ fn open_input_peek_reader<'a>(input_strings: &'a Vec, skip_bytes: usize, let input = PeekReader::new(pr); input } - -struct SpacedFormatterItemInfo { - frm: ParsedFormatterItemInfo, - spacing: [usize; MAX_BYTES_PER_UNIT], -} diff --git a/src/od/output_info.rs b/src/od/output_info.rs new file mode 100644 index 000000000..4af3bef9e --- /dev/null +++ b/src/od/output_info.rs @@ -0,0 +1,244 @@ +use std::cmp; +use std::slice::Iter; +use parse_formats::ParsedFormatterItemInfo; +use formatteriteminfo::FormatterItemInfo; + +/// Size in bytes of the max datatype. i.e. set to 16 for 128-bit numbers. +const MAX_BYTES_PER_UNIT: usize = 8; + +/// Contains information to output a single output line in human readable form +pub struct SpacedFormatterItemInfo { + /// Contains a function pointer to output data, and information about the output format. + pub formatter_item_info: FormatterItemInfo, + /// Contains the number of spaces to add to align data with other output formats. + /// + /// If the corresponding data is a single byte, each entry in this array contains + /// the number of spaces to insert when outputting each byte. If the corresponding + /// data is multi-byte, only the first byte position is used. For example a 32-bit + /// datatype could use positions 0, 4, 8, 12, .... + /// As each block is formatted identically, only the spacing for a single block is set. + pub spacing: [usize; MAX_BYTES_PER_UNIT], + /// If set, adds an ASCII dump at the end of the line + pub add_ascii_dump: bool, +} + +/// Contains information about all output lines. +pub struct OutputInfo { + /// The number of bytes of a line. + pub byte_size_line: usize, + /// The width of a line in human readable format. + pub print_width_line: usize, + + /// The number of bytes in a block.
(This is the size of the largest datatype in `spaced_formatters`.) + pub byte_size_block: usize, + /// The width of a block in human readable format. (The size of the largest format.) + pub print_width_block: usize, + /// All formats. + spaced_formatters: Vec, + /// determines if duplicate output lines should be printed, or + /// skipped with a "*" showing one or more skipped lines. + pub output_duplicates: bool, +} + + +impl OutputInfo { + /// Returns an iterator over the `SpacedFormatterItemInfo` vector. + pub fn spaced_formatters_iter(&self) -> Iter { + self.spaced_formatters.iter() + } + + /// Creates a new `OutputInfo` based on the parameters + pub fn new(line_bytes: usize, formats: &[ParsedFormatterItemInfo], output_duplicates: bool) -> OutputInfo { + + let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); + let print_width_block = formats + .iter() + .fold(1, |max, next| { + cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size)) + }); + let print_width_line = print_width_block * (line_bytes / byte_size_block); + + let spaced_formatters = OutputInfo::create_spaced_formatter_info(&formats, byte_size_block, print_width_block); + + OutputInfo { + byte_size_line: line_bytes, + print_width_line: print_width_line, + byte_size_block: byte_size_block, + print_width_block: print_width_block, + spaced_formatters: spaced_formatters, + output_duplicates: output_duplicates, + } + } + + fn create_spaced_formatter_info(formats: &[ParsedFormatterItemInfo], + byte_size_block: usize, print_width_block: usize) -> Vec { + formats + .iter() + .map(|f| SpacedFormatterItemInfo { + formatter_item_info: f.formatter_item_info, + add_ascii_dump: f.add_ascii_dump, + spacing: OutputInfo::calculate_alignment(f, byte_size_block, print_width_block) + }) + .collect() + } + + /// calculates proper alignment for a single line of output + /// + /// Multiple representations of the same 
data, will be right-aligned for easy reading. + /// For example a 64-bit octal and a 32-bit decimal with a 16-bit hexadecimal looks like this: + /// ``` + /// 1777777777777777777777 1777777777777777777777 + /// 4294967295 4294967295 4294967295 4294967295 + /// ffff ffff ffff ffff ffff ffff ffff ffff + /// ``` + /// In this example there is additional spacing before the first and third decimal number, + /// and there is additional spacing before the 1st, 3rd, 5th and 7th hexadecimal number. + /// This way both the octal and decimal, as well as the decimal and hexadecimal numbers + /// left align. Note that the alignment below both octal numbers is identical. + /// + /// This function calculates the required spacing for a single line, given the size + /// of a block, and the width of a block. The size of a block is the largest type + /// and the width is the width of the type which needs the most space to print that + /// number of bytes. So both numbers might refer to different types. All widths + /// include a space at the front. For example the width of an 8-bit hexadecimal, + /// is 3 characters, for example " FF". + /// + /// This algorithm first calculates how many spaces need to be added, based on the + /// block size and the size of the type, and the widths of the block and the type. + /// The required spaces are spread across the available positions. + /// If the blocksize is 8, and the size of the type is 8 too, there will be just + /// one value in a block, so all spacing will be assigned to position 0. + /// If the blocksize is 8, and the size of the type is 2, the spacing will be + /// spread across position 0, 2, 4, 6. All 4 positions will get an additional + /// space as long as there are more than 4 spaces available. If there are 2 + /// spaces available, they will be assigned to position 0 and 4. If there is + /// 1 space available, it will be assigned to position 0.
This will be combined, + /// For example 7 spaces will be assigned to position 0, 2, 4, 6 like: 3, 1, 2, 1. + /// And 7 spaces with 2 positions will be assigned to position 0 and 4 like 4, 3. + /// + /// Here is another example showing the alignment of 64-bit unsigned decimal numbers, + /// 32-bit hexadecimal number, 16-bit octal numbers and 8-bit hexadecimal numbers: + /// ``` + /// 18446744073709551615 18446744073709551615 + /// ffffffff ffffffff ffffffff ffffffff + /// 177777 177777 177777 177777 177777 177777 177777 177777 + /// ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + /// ``` + /// + /// This algorithm assumes the size of all types is a power of 2 (1, 2, 4, 8, 16, ...) + /// Increase MAX_BYTES_PER_UNIT to allow larger types. + fn calculate_alignment(sf: &TypeSizeInfo, byte_size_block: usize, + print_width_block: usize) -> [usize; MAX_BYTES_PER_UNIT] { + + if byte_size_block > MAX_BYTES_PER_UNIT { + panic!("{}-bits types are unsupported. Current max={}-bits.", + 8 * byte_size_block, + 8 * MAX_BYTES_PER_UNIT); + } + let mut spacing = [0; MAX_BYTES_PER_UNIT]; + + let mut byte_size = sf.byte_size(); + let mut items_in_block = byte_size_block / byte_size; + let thisblock_width = sf.print_width() * items_in_block; + let mut missing_spacing = print_width_block - thisblock_width; + + while items_in_block > 0 { + let avg_spacing: usize = missing_spacing / items_in_block; + for i in 0..items_in_block { + spacing[i * byte_size] += avg_spacing; + missing_spacing -= avg_spacing; + } + + items_in_block /= 2; + byte_size *= 2; + } + + spacing + } +} + +trait TypeSizeInfo { + fn byte_size(&self) -> usize; + fn print_width(&self) -> usize; +} + +impl TypeSizeInfo for ParsedFormatterItemInfo { + fn byte_size(&self) -> usize { self.formatter_item_info.byte_size } + fn print_width(&self) -> usize { self.formatter_item_info.print_width } +} + +#[cfg(test)] +struct TypeInfo { + byte_size: usize, + print_width: usize, +} + +#[cfg(test)] +impl TypeSizeInfo for TypeInfo { + 
fn byte_size(&self) -> usize { self.byte_size } + fn print_width(&self) -> usize { self.print_width } +} + +#[test] +fn test_calculate_alignment() { + + // For this example `byte_size_block` is 8 and 'print_width_block' is 23: + // 1777777777777777777777 1777777777777777777777 + // 4294967295 4294967295 4294967295 4294967295 + // ffff ffff ffff ffff ffff ffff ffff ffff + + // the first line has no additional spacing: + assert_eq!([0, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:23}, 8, 23)); + // the second line a single space at the start of the block: + assert_eq!([1, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:11}, 8, 23)); + // the third line two spaces at pos 0, and 1 space at pos 4: + assert_eq!([2, 0, 0, 0, 1, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:5}, 8, 23)); + + // For this example `byte_size_block` is 8 and 'print_width_block' is 28: + // 18446744073709551615 18446744073709551615 + // ffffffff ffffffff ffffffff ffffffff + // 177777 177777 177777 177777 177777 177777 177777 177777 + // ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + + assert_eq!([7, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:21}, 8, 28)); + assert_eq!([5, 0, 0, 0, 5, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:9}, 8, 28)); + assert_eq!([0, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:7}, 8, 28)); + assert_eq!([1, 0, 1, 0, 1, 0, 1, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:3}, 8, 28)); + + // 9 tests where 8 .. 
16 spaces are spread across 8 positions + assert_eq!([1, 1, 1, 1, 1, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+8)); + assert_eq!([2, 1, 1, 1, 1, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+9)); + assert_eq!([2, 1, 1, 1, 2, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+10)); + assert_eq!([3, 1, 1, 1, 2, 1, 1, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+11)); + assert_eq!([2, 1, 2, 1, 2, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+12)); + assert_eq!([3, 1, 2, 1, 2, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+13)); + assert_eq!([3, 1, 2, 1, 3, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+14)); + assert_eq!([4, 1, 2, 1, 3, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+15)); + assert_eq!([2, 2, 2, 2, 2, 2, 2, 2], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+16)); + + // 4 tests where 15 spaces are spread across 8, 4, 2 or 1 position(s) + assert_eq!([4, 1, 2, 1, 3, 1, 2, 1], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+15)); + assert_eq!([5, 0, 3, 0, 4, 0, 3, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:4}, 8, 16+15)); + assert_eq!([8, 0, 0, 0, 7, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:8}, 8, 16+15)); + assert_eq!([15, 0, 0, 0, 0, 0, 0, 0], + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:16}, 8, 16+15)); +} From 92fc286b0ef016fc40f22e6748825485fe09f2bf Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Tue, 23 Aug 2016 22:49:37 +0200 Subject: [PATCH 37/41] od: refactor uumain into smaller functions --- src/od/inputoffset.rs | 45 +++------ src/od/od.rs | 225 
++++++++++++++++++++++++++---------------- 2 files changed, 154 insertions(+), 116 deletions(-) diff --git a/src/od/inputoffset.rs b/src/od/inputoffset.rs index 44e5106b7..9b82a214d 100644 --- a/src/od/inputoffset.rs +++ b/src/od/inputoffset.rs @@ -31,28 +31,9 @@ impl InputOffset { } } - /// set `self.radix` to the value provided by the --address-radix commandline option - pub fn parse_radix_from_commandline(&mut self, radix_str: Option) -> Result<(), &'static str> { - match radix_str { - None => self.radix = Radix::Octal, - Some(s) => { - let st = s.into_bytes(); - if st.len() != 1 { - return Err("Radix must be one of [d, o, n, x]\n") - } else { - let radix: char = *(st.get(0) - .expect("byte string of length 1 lacks a 0th elem")) as char; - match radix { - 'd' => self.radix = Radix::Decimal, - 'x' => self.radix = Radix::Hexadecimal, - 'o' => self.radix = Radix::Octal, - 'n' => self.radix = Radix::NoPrefix, - _ => return Err("Radix must be one of [d, o, n, x]\n") - } - } - } - } - Ok(()) + #[cfg(test)] + fn set_radix(&mut self, radix: Radix) { + self.radix = radix; } /// returns a string with the current byte offset @@ -86,20 +67,20 @@ fn test_input_offset() { assert_eq!("000014", &sut.format_byte_offset()); // note normally the radix will not change after initialisation - sut.parse_radix_from_commandline(Some("d".to_string())).unwrap(); + sut.set_radix(Radix::Decimal); assert_eq!("0000020", &sut.format_byte_offset()); - sut.parse_radix_from_commandline(Some("x".to_string())).unwrap(); + sut.set_radix(Radix::Hexadecimal); assert_eq!("000014", &sut.format_byte_offset()); - sut.parse_radix_from_commandline(Some("o".to_string())).unwrap(); + sut.set_radix(Radix::Octal); assert_eq!("0000024", &sut.format_byte_offset()); - sut.parse_radix_from_commandline(Some("n".to_string())).unwrap(); + sut.set_radix(Radix::NoPrefix); assert_eq!("", &sut.format_byte_offset()); sut.increase_position(10); - sut.parse_radix_from_commandline(None).unwrap(); + 
sut.set_radix(Radix::Octal); assert_eq!("0000036", &sut.format_byte_offset()); } @@ -111,19 +92,19 @@ fn test_input_offset_with_label() { assert_eq!("000014 (00001E)", &sut.format_byte_offset()); // note normally the radix will not change after initialisation - sut.parse_radix_from_commandline(Some("d".to_string())).unwrap(); + sut.set_radix(Radix::Decimal); assert_eq!("0000020 (0000030)", &sut.format_byte_offset()); - sut.parse_radix_from_commandline(Some("x".to_string())).unwrap(); + sut.set_radix(Radix::Hexadecimal); assert_eq!("000014 (00001E)", &sut.format_byte_offset()); - sut.parse_radix_from_commandline(Some("o".to_string())).unwrap(); + sut.set_radix(Radix::Octal); assert_eq!("0000024 (0000036)", &sut.format_byte_offset()); - sut.parse_radix_from_commandline(Some("n".to_string())).unwrap(); + sut.set_radix(Radix::NoPrefix); assert_eq!("(0000036)", &sut.format_byte_offset()); sut.increase_position(10); - sut.parse_radix_from_commandline(None).unwrap(); + sut.set_radix(Radix::Octal); assert_eq!("0000036 (0000050)", &sut.format_byte_offset()); } diff --git a/src/od/od.rs b/src/od/od.rs index 31a2c932e..a43a3fc1b 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -40,7 +40,7 @@ use partialreader::*; use peekreader::*; use formatteriteminfo::*; use parse_nrofbytes::parse_number_of_bytes; -use parse_formats::parse_format_flags; +use parse_formats::{parse_format_flags, ParsedFormatterItemInfo}; use prn_char::format_ascii_dump; use parse_inputs::{parse_inputs, CommandLineInputs}; use inputoffset::{InputOffset, Radix}; @@ -94,9 +94,7 @@ Any type specification can have a "z" suffic, which will add a ASCII dump at If an error occurred, a diagnostic message will be printed to stderr, and the exitcode will be non-zero."#; -/// parses and validates commandline parameters, prepares data structures, -/// opens the input and calls `odfunc` to process the input. 
-pub fn uumain(args: Vec) -> i32 { +fn create_getopts_options() -> getopts::Options { let mut opts = getopts::Options::new(); opts.optopt("A", "address-radix", @@ -143,6 +141,133 @@ pub fn uumain(args: Vec) -> i32 { opts.optflag("", "version", "output version information and exit."); opts.optflag("", "traditional", "compatibility mode with one input, offset and label."); + opts +} + +struct OdOptions { + byte_order: ByteOrder, + skip_bytes : usize, + read_bytes : Option, + label: Option, + input_strings: Vec, + formats: Vec, + line_bytes: usize, + output_duplicates: bool, + radix: Radix, +} + +impl OdOptions { + fn new(matches: getopts::Matches, args: Vec) -> Result { + let byte_order = match matches.opt_str("endian").as_ref().map(String::as_ref) { + None => { ByteOrder::Native }, + Some("little") => { ByteOrder::Little }, + Some("big") => { ByteOrder::Big }, + Some(s) => { + return Err(format!("Invalid argument --endian={}", s)); + } + }; + + let mut skip_bytes = match matches.opt_default("skip-bytes", "0") { + None => 0, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { i } + Err(_) => { + return Err(format!("Invalid argument --skip-bytes={}", s)); + } + } + } + }; + + let mut label: Option = None; + + let input_strings = match parse_inputs(&matches) { + Ok(CommandLineInputs::FileNames(v)) => v, + Ok(CommandLineInputs::FileAndOffset((f, s, l))) => { + skip_bytes = s; + label = l; + vec!{f} + }, + Err(e) => { + return Err(format!("Invalid inputs: {}", e)); + } + }; + + let formats = match parse_format_flags(&args) { + Ok(f) => f, + Err(e) => { + return Err(format!("{}", e)); + } + }; + + let mut line_bytes = match matches.opt_default("w", "32") { + None => 16, + Some(s) => { + match s.parse::() { + Ok(i) => { i } + Err(_) => { 2 } + } + } + }; + let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); + if line_bytes % min_bytes != 0 { + show_warning!("invalid width {}; using {} instead", line_bytes, 
min_bytes); + line_bytes = min_bytes; + } + + let output_duplicates = matches.opt_present("v"); + + let read_bytes = match matches.opt_str("read-bytes") { + None => None, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { Some(i) } + Err(_) => { + return Err(format!("Invalid argument --read-bytes={}", s)); + } + } + } + }; + + let radix = match matches.opt_str("A") { + None => Radix::Octal, + Some(s) => { + let st = s.into_bytes(); + if st.len() != 1 { + return Err(format!("Radix must be one of [d, o, n, x]")) + } else { + let radix: char = *(st.get(0) + .expect("byte string of length 1 lacks a 0th elem")) as char; + match radix { + 'd' => Radix::Decimal, + 'x' => Radix::Hexadecimal, + 'o' => Radix::Octal, + 'n' => Radix::NoPrefix, + _ => return Err(format!("Radix must be one of [d, o, n, x]")) + } + } + } + }; + + Ok(OdOptions { + byte_order: byte_order, + skip_bytes: skip_bytes, + read_bytes: read_bytes, + label: label, + input_strings: input_strings, + formats: formats, + line_bytes: line_bytes, + output_duplicates: output_duplicates, + radix: radix, + }) + } +} + +/// parses and validates commandline parameters, prepares data structures, +/// opens the input and calls `odfunc` to process the input. 
+pub fn uumain(args: Vec) -> i32 { + let opts = create_getopts_options(); + let matches = match opts.parse(&args[1..]) { Ok(m) => m, Err(f) => { @@ -160,92 +285,24 @@ pub fn uumain(args: Vec) -> i32 { return 0; } - let byte_order = match matches.opt_str("endian").as_ref().map(String::as_ref) { - None => { ByteOrder::Native }, - Some("little") => { ByteOrder::Little }, - Some("big") => { ByteOrder::Big }, - Some(s) => { - disp_err!("Invalid argument --endian={}", s); + let od_options = match OdOptions::new(matches, args) { + Err(s) => { + disp_err!("{}", s); return 1; - } - }; - - let mut skip_bytes = match matches.opt_default("skip-bytes", "0") { - None => 0, - Some(s) => { - match parse_number_of_bytes(&s) { - Ok(i) => { i } - Err(_) => { - disp_err!("Invalid argument --skip-bytes={}", s); - return 1; - } - } - } - }; - - let mut label: Option = None; - - let input_strings = match parse_inputs(&matches) { - Ok(CommandLineInputs::FileNames(v)) => v, - Ok(CommandLineInputs::FileAndOffset((f, s, l))) => { - skip_bytes = s; - label = l; - vec!{f} }, - Err(e) => { - disp_err!("Invalid inputs: {}", e); - return 1; - } + Ok(o) => o, }; - let formats = match parse_format_flags(&args) { - Ok(f) => f, - Err(e) => { - disp_err!("{}", e); - return 1; - } - }; + let mut input_offset = InputOffset::new(od_options.radix, od_options.skip_bytes, + od_options.label); - let mut line_bytes = match matches.opt_default("w", "32") { - None => 16, - Some(s) => { - match s.parse::() { - Ok(i) => { i } - Err(_) => { 2 } - } - } - }; - let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); - if line_bytes % min_bytes != 0 { - show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); - line_bytes = min_bytes; - } + let mut input = open_input_peek_reader(&od_options.input_strings, + od_options.skip_bytes, od_options.read_bytes); + let mut input_decoder = InputDecoder::new(&mut input, od_options.line_bytes, + PEEK_BUFFER_SIZE, 
od_options.byte_order); - let output_duplicates = matches.opt_present("v"); - - let read_bytes = match matches.opt_str("read-bytes") { - None => None, - Some(s) => { - match parse_number_of_bytes(&s) { - Ok(i) => { Some(i) } - Err(_) => { - disp_err!("Invalid argument --read-bytes={}", s); - return 1; - } - } - } - }; - - let mut input_offset = InputOffset::new(Radix::Octal, skip_bytes, label); - if let Err(e) = input_offset.parse_radix_from_commandline(matches.opt_str("A")) { - disp_err!("Invalid -A/--address-radix\n{}", e); - return 1; - } - - let mut input = open_input_peek_reader(&input_strings, skip_bytes, read_bytes); - let mut input_decoder = InputDecoder::new(&mut input, line_bytes, PEEK_BUFFER_SIZE, byte_order); - - let output_info = OutputInfo::new(line_bytes, &formats[..], output_duplicates); + let output_info = OutputInfo::new(od_options.line_bytes, &od_options.formats[..], + od_options.output_duplicates); odfunc(&mut input_offset, &mut input_decoder, &output_info) } From 184c4af76de23361f2e434b9e7d2845ea3363773 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Tue, 23 Aug 2016 22:55:40 +0200 Subject: [PATCH 38/41] od: fix zero width user input (-w0) --- src/od/od.rs | 4 ++-- tests/test_od.rs | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/od/od.rs b/src/od/od.rs index a43a3fc1b..a20856ba0 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -205,12 +205,12 @@ impl OdOptions { Some(s) => { match s.parse::() { Ok(i) => { i } - Err(_) => { 2 } + Err(_) => { 0 } } } }; let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); - if line_bytes % min_bytes != 0 { + if line_bytes == 0 || line_bytes % min_bytes != 0 { show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes); line_bytes = min_bytes; } diff --git a/tests/test_od.rs b/tests/test_od.rs index adddacbe9..92828012f 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -292,6 +292,23 @@ fn 
test_invalid_width(){ assert_eq!(result.stdout, expected_output); } +#[test] +fn test_zero_width(){ + + let input : [u8; 4] = [0x00, 0x00, 0x00, 0x00]; + let expected_output = unindent(" + 0000000 000000 + 0000002 000000 + 0000004 + "); + + let result = new_ucmd!().arg("-w0").arg("-v").run_piped_stdin(&input[..]); + + assert_eq!(result.stderr, "od: warning: invalid width 0; using 2 instead\n"); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + #[test] fn test_width_without_value(){ From fd5879dcf22727ef824cbd496abd3614b30eca3a Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Wed, 31 Aug 2016 23:47:07 +0200 Subject: [PATCH 39/41] od: refactor parse_formats.rs simplified, more but shorter functions --- src/od/parse_formats.rs | 378 +++++++++++++++++++++------------------- 1 file changed, 197 insertions(+), 181 deletions(-) diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs index 3394493e7..db4b20daa 100644 --- a/src/od/parse_formats.rs +++ b/src/od/parse_formats.rs @@ -1,18 +1,8 @@ -use std::collections::HashSet; use formatteriteminfo::FormatterItemInfo; use prn_int::*; use prn_char::*; use prn_float::*; -//This is available in some versions of std, but not all that we target. -macro_rules! 
hashmap { - ($( $key: expr => $val: expr ),*) => {{ - let mut map = ::std::collections::HashMap::new(); - $( map.insert($key, $val); )* - map - }} -} - #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct ParsedFormatterItemInfo { pub formatter_item_info: FormatterItemInfo, @@ -28,6 +18,76 @@ impl ParsedFormatterItemInfo { } } +fn od_argument_traditional_format(ch: char) -> Option { + match ch { + 'a' => Some(FORMAT_ITEM_A), + 'B' => Some(FORMAT_ITEM_OCT16), + 'b' => Some(FORMAT_ITEM_OCT8), + 'c' => Some(FORMAT_ITEM_C), + 'D' => Some(FORMAT_ITEM_DEC32U), + 'd' => Some(FORMAT_ITEM_DEC16U), + 'e' => Some(FORMAT_ITEM_F64), + 'F' => Some(FORMAT_ITEM_F64), + 'f' => Some(FORMAT_ITEM_F32), + 'H' => Some(FORMAT_ITEM_HEX32), + 'h' => Some(FORMAT_ITEM_HEX16), + 'i' => Some(FORMAT_ITEM_DEC32S), + 'I' => Some(FORMAT_ITEM_DEC64S), + 'L' => Some(FORMAT_ITEM_DEC64S), + 'l' => Some(FORMAT_ITEM_DEC64S), + 'O' => Some(FORMAT_ITEM_OCT32), + 'o' => Some(FORMAT_ITEM_OCT16), + 's' => Some(FORMAT_ITEM_DEC16S), + 'X' => Some(FORMAT_ITEM_HEX32), + 'x' => Some(FORMAT_ITEM_HEX16), + _ => None, + } +} + +fn od_format_type(type_char: FormatType, byte_size: u8) -> Option { + match (type_char, byte_size) { + (FormatType::Ascii, _) => Some(FORMAT_ITEM_A), + (FormatType::Char, _) => Some(FORMAT_ITEM_C), + + (FormatType::DecimalInt, 1) => Some(FORMAT_ITEM_DEC8S), + (FormatType::DecimalInt, 2) => Some(FORMAT_ITEM_DEC16S), + (FormatType::DecimalInt, 0) | + (FormatType::DecimalInt, 4) => Some(FORMAT_ITEM_DEC32S), + (FormatType::DecimalInt, 8) => Some(FORMAT_ITEM_DEC64S), + + (FormatType::OctalInt, 1) => Some(FORMAT_ITEM_OCT8), + (FormatType::OctalInt, 2) => Some(FORMAT_ITEM_OCT16), + (FormatType::OctalInt, 0) | + (FormatType::OctalInt, 4) => Some(FORMAT_ITEM_OCT32), + (FormatType::OctalInt, 8) => Some(FORMAT_ITEM_OCT64), + + (FormatType::UnsignedInt, 1) => Some(FORMAT_ITEM_DEC8U), + (FormatType::UnsignedInt, 2) => Some(FORMAT_ITEM_DEC16U), + (FormatType::UnsignedInt, 0) | + 
(FormatType::UnsignedInt, 4) => Some(FORMAT_ITEM_DEC32U), + (FormatType::UnsignedInt, 8) => Some(FORMAT_ITEM_DEC64U), + + (FormatType::HexadecimalInt, 1) => Some(FORMAT_ITEM_HEX8), + (FormatType::HexadecimalInt, 2) => Some(FORMAT_ITEM_HEX16), + (FormatType::HexadecimalInt, 0) | + (FormatType::HexadecimalInt, 4) => Some(FORMAT_ITEM_HEX32), + (FormatType::HexadecimalInt, 8) => Some(FORMAT_ITEM_HEX64), + + (FormatType::Float, 0) | + (FormatType::Float, 4) => Some(FORMAT_ITEM_F32), + (FormatType::Float, 8) => Some(FORMAT_ITEM_F64), + + _ => None, + } +} + +fn od_argument_with_option(ch:char) -> bool { + match ch { + 'A' | 'j' | 'N' | 'S' | 'w' => true, + _ => false, + } +} + /// Parses format flags from commandline /// @@ -41,32 +101,6 @@ impl ParsedFormatterItemInfo { /// parameters of -t/--format specify 1 or more formats. /// if -- appears on the commandline, parsing should stop. pub fn parse_format_flags(args: &Vec) -> Result, String> { - - let known_formats = hashmap![ - 'a' => FORMAT_ITEM_A, - 'B' => FORMAT_ITEM_OCT16, - 'b' => FORMAT_ITEM_OCT8, - 'c' => FORMAT_ITEM_C, - 'D' => FORMAT_ITEM_DEC32U, - 'd' => FORMAT_ITEM_DEC16U, - 'e' => FORMAT_ITEM_F64, - 'F' => FORMAT_ITEM_F64, - 'f' => FORMAT_ITEM_F32, - 'H' => FORMAT_ITEM_HEX32, - 'h' => FORMAT_ITEM_HEX16, - 'i' => FORMAT_ITEM_DEC32S, - 'I' => FORMAT_ITEM_DEC64S, - 'L' => FORMAT_ITEM_DEC64S, - 'l' => FORMAT_ITEM_DEC64S, - 'O' => FORMAT_ITEM_OCT32, - 'o' => FORMAT_ITEM_OCT16, - 's' => FORMAT_ITEM_DEC16S, - 'X' => FORMAT_ITEM_HEX32, - 'x' => FORMAT_ITEM_HEX16 - ]; - - let ignored_arg_opts: HashSet<_> = ['A', 'j', 'N', 'S', 'w'].iter().cloned().collect(); - let mut formats = Vec::new(); // args[0] is the name of the binary @@ -103,17 +137,17 @@ pub fn parse_format_flags(args: &Vec) -> Result {} // not every option is a format Some(r) => { - formats.push(ParsedFormatterItemInfo::new(*r, false)) + formats.push(ParsedFormatterItemInfo::new(r, false)) } } } @@ -138,166 +172,148 @@ pub fn parse_format_flags(args: &Vec) 
-> Result Option { + match ch { + 'a' => Some(FormatType::Ascii), + 'c' => Some(FormatType::Char), + 'd' => Some(FormatType::DecimalInt), + 'o' => Some(FormatType::OctalInt), + 'u' => Some(FormatType::UnsignedInt), + 'x' => Some(FormatType::HexadecimalInt), + 'f' => Some(FormatType::Float), + _ => None, + } +} + + +fn format_type_category(t: FormatType) -> FormatTypeCategory { + match t { + FormatType::Ascii | FormatType::Char + => FormatTypeCategory::Char, + FormatType::DecimalInt | FormatType::OctalInt | FormatType::UnsignedInt | FormatType::HexadecimalInt + => FormatTypeCategory::Integer, + FormatType::Float + => FormatTypeCategory::Float, + } +} + +fn is_format_size_char(ch: Option, format_type: FormatTypeCategory, byte_size: &mut u8) -> bool { + + match (format_type, ch) { + (FormatTypeCategory::Integer, Some('C')) => { + *byte_size = 1; + true + }, + (FormatTypeCategory::Integer, Some('S')) => { + *byte_size = 2; + true + }, + (FormatTypeCategory::Integer, Some('I')) => { + *byte_size = 4; + true + }, + (FormatTypeCategory::Integer, Some('L')) => { + *byte_size = 8; + true + }, + + (FormatTypeCategory::Float, Some('F')) => { + *byte_size = 4; + true + }, + (FormatTypeCategory::Float, Some('D')) => { + *byte_size = 8; + true + }, + // FormatTypeCategory::Float, 'L' => *byte_size = 16, // TODO support f128 + + _ => false, + } +} + +fn is_format_size_decimal(ch: Option, format_type: FormatTypeCategory, decimal_size: &mut String) -> bool { + if format_type == FormatTypeCategory::Char { return false; } + match ch { + Some(d) if d.is_digit(10) => { + decimal_size.push(d); + return true; + } + _ => false, + } +} + +fn is_format_dump_char(ch: Option, show_ascii_dump: &mut bool) -> bool { + match ch { + Some('z') => { + *show_ascii_dump = true; + return true; + } + _ => false, + } } fn parse_type_string(params: &String) -> Result, String> { - - let type_chars: HashSet<_> = ['a', 'c'].iter().cloned().collect(); - let type_ints: HashSet<_> = ['d', 'o', 'u', 
'x'].iter().cloned().collect(); - let type_floats: HashSet<_> = ['f'].iter().cloned().collect(); - let type_all: HashSet<_> = - type_chars.iter() - .chain(type_ints.iter()) - .chain(type_floats.iter()) - .collect(); - let mut formats = Vec::new(); - // first split a type string into parts refering a single type - let mut type_parts = Vec::new(); - let mut s = String::new(); - for c in params.chars() { - if type_all.contains(&c) { - if !s.is_empty() { - type_parts.push(s); - s = String::new(); + let mut chars=params.chars(); + let mut ch = chars.next(); + + while ch.is_some() { + let type_char = ch.unwrap(); + let type_char = match format_type(type_char) { + Some(t) => t, + None => { + return Err(format!("unexpected char '{}' in format specification '{}'", type_char, params)); } - s.push(c); - } - else { - if s.is_empty() { - return Err(format!("unexpected char '{}' in format specification '{}'", c, params)); - } - s.push(c); - } - } - if !s.is_empty() { - type_parts.push(s); - } + }; - for format_type in type_parts.iter() { - let mut chars=format_type.chars(); + let type_cat = format_type_category(type_char); - let type_char = chars.next().unwrap(); + ch = chars.next(); - let mut parse_state = ParseState::ExpectSize; - let mut decimal_size = String::new(); let mut byte_size = 0u8; let mut show_ascii_dump = false; - - if type_chars.contains(&type_char) { - parse_state = ParseState::ExpectDump; + if is_format_size_char(ch, type_cat, &mut byte_size) { + ch = chars.next(); } - - loop { - match chars.next() { - None => break, - Some('z') if parse_state != ParseState::Finished => { - show_ascii_dump = true; - parse_state = ParseState::Finished; - }, - Some(d) if d.is_digit(10) - && (parse_state == ParseState::ExpectSize || parse_state == ParseState::ExpectDecimal) => { - decimal_size.push(d); - parse_state = ParseState::ExpectDecimal; - }, - - Some('C') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { - byte_size = 1; - parse_state = 
ParseState::ExpectDump; - }, - Some('S') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { - byte_size = 2; - parse_state = ParseState::ExpectDump; - }, - Some('I') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { - byte_size = 4; - parse_state = ParseState::ExpectDump; - }, - Some('L') if type_ints.contains(&type_char) && parse_state == ParseState::ExpectSize => { - byte_size = 8; - parse_state = ParseState::ExpectDump; - }, - - Some('F') if type_char == 'f' && parse_state == ParseState::ExpectSize => { - byte_size = 4; - parse_state = ParseState::ExpectDump; - }, - Some('D') if type_char == 'f' && parse_state == ParseState::ExpectSize => { - byte_size = 8; - parse_state = ParseState::ExpectDump; - }, - // Some('L') if type_char == 'f' => byte_size = 16, // TODO support f128 - - Some(c) => { - return Err(format!("unexpected char '{}' in format specification '{}'", c, format_type)); + else { + let mut decimal_size = String::new(); + while is_format_size_decimal(ch, type_cat, &mut decimal_size) { + ch = chars.next(); + } + if !decimal_size.is_empty() { + byte_size=match decimal_size.parse() { + Err(_) => return Err(format!("invalid number '{}' in format specification '{}'", decimal_size, params)), + Ok(n) => n, } } } - - if !decimal_size.is_empty() { - byte_size=match decimal_size.parse() { - Err(_) => return Err(format!("invalid number '{}' in format specification '{}'", decimal_size, format_type)), - Ok(n) => n, - } + if is_format_dump_char(ch, &mut show_ascii_dump) { + ch = chars.next(); } - match type_char { - 'a' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_A, show_ascii_dump)), - 'c' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, show_ascii_dump)), - 'd' => { - formats.push(ParsedFormatterItemInfo::new(match byte_size { - 1 => FORMAT_ITEM_DEC8S, - 2 => FORMAT_ITEM_DEC16S, - 4|0 => FORMAT_ITEM_DEC32S, - 8 => FORMAT_ITEM_DEC64S, - _ => return Err(format!("invalid size '{}' in 
format specification '{}'", byte_size, format_type)), - }, show_ascii_dump)); - }, - 'o' => { - formats.push(ParsedFormatterItemInfo::new(match byte_size { - 1 => FORMAT_ITEM_OCT8, - 2 => FORMAT_ITEM_OCT16, - 4|0 => FORMAT_ITEM_OCT32, - 8 => FORMAT_ITEM_OCT64, - _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }, show_ascii_dump)); - }, - 'u' => { - formats.push(ParsedFormatterItemInfo::new(match byte_size { - 1 => FORMAT_ITEM_DEC8U, - 2 => FORMAT_ITEM_DEC16U, - 4|0 => FORMAT_ITEM_DEC32U, - 8 => FORMAT_ITEM_DEC64U, - _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }, show_ascii_dump)); - }, - 'x' => { - formats.push(ParsedFormatterItemInfo::new(match byte_size { - 1 => FORMAT_ITEM_HEX8, - 2 => FORMAT_ITEM_HEX16, - 4|0 => FORMAT_ITEM_HEX32, - 8 => FORMAT_ITEM_HEX64, - _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }, show_ascii_dump)); - }, - 'f' => { - formats.push(ParsedFormatterItemInfo::new(match byte_size { - 4|0 => FORMAT_ITEM_F32, - 8 => FORMAT_ITEM_F64, - _ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)), - }, show_ascii_dump)); - }, - _ => unreachable!(), + match od_format_type(type_char, byte_size) { + Some(ft) => formats.push(ParsedFormatterItemInfo::new(ft, show_ascii_dump)), + None => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, params)), } - - if show_ascii_dump { /*TODO*/ } } Ok(formats) From 99f70ba648773bf33a70a196ffda8af9e545ecd9 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sat, 3 Sep 2016 21:47:36 +0200 Subject: [PATCH 40/41] od: implement 16-bit floating point type --- Cargo.lock | 6 ++++++ src/od/Cargo.toml | 1 + src/od/inputdecoder.rs | 2 ++ src/od/od.rs | 1 + src/od/parse_formats.rs | 3 ++- src/od/prn_float.rs | 41 +++++++++++++++++++++++++++++++++++++++++ tests/test_od.rs | 23 
+++++++++++++++++++++++ 7 files changed, 76 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 0a8b0fe86..c06c977f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -392,6 +392,11 @@ dependencies = [ "uucore 0.0.1", ] +[[package]] +name = "half" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "hashsum" version = "0.0.1" @@ -665,6 +670,7 @@ version = "0.0.1" dependencies = [ "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", + "half 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "uucore 0.0.1", ] diff --git a/src/od/Cargo.toml b/src/od/Cargo.toml index c6b86e354..a500caecc 100644 --- a/src/od/Cargo.toml +++ b/src/od/Cargo.toml @@ -11,6 +11,7 @@ path = "od.rs" getopts = "*" libc = "*" byteorder = "*" +half = "*" uucore = { path="../uucore" } [[bin]] diff --git a/src/od/inputdecoder.rs b/src/od/inputdecoder.rs index 01143d24b..97106fa2e 100644 --- a/src/od/inputdecoder.rs +++ b/src/od/inputdecoder.rs @@ -2,6 +2,7 @@ use std::io; use byteorder_io::ByteOrder; use multifilereader::HasError; use peekreader::PeekRead; +use half::f16; /// Processes an input and provides access to the data read in various formats /// @@ -128,6 +129,7 @@ impl<'a> MemoryDecoder<'a> { /// Returns a f32/f64 from the internal buffer at position `start`. 
pub fn read_float(&self, start: usize, byte_size: usize) -> f64 { match byte_size { + 2 => f64::from(f16::from_bits(self.byte_order.read_u16(&self.data[start..start + 2]))), 4 => self.byte_order.read_f32(&self.data[start..start + 4]) as f64, 8 => self.byte_order.read_f64(&self.data[start..start + 8]), _ => panic!("Invalid byte_size: {}", byte_size), diff --git a/src/od/od.rs b/src/od/od.rs index a20856ba0..93e7c028d 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -11,6 +11,7 @@ extern crate getopts; extern crate byteorder; +extern crate half; #[macro_use] extern crate uucore; diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs index db4b20daa..5f719deba 100644 --- a/src/od/parse_formats.rs +++ b/src/od/parse_formats.rs @@ -73,6 +73,7 @@ fn od_format_type(type_char: FormatType, byte_size: u8) -> Option Some(FORMAT_ITEM_HEX32), (FormatType::HexadecimalInt, 8) => Some(FORMAT_ITEM_HEX64), + (FormatType::Float, 2) => Some(FORMAT_ITEM_F16), (FormatType::Float, 0) | (FormatType::Float, 4) => Some(FORMAT_ITEM_F32), (FormatType::Float, 8) => Some(FORMAT_ITEM_F64), @@ -402,7 +403,7 @@ fn test_invalid_long_format() { parse_format_flags_str(&vec!("od", "--format=c1")).unwrap_err(); parse_format_flags_str(&vec!("od", "--format=x256")).unwrap_err(); parse_format_flags_str(&vec!("od", "--format=d5")).unwrap_err(); - parse_format_flags_str(&vec!("od", "--format=f2")).unwrap_err(); + parse_format_flags_str(&vec!("od", "--format=f1")).unwrap_err(); } #[test] diff --git a/src/od/prn_float.rs b/src/od/prn_float.rs index 22918ccb4..bf6403bbd 100644 --- a/src/od/prn_float.rs +++ b/src/od/prn_float.rs @@ -1,8 +1,15 @@ use std::num::FpCategory; +use half::f16; use std::f32; use std::f64; use formatteriteminfo::*; +pub static FORMAT_ITEM_F16: FormatterItemInfo = FormatterItemInfo { + byte_size: 2, + print_width: 10, + formatter: FormatWriter::FloatWriter(format_item_flo16), +}; + pub static FORMAT_ITEM_F32: FormatterItemInfo = FormatterItemInfo { byte_size: 4, print_width: 15, 
@@ -15,6 +22,9 @@ pub static FORMAT_ITEM_F64: FormatterItemInfo = FormatterItemInfo { formatter: FormatWriter::FloatWriter(format_item_flo64), }; +pub fn format_item_flo16(f: f64) -> String { + format!(" {}", format_flo16(f16::from_f64(f))) +} pub fn format_item_flo32(f: f64) -> String { format!(" {}", format_flo32(f as f32)) @@ -24,6 +34,10 @@ pub fn format_item_flo64(f: f64) -> String { format!(" {}", format_flo64(f)) } +fn format_flo16(f: f16) -> String { + format_float(f64::from(f), 9, 4) +} + // formats float with 8 significant digits, eg 12345678 or -1.2345678e+12 // always retuns a string of 14 characters fn format_flo32(f: f32) -> String { @@ -171,3 +185,30 @@ fn test_format_flo64() { assert_eq!(format_flo64(-0.0), " -0"); assert_eq!(format_flo64(0.0), " 0"); } + +#[test] +fn test_format_flo16() { + use half::consts::*; + + assert_eq!(format_flo16(f16::from_bits(0x8400u16)), "-6.104e-5"); + assert_eq!(format_flo16(f16::from_bits(0x8401u16)), "-6.109e-5"); + assert_eq!(format_flo16(f16::from_bits(0x8402u16)), "-6.115e-5"); + assert_eq!(format_flo16(f16::from_bits(0x8403u16)), "-6.121e-5"); + + assert_eq!(format_flo16(f16::from_f32(1.0)), " 1.000"); + assert_eq!(format_flo16(f16::from_f32(10.0)), " 10.00"); + assert_eq!(format_flo16(f16::from_f32(100.0)), " 100.0"); + assert_eq!(format_flo16(f16::from_f32(1000.0)), " 1000"); + assert_eq!(format_flo16(f16::from_f32(10000.0)), " 1.000e4"); + + assert_eq!(format_flo16(f16::from_f32(-0.2)), " -0.2000"); + assert_eq!(format_flo16(f16::from_f32(-0.02)), "-2.000e-2"); + + assert_eq!(format_flo16(MIN_POSITIVE_SUBNORMAL), " 5.966e-8"); + assert_eq!(format_flo16(MIN), " -6.550e4"); + assert_eq!(format_flo16(NAN), " NaN"); + assert_eq!(format_flo16(INFINITY), " inf"); + assert_eq!(format_flo16(NEG_INFINITY), " -inf"); + assert_eq!(format_flo16(NEG_ZERO), " -0"); + assert_eq!(format_flo16(ZERO), " 0"); +} diff --git a/tests/test_od.rs b/tests/test_od.rs index 92828012f..1446f53e3 100644 --- a/tests/test_od.rs +++ 
b/tests/test_od.rs @@ -198,6 +198,29 @@ fn test_hex32(){ assert_eq!(result.stdout, expected_output); } +#[test] +fn test_f16(){ + + let input : [u8; 14] = [ + 0x00, 0x3c, // 0x3C00 1.0 + 0x00, 0x00, // 0x0000 0.0 + 0x00, 0x80, // 0x8000 -0.0 + 0x00, 0x7c, // 0x7C00 Inf + 0x00, 0xfc, // 0xFC00 -Inf + 0x00, 0xfe, // 0xFE00 NaN + 0x00, 0x84];// 0x8400 -6.104e-5 + let expected_output = unindent(" + 0000000 1.000 0 -0 inf + 0000010 -inf NaN -6.104e-5 + 0000016 + "); + let result = new_ucmd!().arg("--endian=little").arg("-tf2").arg("-w8").run_piped_stdin(&input[..]); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, expected_output); +} + #[test] fn test_f32(){ From 2550e0f3c75f56d24157c1b7b769925fb57e5910 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Wed, 9 Nov 2016 20:26:55 +0100 Subject: [PATCH 41/41] od: several small changes after review * update status in README.md * enable busybox tests Adding `CONFIG_DESKTOP` and `CONFIG_LONG_OPTS` to busybox config. These flags also enable other tests, but those utilities are not included in `TEST_PROGS`. (eg. 
awk) * fix whitespace and small issues * fix Eq imp for FormatWriter on nightly + beta * fix indention in multifilereader.rs * fix intermittent errors in tests --- .busybox-config | 2 + README.md | 2 +- src/od/formatteriteminfo.rs | 4 +- src/od/inputdecoder.rs | 19 +++-- src/od/mockstream.rs | 3 +- src/od/multifilereader.rs | 16 ++-- src/od/od.rs | 22 +++--- src/od/output_info.rs | 29 +++---- src/od/parse_formats.rs | 31 +++----- src/od/parse_inputs.rs | 153 +++++++++++++++++------------------- src/od/parse_nrofbytes.rs | 43 +++++----- src/od/partialreader.rs | 2 +- src/od/peekreader.rs | 3 +- src/od/prn_char.rs | 22 ++---- src/od/prn_float.rs | 10 +-- tests/test_od.rs | 66 ++++------------ 16 files changed, 180 insertions(+), 247 deletions(-) diff --git a/.busybox-config b/.busybox-config index d1fb62794..e6921536f 100644 --- a/.busybox-config +++ b/.busybox-config @@ -1,2 +1,4 @@ CONFIG_FEATURE_FANCY_HEAD=y CONFIG_UNICODE_SUPPORT=y +CONFIG_DESKTOP=y +CONFIG_LONG_OPTS=y diff --git a/README.md b/README.md index 7257f21dd..cdb4b1173 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ To do * [x] nohup * [x] nproc * [ ] numfmt -* [ ] od (in progress, needs lots of work) +* [ ] od (almost complete, `--strings` and 128-bit datatypes are missing) * [x] paste * [x] pathchk * [x] pinky diff --git a/src/od/formatteriteminfo.rs b/src/od/formatteriteminfo.rs index 5118571fe..9a5c3e236 100644 --- a/src/od/formatteriteminfo.rs +++ b/src/od/formatteriteminfo.rs @@ -1,6 +1,6 @@ use std::fmt; -#[derive(Copy, Eq)] +#[derive(Copy)] pub enum FormatWriter { IntWriter(fn(u64) -> String), FloatWriter(fn(f64) -> String), @@ -27,6 +27,8 @@ impl PartialEq for FormatWriter { } } +impl Eq for FormatWriter {} + impl fmt::Debug for FormatWriter { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { diff --git a/src/od/inputdecoder.rs b/src/od/inputdecoder.rs index 97106fa2e..e58cf79f3 100644 --- a/src/od/inputdecoder.rs +++ b/src/od/inputdecoder.rs @@ -26,12 +26,11 @@ 
pub struct InputDecoder<'a, I> where I: 'a { } impl<'a, I> InputDecoder<'a, I> { - /// Creates a new `InputDecoder` with an allocated buffer of `normal_length`+`peek_length` bytes. + /// Creates a new `InputDecoder` with an allocated buffer of `normal_length` + `peek_length` bytes. /// `byte_order` determines how to read multibyte formats from the buffer. pub fn new(input: &mut I, normal_length: usize, peek_length: usize, byte_order: ByteOrder) -> InputDecoder { - - let mut bytes: Vec = Vec::with_capacity(normal_length+peek_length); - unsafe { bytes.set_len(normal_length+peek_length); } // fast but uninitialized + let mut bytes: Vec = Vec::with_capacity(normal_length + peek_length); + unsafe { bytes.set_len(normal_length + peek_length); } // fast but uninitialized InputDecoder { input: input, @@ -45,7 +44,7 @@ impl<'a, I> InputDecoder<'a, I> { } -impl<'a, I> InputDecoder<'a, I> where I : PeekRead { +impl<'a, I> InputDecoder<'a, I> where I: PeekRead { /// calls `peek_read` on the internal stream to (re)fill the buffer. Returns a /// MemoryDecoder providing access to the result or returns an i/o error. pub fn peek_read(&mut self) -> io::Result { @@ -66,7 +65,7 @@ impl<'a, I> InputDecoder<'a, I> where I : PeekRead { } } -impl<'a, I> HasError for InputDecoder<'a, I> where I : HasError { +impl<'a, I> HasError for InputDecoder<'a, I> where I: HasError { /// calls has_error on the internal stream. fn has_error(&self) -> bool { self.input.has_error() @@ -112,7 +111,7 @@ impl<'a> MemoryDecoder<'a> { /// Returns a slice to the internal buffer including the peek data starting at `start`. pub fn get_full_buffer(&self, start: usize) -> &[u8] { - &self.data[start..self.used_normal_length+self.used_peek_length] + &self.data[start..self.used_normal_length + self.used_peek_length] } /// Returns a u8/u16/u32/u64 from the internal buffer at position `start`. 
@@ -147,8 +146,8 @@ mod tests { #[test] fn smoke_test() { let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0xff, 0xff]; - let mut input=PeekReader::new(Cursor::new(&data)); - let mut sut=InputDecoder::new(&mut input, 8, 2, ByteOrder::Little); + let mut input = PeekReader::new(Cursor::new(&data)); + let mut sut = InputDecoder::new(&mut input, 8, 2, ByteOrder::Little); match sut.peek_read() { Ok(mut mem) => { @@ -165,7 +164,7 @@ mod tests { let mut copy: Vec = Vec::new(); mem.clone_buffer(&mut copy); - assert_eq!(vec!{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0}, copy); + assert_eq!(vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0], copy); mem.zero_out_buffer(7, 8); assert_eq!(&[0, 0, 0xff, 0xff], mem.get_full_buffer(6)); diff --git a/src/od/mockstream.rs b/src/od/mockstream.rs index 4dd02e22f..5adfce2ae 100644 --- a/src/od/mockstream.rs +++ b/src/od/mockstream.rs @@ -62,8 +62,7 @@ impl FailingMockStream { fn error(&mut self) -> Result { if self.repeat_count == 0 { return Ok(0) - } - else { + } else { if self.repeat_count > 0 { self.repeat_count -= 1; } diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs index 6ab66cf68..127b8b123 100644 --- a/src/od/multifilereader.rs +++ b/src/od/multifilereader.rs @@ -6,7 +6,7 @@ use std::io::Write; use std::vec::Vec; pub enum InputSource<'a> { - FileName(&'a str ), + FileName(&'a str), Stdin, #[allow(dead_code)] Stream(Box), @@ -31,26 +31,26 @@ impl<'b> MultifileReader<'b> { any_err: false, }; mf.next_file(); - return mf; + mf } fn next_file(&mut self) { // loop retries with subsequent files if err - normally 'loops' once loop { if self.ni.len() == 0 { - self.curr_file = None; - return; + self.curr_file = None; + break; } match self.ni.remove(0) { InputSource::Stdin => { self.curr_file = Some(Box::new(BufReader::new(std::io::stdin()))); - return; + break; } InputSource::FileName(fname) => { match File::open(fname) { Ok(f) => { self.curr_file = Some(Box::new(BufReader::new(f))); - return; + break; } 
Err(e) => { // If any file can't be opened, @@ -66,7 +66,7 @@ impl<'b> MultifileReader<'b> { } InputSource::Stream(s) => { self.curr_file = Some(s); - return; + break; } } } @@ -74,7 +74,6 @@ impl<'b> MultifileReader<'b> { } impl<'b> io::Read for MultifileReader<'b> { - // Fill buf with bytes read from the list of files // Returns Ok() // Handles io errors itself, thus always returns OK @@ -192,5 +191,4 @@ mod tests { assert_eq!(sut.read(v.as_mut()).unwrap(), 3); assert_eq!(v, [0x42, 0x43, 0x44, 0x64, 0x41]); // last two bytes are not overwritten } - } diff --git a/src/od/od.rs b/src/od/od.rs index 93e7c028d..4228fe032 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -147,8 +147,8 @@ fn create_getopts_options() -> getopts::Options { struct OdOptions { byte_order: ByteOrder, - skip_bytes : usize, - read_bytes : Option, + skip_bytes: usize, + read_bytes: Option, label: Option, input_strings: Vec, formats: Vec, @@ -187,7 +187,7 @@ impl OdOptions { Ok(CommandLineInputs::FileAndOffset((f, s, l))) => { skip_bytes = s; label = l; - vec!{f} + vec![f] }, Err(e) => { return Err(format!("Invalid inputs: {}", e)); @@ -311,7 +311,7 @@ pub fn uumain(args: Vec) -> i32 { /// Loops through the input line by line, calling print_bytes to take care of the output. 
fn odfunc(input_offset: &mut InputOffset, input_decoder: &mut InputDecoder, output_info: &OutputInfo) -> i32 - where I : PeekRead+HasError { + where I: PeekRead + HasError { let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); let line_bytes = output_info.byte_size_line; @@ -321,7 +321,7 @@ fn odfunc(input_offset: &mut InputOffset, input_decoder: &mut InputDecoder match input_decoder.peek_read() { Ok(mut memory_decoder) => { - let length=memory_decoder.length(); + let length = memory_decoder.length(); if length == 0 { input_offset.print_final_offset(); @@ -346,8 +346,7 @@ fn odfunc(input_offset: &mut InputOffset, input_decoder: &mut InputDecoder duplicate_line = true; println!("*"); } - } - else { + } else { duplicate_line = false; if length == line_bytes { // save a copy of the input unless it is the last line @@ -409,15 +408,14 @@ fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, output_info: &Output output_text.push_str(&format!("{:>width$} {}", "", format_ascii_dump(input_decoder.get_buffer(0)), - width=missing_spacing)); + width = missing_spacing)); } if first { print!("{}", prefix); // print offset // if printing in multiple formats offset is printed only once first = false; - } - else { + } else { // this takes the space of the file offset on subsequent // lines of multi-format rasters. 
print!("{:>width$}", "", width=prefix.chars().count()); @@ -426,13 +424,13 @@ fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, output_info: &Output } } -/// returns a reader implementing `PeekRead+Read+HasError` providing the combined input +/// returns a reader implementing `PeekRead + Read + HasError` providing the combined input /// /// `skip_bytes` is the number of bytes skipped from the input /// `read_bytes` is an optinal limit to the number of bytes to read fn open_input_peek_reader<'a>(input_strings: &'a Vec, skip_bytes: usize, read_bytes: Option) -> PeekReader>> { - // should return "impl PeekRead+Read+HasError" when supported in (stable) rust + // should return "impl PeekRead + Read + HasError" when supported in (stable) rust let inputs = input_strings .iter() .map(|w| match w as &str { diff --git a/src/od/output_info.rs b/src/od/output_info.rs index 4af3bef9e..5f8d8733e 100644 --- a/src/od/output_info.rs +++ b/src/od/output_info.rs @@ -49,7 +49,6 @@ impl OutputInfo { /// Creates a new `OutputInfo` based on the parameters pub fn new(line_bytes: usize, formats: &[ParsedFormatterItemInfo], output_duplicates: bool) -> OutputInfo { - let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size)); let print_width_block = formats .iter() @@ -129,7 +128,6 @@ impl OutputInfo { /// Increase MAX_BYTES_PER_UNIT to allow larger types. fn calculate_alignment(sf: &TypeSizeInfo, byte_size_block: usize, print_width_block: usize) -> [usize; MAX_BYTES_PER_UNIT] { - if byte_size_block > MAX_BYTES_PER_UNIT { panic!("{}-bits types are unsupported. 
Current max={}-bits.", 8 * byte_size_block, @@ -181,7 +179,6 @@ impl TypeSizeInfo for TypeInfo { #[test] fn test_calculate_alignment() { - // For this example `byte_size_block` is 8 and 'print_width_block' is 23: // 1777777777777777777777 1777777777777777777777 // 4294967295 4294967295 4294967295 4294967295 @@ -214,31 +211,31 @@ fn test_calculate_alignment() { // 9 tests where 8 .. 16 spaces are spread across 8 positions assert_eq!([1, 1, 1, 1, 1, 1, 1, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+8)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 8)); assert_eq!([2, 1, 1, 1, 1, 1, 1, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+9)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 9)); assert_eq!([2, 1, 1, 1, 2, 1, 1, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+10)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 10)); assert_eq!([3, 1, 1, 1, 2, 1, 1, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+11)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 11)); assert_eq!([2, 1, 2, 1, 2, 1, 2, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+12)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 12)); assert_eq!([3, 1, 2, 1, 2, 1, 2, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+13)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 13)); assert_eq!([3, 1, 2, 1, 3, 1, 2, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+14)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 14)); assert_eq!([4, 1, 2, 1, 3, 1, 2, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 
16+15)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 15)); assert_eq!([2, 2, 2, 2, 2, 2, 2, 2], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+16)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 16)); // 4 tests where 15 spaces are spread across 8, 4, 2 or 1 position(s) assert_eq!([4, 1, 2, 1, 3, 1, 2, 1], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16+15)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:1, print_width:2}, 8, 16 + 15)); assert_eq!([5, 0, 3, 0, 4, 0, 3, 0], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:4}, 8, 16+15)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:2, print_width:4}, 8, 16 + 15)); assert_eq!([8, 0, 0, 0, 7, 0, 0, 0], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:8}, 8, 16+15)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:4, print_width:8}, 8, 16 + 15)); assert_eq!([15, 0, 0, 0, 0, 0, 0, 0], - OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:16}, 8, 16+15)); + OutputInfo::calculate_alignment(&TypeInfo{byte_size:8, print_width:16}, 8, 16 + 15)); } diff --git a/src/od/parse_formats.rs b/src/od/parse_formats.rs index 5f719deba..472208e5a 100644 --- a/src/od/parse_formats.rs +++ b/src/od/parse_formats.rs @@ -115,8 +115,7 @@ pub fn parse_format_flags(args: &Vec) -> Result return Err(e), } expect_type_string = false; - } - else if arg.starts_with("--") { + } else if arg.starts_with("--") { if arg.len() == 2 { break; } @@ -130,26 +129,20 @@ pub fn parse_format_flags(args: &Vec) -> Result {} // not every option is a format - Some(r) => { - formats.push(ParsedFormatterItemInfo::new(r, false)) - } + } else { + // not every option is a format + if let Some(r) = od_argument_traditional_format(c) { + formats.push(ParsedFormatterItemInfo::new(r, false)) } } } @@ -217,7 +210,6 @@ fn format_type_category(t: FormatType) -> 
FormatTypeCategory { } fn is_format_size_char(ch: Option, format_type: FormatTypeCategory, byte_size: &mut u8) -> bool { - match (format_type, ch) { (FormatTypeCategory::Integer, Some('C')) => { *byte_size = 1; @@ -274,7 +266,7 @@ fn is_format_dump_char(ch: Option, show_ascii_dump: &mut bool) -> bool { fn parse_type_string(params: &String) -> Result, String> { let mut formats = Vec::new(); - let mut chars=params.chars(); + let mut chars = params.chars(); let mut ch = chars.next(); while ch.is_some() { @@ -294,14 +286,13 @@ fn parse_type_string(params: &String) -> Result, St let mut show_ascii_dump = false; if is_format_size_char(ch, type_cat, &mut byte_size) { ch = chars.next(); - } - else { + } else { let mut decimal_size = String::new(); while is_format_size_decimal(ch, type_cat, &mut decimal_size) { ch = chars.next(); } if !decimal_size.is_empty() { - byte_size=match decimal_size.parse() { + byte_size = match decimal_size.parse() { Err(_) => return Err(format!("invalid number '{}' in format specification '{}'", decimal_size, params)), Ok(n) => n, } diff --git a/src/od/parse_inputs.rs b/src/od/parse_inputs.rs index e87a85a06..9ac04e23c 100644 --- a/src/od/parse_inputs.rs +++ b/src/od/parse_inputs.rs @@ -40,7 +40,6 @@ pub enum CommandLineInputs { /// '-' is used as filename if stdin is meant. This is also returned if /// there is no input, as stdin is the default input. 
pub fn parse_inputs(matches: &CommandLineOpts) -> Result { - let mut input_strings: Vec = matches.inputs(); if matches.opts_present(&["traditional"]) { @@ -53,7 +52,7 @@ pub fn parse_inputs(matches: &CommandLineOpts) -> Result { // if there is just 1 input (stdin), an offset must start with '+' @@ -80,22 +79,22 @@ pub fn parse_inputs(matches: &CommandLineOpts) -> Result) -> Result { match input_strings.len() { 0 => { - Ok(CommandLineInputs::FileNames(vec!{"-".to_string()})) + Ok(CommandLineInputs::FileNames(vec!["-".to_string()])) } 1 => { - let offset0=parse_offset_operand(&input_strings[0]); + let offset0 = parse_offset_operand(&input_strings[0]); Ok(match offset0 { Ok(n) => CommandLineInputs::FileAndOffset(("-".to_string(), n, None)), _ => CommandLineInputs::FileNames(input_strings), }) } 2 => { - let offset0=parse_offset_operand(&input_strings[0]); - let offset1=parse_offset_operand(&input_strings[1]); + let offset0 = parse_offset_operand(&input_strings[0]); + let offset1 = parse_offset_operand(&input_strings[1]); match (offset0, offset1) { (Ok(n), Ok(m)) => Ok(CommandLineInputs::FileAndOffset(("-".to_string(), n, Some(m)))), (_, Ok(m)) => Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), m, None))), @@ -103,8 +102,8 @@ pub fn parse_inputs_traditional(input_strings: Vec) -> Result { - let offset=parse_offset_operand(&input_strings[1]); - let label=parse_offset_operand(&input_strings[2]); + let offset = parse_offset_operand(&input_strings[1]); + let label = parse_offset_operand(&input_strings[2]); match (offset, label) { (Ok(n), Ok(m)) => Ok(CommandLineInputs::FileAndOffset((input_strings[0].clone(), n, Some(m)))), (Err(_), _) => Err(format!("invalid offset: {}", input_strings[1])), @@ -131,8 +130,7 @@ pub fn parse_offset_operand(s: &String) -> Result { if s[start..len].starts_with("0x") || s[start..len].starts_with("0X") { start += 2; radix = 16; - } - else { + } else { if s[start..len].ends_with("b") { len -= 1; multiply = 512; @@ -178,7 +176,7 @@ 
mod tests { fn opts_present(&self, opts: &[&str]) -> bool { for expected in opts.iter() { for actual in self.option_names.iter() { - if *expected==*actual { + if *expected == *actual { return true; } } @@ -189,31 +187,30 @@ mod tests { #[test] fn test_parse_inputs_normal() { - - assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string()]), parse_inputs(&MockOptions::new( - vec!{}, - vec!{})).unwrap()); + vec![], + vec![])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string()]), parse_inputs(&MockOptions::new( - vec!{"-"}, - vec!{})).unwrap()); + vec!["-"], + vec![])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string()]), parse_inputs(&MockOptions::new( - vec!{"file1"}, - vec!{})).unwrap()); + vec!["file1"], + vec![])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "file2".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string(), "file2".to_string()]), parse_inputs(&MockOptions::new( - vec!{"file1", "file2"}, - vec!{})).unwrap()); + vec!["file1", "file2"], + vec![])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string(), "file1".to_string(), "file2".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string(), "file1".to_string(), "file2".to_string()]), parse_inputs(&MockOptions::new( - vec!{"-", "file1", "file2"}, - vec!{})).unwrap()); + vec!["-", "file1", "file2"], + vec![])).unwrap()); } #[test] @@ -221,113 +218,112 @@ mod tests { // offset is found without filename, so stdin will be used. assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), parse_inputs(&MockOptions::new( - vec!{"+10"}, - vec!{})).unwrap()); + vec!["+10"], + vec![])).unwrap()); // offset must start with "+" if no input is specified. 
- assert_eq!(CommandLineInputs::FileNames(vec!{"10".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["10".to_string()]), parse_inputs(&MockOptions::new( - vec!{"10"}, - vec!{""})).unwrap()); + vec!["10"], + vec![""])).unwrap()); // offset is not valid, so it is considered a filename. - assert_eq!(CommandLineInputs::FileNames(vec!{"+10a".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["+10a".to_string()]), parse_inputs(&MockOptions::new( - vec!{"+10a"}, - vec!{""})).unwrap()); + vec!["+10a"], + vec![""])).unwrap()); // if -j is included in the commandline, there cannot be an offset. - assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["+10".to_string()]), parse_inputs(&MockOptions::new( - vec!{"+10"}, - vec!{"j"})).unwrap()); + vec!["+10"], + vec!["j"])).unwrap()); // if -v is included in the commandline, there cannot be an offset. - assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["+10".to_string()]), parse_inputs(&MockOptions::new( - vec!{"+10"}, - vec!{"o", "v"})).unwrap()); + vec!["+10"], + vec!["o", "v"])).unwrap()); assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), parse_inputs(&MockOptions::new( - vec!{"file1", "+10"}, - vec!{})).unwrap()); + vec!["file1", "+10"], + vec![])).unwrap()); // offset does not need to start with "+" if a filename is included. 
assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), parse_inputs(&MockOptions::new( - vec!{"file1", "10"}, - vec!{})).unwrap()); + vec!["file1", "10"], + vec![])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "+10a".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string(), "+10a".to_string()]), parse_inputs(&MockOptions::new( - vec!{"file1", "+10a"}, - vec!{""})).unwrap()); + vec!["file1", "+10a"], + vec![""])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string(), "+10".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string(), "+10".to_string()]), parse_inputs(&MockOptions::new( - vec!{"file1", "+10"}, - vec!{"j"})).unwrap()); + vec!["file1", "+10"], + vec!["j"])).unwrap()); // offset must be last on the commandline - assert_eq!(CommandLineInputs::FileNames(vec!{"+10".to_string(), "file1".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["+10".to_string(), "file1".to_string()]), parse_inputs(&MockOptions::new( - vec!{"+10", "file1"}, - vec!{""})).unwrap()); + vec!["+10", "file1"], + vec![""])).unwrap()); } #[test] fn test_parse_inputs_traditional() { - // it should not return FileAndOffset to signal no offset was entered on the commandline. 
- assert_eq!(CommandLineInputs::FileNames(vec!{"-".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["-".to_string()]), parse_inputs(&MockOptions::new( - vec!{}, - vec!{"traditional"})).unwrap()); + vec![], + vec!["traditional"])).unwrap()); - assert_eq!(CommandLineInputs::FileNames(vec!{"file1".to_string()}), + assert_eq!(CommandLineInputs::FileNames(vec!["file1".to_string()]), parse_inputs(&MockOptions::new( - vec!{"file1"}, - vec!{"traditional"})).unwrap()); + vec!["file1"], + vec!["traditional"])).unwrap()); // offset does not need to start with a + assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, None)), parse_inputs(&MockOptions::new( - vec!{"10"}, - vec!{"traditional"})).unwrap()); + vec!["10"], + vec!["traditional"])).unwrap()); // valid offset and valid label assert_eq!(CommandLineInputs::FileAndOffset(("-".to_string(), 8, Some(8))), parse_inputs(&MockOptions::new( - vec!{"10", "10"}, - vec!{"traditional"})).unwrap()); + vec!["10", "10"], + vec!["traditional"])).unwrap()); assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, None)), parse_inputs(&MockOptions::new( - vec!{"file1", "10"}, - vec!{"traditional"})).unwrap()); + vec!["file1", "10"], + vec!["traditional"])).unwrap()); // only one file is allowed, it must be the first parse_inputs(&MockOptions::new( - vec!{"10", "file1"}, - vec!{"traditional"})).unwrap_err(); + vec!["10", "file1"], + vec!["traditional"])).unwrap_err(); assert_eq!(CommandLineInputs::FileAndOffset(("file1".to_string(), 8, Some(8))), parse_inputs(&MockOptions::new( - vec!{"file1", "10", "10"}, - vec!{"traditional"})).unwrap()); + vec!["file1", "10", "10"], + vec!["traditional"])).unwrap()); parse_inputs(&MockOptions::new( - vec!{"10", "file1", "10"}, - vec!{"traditional"})).unwrap_err(); + vec!["10", "file1", "10"], + vec!["traditional"])).unwrap_err(); parse_inputs(&MockOptions::new( - vec!{"10", "10", "file1"}, - vec!{"traditional"})).unwrap_err(); + vec!["10", "10", "file1"], + 
vec!["traditional"])).unwrap_err(); parse_inputs(&MockOptions::new( - vec!{"10", "10", "10", "10"}, - vec!{"traditional"})).unwrap_err(); + vec!["10", "10", "10", "10"], + vec!["traditional"])).unwrap_err(); } fn parse_offset_operand_str(s: &str) -> Result { @@ -363,5 +359,4 @@ mod tests { assert_eq!(5120, parse_offset_operand_str("+10.b").unwrap()); // b suffix = *512 assert_eq!(267, parse_offset_operand_str("0x10b").unwrap()); // hex } - } diff --git a/src/od/parse_nrofbytes.rs b/src/od/parse_nrofbytes.rs index 780c5f30b..be55cb7b6 100644 --- a/src/od/parse_nrofbytes.rs +++ b/src/od/parse_nrofbytes.rs @@ -8,8 +8,7 @@ pub fn parse_number_of_bytes(s: &String) -> Result { if s.starts_with("0x") || s.starts_with("0X") { start = 2; radix = 16; - } - else if s.starts_with("0") { + } else if s.starts_with("0") { radix = 8; } @@ -24,40 +23,40 @@ pub fn parse_number_of_bytes(s: &String) -> Result { len -= 1; } Some('m') | Some('M') => { - multiply = 1024*1024; + multiply = 1024 * 1024; len -= 1; } Some('G') => { - multiply = 1024*1024*1024; + multiply = 1024 * 1024 * 1024; len -= 1; } #[cfg(target_pointer_width = "64")] Some('T') => { - multiply = 1024*1024*1024*1024; + multiply = 1024 * 1024 * 1024 * 1024; len -= 1; } #[cfg(target_pointer_width = "64")] Some('P') => { - multiply = 1024*1024*1024*1024*1024; + multiply = 1024 * 1024 * 1024 * 1024 * 1024; len -= 1; } #[cfg(target_pointer_width = "64")] Some('E') => { - multiply = 1024*1024*1024*1024*1024*1024; + multiply = 1024 * 1024 * 1024 * 1024 * 1024 * 1024; len -= 1; } Some('B') if radix != 16 => { len -= 2; multiply = match ends_with.next() { Some('k') | Some('K') => 1000, - Some('m') | Some('M') => 1000*1000, - Some('G') => 1000*1000*1000, + Some('m') | Some('M') => 1000 * 1000, + Some('G') => 1000 * 1000 * 1000, #[cfg(target_pointer_width = "64")] - Some('T') => 1000*1000*1000*1000, + Some('T') => 1000 * 1000 * 1000 * 1000, #[cfg(target_pointer_width = "64")] - Some('P') => 1000*1000*1000*1000*1000, + Some('P') => 
1000 * 1000 * 1000 * 1000 * 1000, #[cfg(target_pointer_width = "64")] - Some('E') => 1000*1000*1000*1000*1000*1000, + Some('E') => 1000 * 1000 * 1000 * 1000 * 1000 * 1000, _ => return Err("parse failed"), } }, @@ -81,11 +80,11 @@ fn test_parse_number_of_bytes() { assert_eq!(0, parse_number_of_bytes_str("0").unwrap()); assert_eq!(5, parse_number_of_bytes_str("5").unwrap()); assert_eq!(999, parse_number_of_bytes_str("999").unwrap()); - assert_eq!(2*512, parse_number_of_bytes_str("2b").unwrap()); - assert_eq!(2*1024, parse_number_of_bytes_str("2k").unwrap()); - assert_eq!(4*1024, parse_number_of_bytes_str("4K").unwrap()); - assert_eq!(2*1048576, parse_number_of_bytes_str("2m").unwrap()); - assert_eq!(4*1048576, parse_number_of_bytes_str("4M").unwrap()); + assert_eq!(2 * 512, parse_number_of_bytes_str("2b").unwrap()); + assert_eq!(2 * 1024, parse_number_of_bytes_str("2k").unwrap()); + assert_eq!(4 * 1024, parse_number_of_bytes_str("4K").unwrap()); + assert_eq!(2 * 1048576, parse_number_of_bytes_str("2m").unwrap()); + assert_eq!(4 * 1048576, parse_number_of_bytes_str("4M").unwrap()); assert_eq!(1073741824, parse_number_of_bytes_str("1G").unwrap()); assert_eq!(2000, parse_number_of_bytes_str("2kB").unwrap()); assert_eq!(4000, parse_number_of_bytes_str("4KB").unwrap()); @@ -95,16 +94,16 @@ fn test_parse_number_of_bytes() { // octal input assert_eq!(8, parse_number_of_bytes_str("010").unwrap()); - assert_eq!(8*512, parse_number_of_bytes_str("010b").unwrap()); - assert_eq!(8*1024, parse_number_of_bytes_str("010k").unwrap()); - assert_eq!(8*1048576, parse_number_of_bytes_str("010m").unwrap()); + assert_eq!(8 * 512, parse_number_of_bytes_str("010b").unwrap()); + assert_eq!(8 * 1024, parse_number_of_bytes_str("010k").unwrap()); + assert_eq!(8 * 1048576, parse_number_of_bytes_str("010m").unwrap()); // hex input assert_eq!(15, parse_number_of_bytes_str("0xf").unwrap()); assert_eq!(15, parse_number_of_bytes_str("0XF").unwrap()); assert_eq!(27, 
parse_number_of_bytes_str("0x1b").unwrap()); - assert_eq!(16*1024, parse_number_of_bytes_str("0x10k").unwrap()); - assert_eq!(16*1048576, parse_number_of_bytes_str("0x10m").unwrap()); + assert_eq!(16 * 1024, parse_number_of_bytes_str("0x10k").unwrap()); + assert_eq!(16 * 1048576, parse_number_of_bytes_str("0x10m").unwrap()); // invalid input parse_number_of_bytes_str("").unwrap_err(); diff --git a/src/od/partialreader.rs b/src/od/partialreader.rs index e68b267e0..72a37f58b 100644 --- a/src/od/partialreader.rs +++ b/src/od/partialreader.rs @@ -5,7 +5,7 @@ use multifilereader::HasError; /// When a large number of bytes must be skipped, it will be read into a /// dynamically allocated buffer. The buffer will be limited to this size. -const MAX_SKIP_BUFFER: usize = 64*1024; +const MAX_SKIP_BUFFER: usize = 64 * 1024; /// Wrapper for `std::io::Read` which can skip bytes at the beginning /// of the input, and it can limit the returned bytes to a particular diff --git a/src/od/peekreader.rs b/src/od/peekreader.rs index 10e415273..b6e4d53ae 100644 --- a/src/od/peekreader.rs +++ b/src/od/peekreader.rs @@ -91,8 +91,7 @@ impl PeekRead for PeekReader { let unused = out.len() - bytes_in_buffer; if peek_size <= unused { Ok((bytes_in_buffer, 0)) - } - else { + } else { let actual_peek_size = peek_size - unused; let real_size = bytes_in_buffer - actual_peek_size; self.write_to_tempbuffer(&out[real_size..bytes_in_buffer]); diff --git a/src/od/prn_char.rs b/src/od/prn_char.rs index 381ec5ace..55a7c6076 100644 --- a/src/od/prn_char.rs +++ b/src/od/prn_char.rs @@ -14,7 +14,7 @@ pub static FORMAT_ITEM_C: FormatterItemInfo = FormatterItemInfo { }; -static A_CHRS : [&'static str; 128] = +static A_CHRS: [&'static str; 128] = ["nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", @@ -40,7 +40,7 @@ fn format_item_a(p: u64) -> String { } -static C_CHRS : [&'static str; 128] = [ +static 
C_CHRS: [&'static str; 128] = [ "\\0", "001", "002", "003", "004", "005", "006", "\\a", "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017", "020", "021", "022", "023", "024", "025", "026", "027", @@ -68,33 +68,28 @@ fn format_item_c(bytes: &[u8]) -> String { Some(s) => format!("{:>4}", s), None => format!("{:>4}", b), } - } - else if (b & 0xc0) == 0x80 { + } else if (b & 0xc0) == 0x80 { // second or subsequent octet of an utf-8 sequence String::from(" **") - } - else if ((b & 0xe0) == 0xc0) && (bytes.len() >= 2) { + } else if ((b & 0xe0) == 0xc0) && (bytes.len() >= 2) { // start of a 2 octet utf-8 sequence match from_utf8(&bytes[0..2]) { Ok(s) => { format!("{:>4}", s) }, Err(_) => { format!(" {:03o}", b) }, } - } - else if ((b & 0xf0) == 0xe0) && (bytes.len() >= 3) { + } else if ((b & 0xf0) == 0xe0) && (bytes.len() >= 3) { // start of a 3 octet utf-8 sequence match from_utf8(&bytes[0..3]) { Ok(s) => { format!("{:>4}", s) }, Err(_) => { format!(" {:03o}", b) }, } - } - else if ((b & 0xf8) == 0xf0) && (bytes.len() >= 4) { + } else if ((b & 0xf8) == 0xf0) && (bytes.len() >= 4) { // start of a 4 octet utf-8 sequence match from_utf8(&bytes[0..4]) { Ok(s) => { format!("{:>4}", s) }, Err(_) => { format!(" {:03o}", b) }, } - } - else { + } else { // invalid utf-8 format!(" {:03o}", b) } @@ -107,8 +102,7 @@ pub fn format_ascii_dump(bytes: &[u8]) -> String { for c in bytes.iter() { if *c >= 0x20 && *c <= 0x7e { result.push_str(C_CHRS[*c as usize]); - } - else { + } else { result.push('.'); } } diff --git a/src/od/prn_float.rs b/src/od/prn_float.rs index bf6403bbd..26037c8b4 100644 --- a/src/od/prn_float.rs +++ b/src/od/prn_float.rs @@ -47,8 +47,7 @@ fn format_flo32(f: f32) -> String { if f.classify() == FpCategory::Subnormal { // subnormal numbers will be normal as f64, so will print with a wrong precision format!("{:width$e}", f, width = width) // subnormal numbers - } - else { + } else { format_float(f as f64, width, precision) } } @@ -58,7 +57,6 @@ fn format_flo64(f: 
f64) -> String { } fn format_float(f: f64, width: usize, precision: usize) -> String { - if !f.is_normal() { if f == -0.0 && f.is_sign_negative() { return format!("{:>width$}", "-0", width = width) } if f == 0.0 || !f.is_finite() { return format!("{:width$}", f, width = width) } @@ -77,13 +75,11 @@ fn format_float(f: f64, width: usize, precision: usize) -> String { format!("{:width$.dec$}", f, width = width, dec = (precision-1) - l as usize) - } - else if l == -1 { + } else if l == -1 { format!("{:width$.dec$}", f, width = width, dec = precision) - } - else { + } else { format!("{:width$.dec$e}", f, width = width, dec = precision - 1) diff --git a/tests/test_od.rs b/tests/test_od.rs index 1446f53e3..a10022a75 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -89,20 +89,17 @@ fn test_no_file() { // Test that od reads from stdin instead of a file #[test] fn test_from_stdin() { - let input = "abcdefghijklmnopqrstuvwxyz\n"; let result = new_ucmd!().arg("--endian=little").run_piped_stdin(input.as_bytes()); assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, unindent(ALPHA_OUT)); - } // Test that od reads from stdin and also from files #[test] fn test_from_mixed() { - let temp = env::temp_dir(); let tmpdir = Path::new(&temp); let file1 = tmpdir.join("test-1"); @@ -122,12 +119,10 @@ fn test_from_mixed() { assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, unindent(ALPHA_OUT)); - } #[test] fn test_multiple_formats() { - let input = "abcdefghijklmnopqrstuvwxyz\n"; let result = new_ucmd!().arg("-c").arg("-b").run_piped_stdin(input.as_bytes()); @@ -140,12 +135,10 @@ fn test_multiple_formats() { 161 162 163 164 165 166 167 170 171 172 012 0000033 ")); - } #[test] fn test_dec() { - let input = [ 0u8, 0u8, 1u8, 0u8, @@ -163,13 +156,11 @@ fn test_dec() { assert_empty_stderr!(result); assert!(result.success); assert_eq!(result.stdout, expected_output); - } #[test] fn test_hex16(){ - - let input : [u8; 9] = [ + let 
input: [u8; 9] = [ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; let expected_output = unindent(" 0000000 2301 6745 ab89 efcd 00ff @@ -184,8 +175,7 @@ fn test_hex16(){ #[test] fn test_hex32(){ - - let input : [u8; 9] = [ + let input: [u8; 9] = [ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff]; let expected_output = unindent(" 0000000 67452301 efcdab89 000000ff @@ -200,8 +190,7 @@ fn test_hex32(){ #[test] fn test_f16(){ - - let input : [u8; 14] = [ + let input: [u8; 14] = [ 0x00, 0x3c, // 0x3C00 1.0 0x00, 0x00, // 0x0000 0.0 0x00, 0x80, // 0x8000 -0.0 @@ -223,8 +212,7 @@ fn test_f16(){ #[test] fn test_f32(){ - - let input : [u8; 28] = [ + let input: [u8; 28] = [ 0x52, 0x06, 0x9e, 0xbf, // 0xbf9e0652 -1.2345679 0x4e, 0x61, 0x3c, 0x4b, // 0x4b3c614e 12345678 0x0f, 0x9b, 0x94, 0xfe, // 0xfe949b0f -9.876543E37 @@ -246,8 +234,7 @@ fn test_f32(){ #[test] fn test_f64(){ - - let input : [u8; 40] = [ + let input: [u8; 40] = [ 0x27, 0x6b, 0x0a, 0x2f, 0x2a, 0xee, 0x45, 0x43, // 0x4345EE2A2F0A6B27 12345678912345678 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x0000000000000000 0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x80, // 0x8010000000000000 -2.2250738585072014e-308 @@ -268,7 +255,6 @@ fn test_f64(){ #[test] fn test_multibyte() { - let result = new_ucmd!().arg("-c").arg("-w12").run_piped_stdin("Universität Tübingen \u{1B000}".as_bytes()); assert_empty_stderr!(result); @@ -283,8 +269,7 @@ fn test_multibyte() { #[test] fn test_width(){ - - let input : [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let input: [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; let expected_output = unindent(" 0000000 000000 000000 0000004 000000 000000 @@ -300,8 +285,7 @@ fn test_width(){ #[test] fn test_invalid_width(){ - - let input : [u8; 4] = [0x00, 0x00, 0x00, 0x00]; + let input: [u8; 4] = [0x00, 0x00, 0x00, 0x00]; let expected_output = unindent(" 0000000 000000 0000002 000000 @@ -317,8 +301,7 @@ fn test_invalid_width(){ #[test] fn 
test_zero_width(){ - - let input : [u8; 4] = [0x00, 0x00, 0x00, 0x00]; + let input: [u8; 4] = [0x00, 0x00, 0x00, 0x00]; let expected_output = unindent(" 0000000 000000 0000002 000000 @@ -334,8 +317,7 @@ fn test_zero_width(){ #[test] fn test_width_without_value(){ - - let input : [u8; 40] = [0 ; 40]; + let input: [u8; 40] = [0 ; 40]; let expected_output = unindent(" 0000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 000000 0000040 000000 000000 000000 000000 @@ -351,7 +333,6 @@ fn test_width_without_value(){ #[test] fn test_suppress_duplicates(){ - let input: [u8; 41] = [ 0, 0, 0, 0, 0, 0, 0, 0, @@ -387,8 +368,7 @@ fn test_suppress_duplicates(){ #[test] fn test_big_endian() { - - let input : [u8; 8] = [ + let input: [u8; 8] = [ 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];// 0xc000000000000000 -2 let expected_output = unindent(" @@ -409,8 +389,7 @@ fn test_big_endian() { #[test] #[allow(non_snake_case)] fn test_alignment_Xxa() { - - let input : [u8; 8] = [ + let input: [u8; 8] = [ 0x0A, 0x0D, 0x65, 0x66, 0x67, 0x00, 0x9e, 0x9f]; let expected_output = unindent(" @@ -431,8 +410,7 @@ fn test_alignment_Xxa() { #[test] #[allow(non_snake_case)] fn test_alignment_Fx() { - - let input : [u8; 8] = [ + let input: [u8; 8] = [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0];// 0xc000000000000000 -2 let expected_output = unindent(" @@ -451,7 +429,6 @@ fn test_alignment_Fx() { #[test] fn test_maxuint(){ - let input = [0xFFu8 ; 8]; let expected_output = unindent(" 0000000 1777777777777777777777 @@ -474,7 +451,6 @@ fn test_maxuint(){ #[test] fn test_hex_offset(){ - let input = [0u8 ; 0x1F]; let expected_output = unindent(" 000000 00000000 00000000 00000000 00000000 @@ -493,7 +469,6 @@ fn test_hex_offset(){ #[test] fn test_dec_offset(){ - let input = [0u8 ; 19]; let expected_output = unindent(" 0000000 00000000 00000000 00000000 00000000 @@ -512,7 +487,6 @@ fn test_dec_offset(){ #[test] fn test_no_offset(){ - let input 
= [0u8 ; 31]; const LINE: &'static str = " 00000000 00000000 00000000 00000000\n"; let expected_output = [LINE, LINE, LINE, LINE].join(""); @@ -526,17 +500,13 @@ fn test_no_offset(){ #[test] fn test_invalid_offset(){ - - let input = [0u8 ; 4]; - - let result = new_ucmd!().arg("-Ab").run_piped_stdin(&input[..]); + let result = new_ucmd!().arg("-Ab").run(); assert!(!result.success); } #[test] fn test_skip_bytes(){ - let input = "abcdefghijklmnopq"; let result = new_ucmd!().arg("-c").arg("--skip-bytes=5").run_piped_stdin(input.as_bytes()); @@ -550,7 +520,6 @@ fn test_skip_bytes(){ #[test] fn test_skip_bytes_error(){ - let input = "12345"; let result = new_ucmd!().arg("--skip-bytes=10").run_piped_stdin(input.as_bytes()); @@ -559,7 +528,6 @@ fn test_skip_bytes_error(){ #[test] fn test_read_bytes(){ - let input = "abcdefghijklmnopqrstuvwxyz\n12345678"; let result = new_ucmd!().arg("--endian=little").arg("--read-bytes=27").run_piped_stdin(input.as_bytes()); @@ -570,8 +538,7 @@ fn test_read_bytes(){ #[test] fn test_ascii_dump(){ - - let input : [u8; 22] = [ + let input: [u8; 22] = [ 0x00, 0x01, 0x0a, 0x0d, 0x10, 0x1f, 0x20, 0x61, 0x62, 0x63, 0x7d, 0x7e, 0x7f, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xff]; let result = new_ucmd!().arg("-tx1zacz").run_piped_stdin(&input[..]); @@ -607,7 +574,6 @@ fn test_filename_parsing(){ #[test] fn test_stdin_offset(){ - let input = "abcdefghijklmnopq"; let result = new_ucmd!().arg("-c").arg("+5").run_piped_stdin(input.as_bytes()); @@ -621,7 +587,6 @@ fn test_stdin_offset(){ #[test] fn test_file_offset(){ - let result = new_ucmd!().arg("-c").arg("--").arg("-f").arg("10").run(); assert_empty_stderr!(result); @@ -678,8 +643,7 @@ fn test_traditional_with_skip_bytes_non_override(){ #[test] fn test_traditional_error(){ // file "0" exists - don't fail on that, but --traditional only accepts a single input - let input = "abcdefghijklmnopq"; - let result = 
new_ucmd!().arg("--traditional").arg("0").arg("0").arg("0").arg("0").run_piped_stdin(input.as_bytes()); + let result = new_ucmd!().arg("--traditional").arg("0").arg("0").arg("0").arg("0").run(); assert!(!result.success); }