1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

od: implement ascii dump

This commit is contained in:
Wim Hueskes 2016-08-12 17:51:24 +02:00
parent cea4297fdf
commit e8eab8d3e8
4 changed files with 144 additions and 65 deletions

View file

@ -38,7 +38,8 @@ use partialreader::*;
use peekreader::*;
use formatteriteminfo::*;
use parse_nrofbytes::*;
use parse_formats::parse_format_flags;
use parse_formats::{parse_format_flags, ParsedFormatterItemInfo};
use prn_char::format_ascii_dump;
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
const MAX_BYTES_PER_UNIT: usize = 8;
@ -163,7 +164,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
}
}
};
let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size));
let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size));
if line_bytes % min_bytes != 0 {
show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes);
line_bytes = min_bytes;
@ -201,7 +202,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
}
fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
fnames: Vec<InputSource>, formats: &[FormatterItemInfo], output_duplicates: bool,
fnames: Vec<InputSource>, formats: &[ParsedFormatterItemInfo], output_duplicates: bool,
skip_bytes: usize, read_bytes: Option<usize>) -> i32 {
let mf = MultifileReader::new(fnames);
@ -213,12 +214,13 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes + PEEK_BUFFER_SIZE);
unsafe { bytes.set_len(line_bytes + PEEK_BUFFER_SIZE); } // fast but uninitialized
let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.byte_size));
let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size));
let print_width_block = formats
.iter()
.fold(1, |max, next| {
cmp::max(max, next.print_width * (byte_size_block / next.byte_size))
cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size))
});
let print_width_line = print_width_block * (line_bytes / byte_size_block);
if byte_size_block > MAX_BYTES_PER_UNIT {
panic!("{}-bits types are unsupported. Current max={}-bits.",
@ -233,9 +235,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
// calculate proper alignment for each item
for sf in &mut spaced_formatters {
let mut byte_size = sf.frm.byte_size;
let mut byte_size = sf.frm.formatter_item_info.byte_size;
let mut items_in_block = byte_size_block / byte_size;
let thisblock_width = sf.frm.print_width * items_in_block;
let thisblock_width = sf.frm.formatter_item_info.print_width * items_in_block;
let mut missing_spacing = print_width_block - thisblock_width;
while items_in_block > 0 {
@ -291,7 +293,7 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
print_bytes(byte_order, &bytes, n, peekbytes,
&print_with_radix(input_offset_base, addr),
&spaced_formatters, byte_size_block);
&spaced_formatters, byte_size_block, print_width_line);
}
addr += n;
@ -312,22 +314,22 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
}
fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str,
formats: &[SpacedFormatterItemInfo], byte_size_block: usize) {
formats: &[SpacedFormatterItemInfo], byte_size_block: usize, print_width_line: usize) {
let mut first = true; // First line of a multi-format raster.
for f in formats {
let mut output_text = String::new();
let mut b = 0;
while b < length {
let nextb = b + f.frm.byte_size;
let nextb = b + f.frm.formatter_item_info.byte_size;
output_text.push_str(&format!("{:>width$}",
"",
width = f.spacing[b % byte_size_block]));
match f.frm.formatter {
match f.frm.formatter_item_info.formatter {
FormatWriter::IntWriter(func) => {
let p: u64 = match f.frm.byte_size {
let p: u64 = match f.frm.formatter_item_info.byte_size {
1 => {
bytes[b] as u64
}
@ -340,19 +342,19 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us
8 => {
byte_order.read_u64(&bytes[b..nextb])
}
_ => { panic!("Invalid byte_size: {}", f.frm.byte_size); }
_ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); }
};
output_text.push_str(&func(p, f.frm.byte_size, f.frm.print_width));
output_text.push_str(&func(p, f.frm.formatter_item_info.byte_size, f.frm.formatter_item_info.print_width));
}
FormatWriter::FloatWriter(func) => {
let p: f64 = match f.frm.byte_size {
let p: f64 = match f.frm.formatter_item_info.byte_size {
4 => {
byte_order.read_f32(&bytes[b..nextb]) as f64
}
8 => {
byte_order.read_f64(&bytes[b..nextb])
}
_ => { panic!("Invalid byte_size: {}", f.frm.byte_size); }
_ => { panic!("Invalid byte_size: {}", f.frm.formatter_item_info.byte_size); }
};
output_text.push_str(&func(p));
}
@ -363,6 +365,14 @@ fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: us
b = nextb;
}
if f.frm.add_ascii_dump {
let missing_spacing = print_width_line.saturating_sub(output_text.chars().count());
output_text.push_str(&format!("{:>width$} {}",
"",
format_ascii_dump(&bytes[..length]),
width=missing_spacing));
}
if first {
print!("{}", prefix); // print offset
// if printing in multiple formats offset is printed only once
@ -416,6 +426,6 @@ fn print_final_offset(r: Radix, x: usize) {
}
struct SpacedFormatterItemInfo {
frm: FormatterItemInfo,
frm: ParsedFormatterItemInfo,
spacing: [usize; MAX_BYTES_PER_UNIT],
}

View file

@ -13,6 +13,22 @@ macro_rules! hashmap {
}}
}
/// A formatter picked on the command line, together with the flag that says
/// whether an ascii dump is appended to every line printed with that formatter.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct ParsedFormatterItemInfo {
/// The formatter itself; callers read its `byte_size`, `print_width` and `formatter`.
pub formatter_item_info: FormatterItemInfo,
/// When true, the formatted line is padded and followed by the output of `format_ascii_dump`.
pub add_ascii_dump: bool,
}
impl ParsedFormatterItemInfo {
pub fn new(formatter_item_info: FormatterItemInfo, add_ascii_dump: bool) -> ParsedFormatterItemInfo {
ParsedFormatterItemInfo {
formatter_item_info: formatter_item_info,
add_ascii_dump: add_ascii_dump,
}
}
}
/// Parses format flags from commandline
///
/// getopts, docopt, clap don't seem suitable to parse the commandline
@ -24,7 +40,7 @@ macro_rules! hashmap {
/// arguments with parameters like -w16 can only appear at the end: -fvoxw16
/// parameters of -t/--format specify 1 or more formats.
/// if -- appears on the commandline, parsing should stop.
pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<FormatterItemInfo>, String> {
pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<ParsedFormatterItemInfo>, String> {
let known_formats = hashmap![
'a' => FORMAT_ITEM_A,
@ -97,7 +113,7 @@ pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<FormatterItemInfo>,
match known_formats.get(&c) {
None => {} // not every option is a format
Some(r) => {
formats.push(*r)
formats.push(ParsedFormatterItemInfo::new(*r, false))
}
}
}
@ -116,7 +132,7 @@ pub fn parse_format_flags(args: &Vec<String>) -> Result<Vec<FormatterItemInfo>,
}
if formats.is_empty() {
formats.push(FORMAT_ITEM_OCT16); // 2 byte octal is the default
formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_OCT16, false)); // 2 byte octal is the default
}
Ok(formats)
@ -130,7 +146,7 @@ enum ParseState {
Finished // no more characters may appear.
}
fn parse_type_string(params: &String) -> Result<Vec<FormatterItemInfo>, String> {
fn parse_type_string(params: &String) -> Result<Vec<ParsedFormatterItemInfo>, String> {
let type_chars: HashSet<_> = ['a', 'c'].iter().cloned().collect();
let type_ints: HashSet<_> = ['d', 'o', 'u', 'x'].iter().cloned().collect();
@ -233,50 +249,50 @@ fn parse_type_string(params: &String) -> Result<Vec<FormatterItemInfo>, String>
}
match type_char {
'a' => formats.push(FORMAT_ITEM_A),
'c' => formats.push(FORMAT_ITEM_C),
'a' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_A, show_ascii_dump)),
'c' => formats.push(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, show_ascii_dump)),
'd' => {
formats.push(match byte_size {
formats.push(ParsedFormatterItemInfo::new(match byte_size {
1 => FORMAT_ITEM_DEC8S,
2 => FORMAT_ITEM_DEC16S,
4|0 => FORMAT_ITEM_DEC32S,
8 => FORMAT_ITEM_DEC64S,
_ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
});
}, show_ascii_dump));
},
'o' => {
formats.push(match byte_size {
formats.push(ParsedFormatterItemInfo::new(match byte_size {
1 => FORMAT_ITEM_OCT8,
2 => FORMAT_ITEM_OCT16,
4|0 => FORMAT_ITEM_OCT32,
8 => FORMAT_ITEM_OCT64,
_ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
});
}, show_ascii_dump));
},
'u' => {
formats.push(match byte_size {
formats.push(ParsedFormatterItemInfo::new(match byte_size {
1 => FORMAT_ITEM_DEC8U,
2 => FORMAT_ITEM_DEC16U,
4|0 => FORMAT_ITEM_DEC32U,
8 => FORMAT_ITEM_DEC64U,
_ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
});
}, show_ascii_dump));
},
'x' => {
formats.push(match byte_size {
formats.push(ParsedFormatterItemInfo::new(match byte_size {
1 => FORMAT_ITEM_HEX8,
2 => FORMAT_ITEM_HEX16,
4|0 => FORMAT_ITEM_HEX32,
8 => FORMAT_ITEM_HEX64,
_ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
});
}, show_ascii_dump));
},
'f' => {
formats.push(match byte_size {
formats.push(ParsedFormatterItemInfo::new(match byte_size {
4|0 => FORMAT_ITEM_F32,
8 => FORMAT_ITEM_F64,
_ => return Err(format!("invalid size '{}' in format specification '{}'", byte_size, format_type)),
});
}, show_ascii_dump));
},
_ => unreachable!(),
}
@ -287,12 +303,22 @@ fn parse_type_string(params: &String) -> Result<Vec<FormatterItemInfo>, String>
Ok(formats)
}
#[allow(dead_code)]
#[cfg(test)]
pub fn parse_format_flags_str(args_str: &Vec<&'static str>) -> Result<Vec<FormatterItemInfo>, String> {
let args = args_str.iter().map(|s| s.to_string()).collect();
parse_format_flags(&args)
match parse_format_flags(&args) {
Err(e) => Err(e),
Ok(v) => {
// tests using this function assume add_ascii_dump is not set
Ok(v.into_iter()
.inspect(|f| assert!(!f.add_ascii_dump))
.map(|f| f.formatter_item_info)
.collect())
},
}
}
#[test]
fn test_no_options() {
assert_eq!(parse_format_flags_str(
@ -372,9 +398,9 @@ fn test_long_format_a() {
#[test]
fn test_long_format_cz() {
assert_eq!(parse_format_flags_str(
&vec!("od", "--format=cz")).unwrap(),
vec!(FORMAT_ITEM_C)); // TODO 'z'
assert_eq!(parse_format_flags(
&vec!("od".to_string(), "--format=cz".to_string())).unwrap(),
vec!(ParsedFormatterItemInfo::new(FORMAT_ITEM_C, true)));
}
#[test]
@ -448,36 +474,35 @@ fn test_format_next_arg_invalid() {
parse_format_flags_str(&vec!("od", "-t")).unwrap_err();
}
#[test]
fn test_mixed_formats() {
assert_eq!(parse_format_flags_str(
assert_eq!(parse_format_flags(
&vec!(
"od",
"--skip-bytes=2",
"-vItu1z",
"-N",
"1000",
"-xt",
"acdx1",
"--format=u2c",
"--format",
"f",
"-xAx",
"--",
"-h",
"--format=f8")).unwrap(),
"od".to_string(),
"--skip-bytes=2".to_string(),
"-vItu1z".to_string(),
"-N".to_string(),
"1000".to_string(),
"-xt".to_string(),
"acdx1".to_string(),
"--format=u2c".to_string(),
"--format".to_string(),
"f".to_string(),
"-xAx".to_string(),
"--".to_string(),
"-h".to_string(),
"--format=f8".to_string())).unwrap(),
vec!(
FORMAT_ITEM_DEC64S, // I
FORMAT_ITEM_DEC8U, // tu1z
FORMAT_ITEM_HEX16, // x
FORMAT_ITEM_A, // ta
FORMAT_ITEM_C, // tc
FORMAT_ITEM_DEC32S, // td
FORMAT_ITEM_HEX8, // tx1
FORMAT_ITEM_DEC16U, // tu2
FORMAT_ITEM_C, // tc
FORMAT_ITEM_F32, // tf
FORMAT_ITEM_HEX16, // x
ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC64S, false), // I
ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC8U, true), // tu1z
ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false), // x
ParsedFormatterItemInfo::new(FORMAT_ITEM_A, false), // ta
ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false), // tc
ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC32S, false), // td
ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX8, false), // tx1
ParsedFormatterItemInfo::new(FORMAT_ITEM_DEC16U, false), // tu2
ParsedFormatterItemInfo::new(FORMAT_ITEM_C, false), // tc
ParsedFormatterItemInfo::new(FORMAT_ITEM_F32, false), // tf
ParsedFormatterItemInfo::new(FORMAT_ITEM_HEX16, false), // x
));
}

View file

@ -100,6 +100,23 @@ fn format_item_c(bytes: &[u8]) -> String {
}
}
/// Builds the ascii dump column for one line of output.
///
/// The result starts with `>` and ends with `<`. In between there is one
/// entry per input byte: bytes in the range `0x20..=0x7e` are rendered via
/// their `C_CHRS` table entry, every other byte is rendered as `.`.
pub fn format_ascii_dump(bytes: &[u8]) -> String {
    // Room for the two delimiters plus (typically) one character per byte.
    let mut dump = String::with_capacity(bytes.len() + 2);

    dump.push('>');
    for &byte in bytes {
        match byte {
            0x20..=0x7e => dump.push_str(C_CHRS[byte as usize]),
            _ => dump.push('.'),
        }
    }
    dump.push('<');

    dump
}
#[test]
fn test_format_item_a() {
assert_eq!(" nul", format_item_a(0x00, 1, 4));
@ -147,3 +164,9 @@ fn test_format_item_c() {
assert_eq!(" 365", format_item_c(&[0xf5, 0x80, 0x80, 0x80])); // invalid utf-8
assert_eq!(" 377", format_item_c(&[0xff])); // invalid utf-8
}
/// Checks the delimiters and the printable / non-printable boundaries
/// (0x1f, 0x20, 0x7e, 0x7f) of `format_ascii_dump`.
#[test]
fn test_format_ascii_dump() {
    // (input bytes, expected dump)
    let cases: [(&[u8], &str); 2] = [
        (&[0x00], ">.<"),
        (&[0x1f, 0x20, 0x41, 0x7e, 0x7f], ">. A~.<"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(expected, format_ascii_dump(input));
    }
}

View file

@ -527,3 +527,24 @@ fn test_read_bytes(){
assert!(result.success);
assert_eq!(result.stdout, unindent(ALPHA_OUT));
}
// End-to-end check of the `z` suffix: pipes 22 bytes through `od -tx1zacz`
// and compares stdout with the expected text below.
#[test]
fn test_ascii_dump(){
// 22 bytes mixing control characters, printable ASCII and bytes >= 0x80.
let input : [u8; 22] = [
0x00, 0x01, 0x0a, 0x0d, 0x10, 0x1f, 0x20, 0x61, 0x62, 0x63, 0x7d,
0x7e, 0x7f, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xff];
// Three formats in one -t argument: x1 with z, a without z, c with z.
let result = new_ucmd!().arg("-tx1zacz").run_piped_stdin(&input[..]);
assert_empty_stderr!(result);
assert!(result.success);
// Expected: the `>...<` dump follows the x1 and c rows but not the a row;
// the input is split into a 16-byte line and a 6-byte line (offsets are octal).
assert_eq!(result.stdout, unindent(r"
0000000 00 01 0a 0d 10 1f 20 61 62 63 7d 7e 7f 80 90 a0 >...... abc}~....<
nul soh nl cr dle us sp a b c } ~ del nul dle sp
\0 001 \n \r 020 037 a b c } ~ 177 ** ** ** >...... abc}~....<
0000020 b0 c0 d0 e0 f0 ff >......<
0 @ P ` p del
** 300 320 340 360 377 >......<
0000026
"));
}