From d0b4fe34c159bfdf9d1dad23eef7af4515eb7fd7 Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 27 May 2021 15:55:29 -0700 Subject: [PATCH] Refactor. Returns code to buildable - Pushes file/stdout specific logic to trait objects. - Extracts read/write helper - Extracts conv/block/unblock helper - Impl block - WIP: Many failing tests! --- src/uu/dd/src/dd.rs | 485 ++++++++++++++++++++++++-------- src/uu/dd/src/dd_test.rs | 28 +- src/uu/dd/src/parseargs.rs | 60 ++-- src/uu/dd/src/parseargs/test.rs | 5 +- 4 files changed, 424 insertions(+), 154 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index f36ac487b..304236915 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -52,9 +52,8 @@ enum SrcStat pub struct IConvFlags { ctable: Option<&'static ConversionTable>, - cbs: Option, - block: bool, - unblock: bool, + block: Option, + unblock: Option, swab: bool, sync: bool, noerror: bool, @@ -128,6 +127,7 @@ enum InternalError { WrongInputType, WrongOutputType, + InvalidConvBlockUnblockCase, } impl std::fmt::Display for InternalError @@ -137,7 +137,9 @@ impl std::fmt::Display for InternalError { Self::WrongInputType | Self::WrongOutputType => - write!(f, "Internal dd error"), + write!(f, "Internal dd error: Wrong Input/Output data type"), + Self::InvalidConvBlockUnblockCase => + write!(f, "Internal dd error: Invalid Conversion, Block, or Unblock data"), } } } @@ -147,47 +149,19 @@ impl Error for InternalError {} struct Input { src: R, + non_ascii: bool, ibs: usize, xfer_stats: StatusLevel, cflags: IConvFlags, iflags: IFlags, } -impl Read for Input -{ - fn read(&mut self, mut buf: &mut [u8]) -> io::Result - { - let len = self.src.read(&mut buf)?; - - if let Some(ct) = self.cflags.ctable - { - for idx in 0..len - { - buf[idx] = ct[buf[idx] as usize]; - } - } - - if self.cflags.swab - { - let mut tmp = DEFAULT_FILL_BYTE; - - for base in (1..len).step_by(2) - { - tmp = buf[base]; - buf[base] = buf[base-1]; - buf[base-1] = tmp; - } - } - - Ok(len) - } -} - impl Input { fn new(matches: &getopts::Matches) -> Result> { let ibs = parseargs::parse_ibs(matches)?; + let non_ascii = parseargs::parse_input_non_ascii(matches)?; let xfer_stats = parseargs::parse_status_level(matches)?; let cflags = parseargs::parse_conv_flag_input(matches)?; let iflags = parseargs::parse_iflags(matches)?; @@ -195,6 +169,7 @@ impl Input let mut i = Input { src: io::stdin(), + non_ascii, ibs, xfer_stats, cflags, @@ -217,6 +192,7 @@ impl Input fn new(matches: &getopts::Matches) -> Result> { let ibs = parseargs::parse_ibs(matches)?; + let non_ascii = parseargs::parse_input_non_ascii(matches)?; let xfer_stats = parseargs::parse_status_level(matches)?; let cflags = parseargs::parse_conv_flag_input(matches)?; let iflags = parseargs::parse_iflags(matches)?; @@ -234,6 +210,7 @@ impl Input let i = Input { src, + non_ascii, ibs, xfer_stats, cflags, @@ -250,13 +227,42 @@ impl Input } } +impl Read for Input +{ + fn read(&mut self, mut buf: &mut [u8]) -> io::Result + { + // Read from source, ignore read errors if conv=noerror + let len = match self.src.read(&mut buf) + { + Ok(len) => + len, + Err(e) => + if !self.cflags.noerror + { + return Err(e); + } + else + { + return Ok(0); + }, + }; + + Ok(len) + } +} + impl Input { - fn fill_n(&mut self, buf: &mut [u8], obs: usize) -> Result> + /// Fills to a given size n, which is expected to be 'obs'. + /// Reads in increments of 'self.ibs'. + fn fill_n(&mut self, buf: &mut [u8], obs: usize) -> Result> { let ibs = self.ibs; let mut bytes_read = 0; + // TODO: Fix this! + assert!(obs > ibs); + for n in 0..(obs/ibs) { // fill an ibs-len slice from src let this_read = self.read(&mut buf[n*ibs..(n+1)*ibs])?; @@ -268,14 +274,13 @@ impl Input } } - if bytes_read != 0 { - Ok(SrcStat::Read(bytes_read)) - } else { - Ok(SrcStat::EOF) - } - } + Ok(bytes_read) + } - fn force_fill(&mut self, mut buf: &mut [u8], len: usize) -> Result<(), Box> + /// Force-fills a buffer, ignoring zero-length reads which would otherwise be + /// interpreted as EOF. Does not continue after errors. + /// Note: This may never return. + fn force_fill(&mut self, mut buf: &mut [u8], target_len: usize) -> Result<(), Box> { let mut total_len = 0; @@ -283,12 +288,13 @@ impl Input { total_len += self.read(&mut buf)?; - if total_len == len + if total_len == target_len { return Ok(()); } } } + } struct Output @@ -313,6 +319,16 @@ impl Output { oflags, }) } + + fn fsync(&mut self) -> io::Result<()> + { + self.dst.flush() + } + + fn fdatasync(&mut self) -> io::Result<()> + { + self.dst.flush() + } } impl Output { @@ -346,9 +362,25 @@ impl Output { } else { + // The following error should only occur if someone + // mistakenly calls Output::::new() without checking + // if 'of' has been provided. In this case, + // Output::::new() is probably intended. Err(Box::new(InternalError::WrongOutputType)) } } + + fn fsync(&mut self) -> io::Result<()> + { + self.dst.flush()?; + self.dst.sync_all() + } + + fn fdatasync(&mut self) -> io::Result<()> + { + self.dst.flush()?; + self.dst.sync_data() + } } impl Seek for Output @@ -359,11 +391,29 @@ impl Seek for Output } } -impl Write for Output +impl Write for Output { fn write(&mut self, buf: &[u8]) -> io::Result { + #[inline] + fn is_sparse(buf: &[u8]) -> bool + { + buf.iter() + .all(|&e| e == 0u8) + } + // ----------------------------- + if self.cflags.sparse && is_sparse(buf) + { + let seek_amt: i64 = buf.len() + .try_into() + .expect("Internal dd Error: Seek amount greater than signed 64-bit integer"); + self.dst.seek(io::SeekFrom::Current(seek_amt))?; + Ok(buf.len()) + } + else + { self.dst.write(buf) + } } fn flush(&mut self) -> io::Result<()> @@ -372,9 +422,17 @@ impl Write for Output } } -fn is_sparse(buf: &[u8]) -> bool +impl Write for Output { - buf.iter().all(| &e | e == 0) + fn write(&mut self, buf: &[u8]) -> io::Result + { + self.dst.write(buf) + } + + fn flush(&mut self) -> io::Result<()> + { + self.dst.flush() + } } fn gen_prog_updater(rx: mpsc::Receiver) -> impl Fn() -> () @@ -400,22 +458,243 @@ fn gen_prog_updater(rx: mpsc::Receiver) -> impl Fn() -> () } } -#[inline] -fn dd_read_helper(mut buf: &mut [u8], i: &mut Input, o: &Output) -> Result> +fn pad(buf: &mut Vec, padding: u8) { - match i.fill_n(&mut buf, o.obs) + unimplemented!() +} + +/// Splits the content of buf into cbs-length blocks +/// Appends padding as specified by conv=block and cbs=N +/// +/// Example cases: +/// +/// [a...b] -> [a...b] +/// [a...b'\n'c...d] -> [a...b' '...], [c...d] +/// [a...b'\n'c...d'\n'e...] -> [a...b' '...], [c...d' '...], [e...] +/// +/// [a...b'\n'] -> [a...b' '] +/// ['\n'a...b] -> [' '...], [a...b] +/// +/// ['\n'a...b] -> [' '...], [a...b] +/// ['\n''\n'a...b] -> [' '...], [' '...], [a...b] +// +fn block(buf: &[u8], cbs: usize) -> Vec> +{ + buf.split(| &c | c == '\n' as u8) + .fold(Vec::new(), | mut blocks, split | { + let mut block = Vec::with_capacity(cbs); + block.extend(split); + pad(&mut block, ' ' as u8); + blocks.push(block); + + blocks + }) +} + +// Trims padding from each cbs-length partition of buf +// as specified by conv=unblock and cbs=N +fn unblock(buf: &[u8], cbs: usize) +{ + unimplemented!() +} + +#[inline] +fn apply_ct(buf: &mut [u8], ct: &ConversionTable) +{ + for idx in 0..buf.len() { - Ok(ss) => - Ok(ss), - Err(e) => - if !i.cflags.noerror + buf[idx] = ct[buf[idx] as usize]; + } +} + +#[inline] +fn perform_swab(buf: &mut [u8]) +{ + let mut tmp; + + for base in (1..buf.len()).step_by(2) + { + tmp = buf[base]; + buf[base] = buf[base-1]; + buf[base-1] = tmp; + } +} + +fn conv_block_unblock_helper(mut buf: Vec, i: &mut Input, o: &Output) -> Result, Box> +{ + #[inline] + fn should_block_then_conv(i: &Input) -> bool + { + !i.non_ascii + && i.cflags.block.is_some() + } + #[inline] + fn should_conv_then_block(i: &Input) -> bool + { + i.non_ascii + && i.cflags.block.is_some() + } + #[inline] + fn should_unblock_then_conv(i: &Input) -> bool + { + !i.non_ascii + && i.cflags.unblock.is_some() + } + #[inline] + fn should_conv_then_unblock(i: &Input) -> bool + { + i.non_ascii + && i.cflags.unblock.is_some() + } + fn conv_only(i: &Input) -> bool + { + i.cflags.ctable.is_some() + && i.cflags.block.is_none() + && i.cflags.unblock.is_none() + } + // -------------------------------------------------------------------- + if conv_only(&i) + { // no block/unblock + let ct = i.cflags.ctable.unwrap(); + apply_ct(&mut buf, &ct); + + Ok(buf) + } + else if should_block_then_conv(&i) + { // ascii input so perform the block first + let cbs = i.cflags.block.unwrap(); + + let mut blocks = block(&mut buf, cbs); + + if let Some(ct) = i.cflags.ctable + { + for buf in blocks.iter_mut() { - return Err(e); + apply_ct(buf, &ct); } - else - { - Ok(SrcStat::Read(0)) - }, + } + + let blocks = blocks.into_iter() + .flatten() + .collect(); + + Ok(blocks) + } + else if should_conv_then_block(&i) + { // Non-ascii so perform the conversion first + let cbs = i.cflags.block.unwrap(); + + if let Some(ct) = i.cflags.ctable + { + apply_ct(&mut buf, &ct); + } + + let blocks = block(&mut buf, cbs); + + let blocks = blocks.into_iter() + .flatten() + .collect(); + + Ok(blocks) + } + else if should_unblock_then_conv(&i) + { // ascii input so perform the unblock first + let cbs = i.cflags.unblock.unwrap(); + + unblock(&mut buf, cbs); + + if let Some(ct) = i.cflags.ctable + { + apply_ct(&mut buf, &ct); + } + + Ok(buf) + } + else if should_conv_then_unblock(&i) + { // Non-ascii input so perform the conversion first + let cbs = i.cflags.unblock.unwrap(); + + if let Some(ct) = i.cflags.ctable + { + apply_ct(&mut buf, &ct); + } + + unblock(&buf, cbs); + + Ok(buf) + } + else + { + // The following error should not happen, as it results from + // insufficient command line data. This case should be caught + // by the parser before making it this far. + // Producing this error is an alternative to risking an unwrap call + // on 'cbs' if the required data is not provided. + Err(Box::new(InternalError::InvalidConvBlockUnblockCase)) + } +} + +fn read_write_helper(i: &mut Input, o: &mut Output) -> Result<(usize, Vec), Box> +{ + #[inline] + fn is_fast_read(i: &Input, o: &Output) -> bool + { + i.ibs == o.obs + && !is_conv(i) + && !is_block(i) + && !is_unblock(i) + && !i.cflags.swab + } + #[inline] + fn is_conv(i: &Input) -> bool + { + i.cflags.ctable.is_some() + } + #[inline] + fn is_block(i: &Input) -> bool + { + i.cflags.block.is_some() + } + #[inline] + fn is_unblock(i: &Input) -> bool + { + i.cflags.unblock.is_some() + } + // -------------------------------------------------------------------- + if is_fast_read(&i, &o) + { + // TODO: fast reads are copies performed + // directly to output (without creating any buffers) + // as mentioned in the dd spec. + unimplemented!() + } + else + { + // Read + let mut buf = Vec::with_capacity(o.obs); + let rlen = i.fill_n(&mut buf, o.obs)?; + + if rlen == 0 + { + return Ok((0,Vec::new())); + } + + + // Conv etc... + if i.cflags.swab + { + perform_swab(&mut buf[..rlen]); + } + + if is_conv(&i) || is_block(&i) || is_unblock(&i) + { + let buf = conv_block_unblock_helper(buf, i, o)?; + Ok((rlen, buf)) + } + else + { + Ok((rlen, buf)) + } } } @@ -424,12 +703,13 @@ fn dd_read_helper(mut buf: &mut [u8], i: &mut Input, o: &O // and should be fixed in the future. fn dd_stdout(mut i: Input, mut o: Output) -> Result<(usize, usize), Box> { + let mut bytes_in = 0; + let mut bytes_out = 0; + let prog_tx = if i.xfer_stats == StatusLevel::Progress { let (prog_tx, prog_rx) = mpsc::channel(); - thread::spawn(gen_prog_updater(prog_rx)); - Some(prog_tx) } else @@ -437,42 +717,35 @@ fn dd_stdout(mut i: Input, mut o: Output) -> Result<(usi None }; - let mut bytes_in = 0; - let mut bytes_out = 0; - loop { - let mut buf = vec![DEFAULT_FILL_BYTE; o.obs]; - - // Read - let r_len = match dd_read_helper(&mut buf, &mut i, &o)? + match read_write_helper(&mut i, &mut o)? { - SrcStat::Read(0) => - continue, - SrcStat::Read(len) => - { - bytes_in += len; - len - }, - SrcStat::EOF => + (0, _) => break, + (rlen, buf) => + { + let wlen = o.write(&buf)?; + + bytes_in += rlen; + bytes_out += wlen; + }, }; - // Write - let w_len = o.write(&buf[..r_len])?; - // Prog - bytes_out += w_len; - if let Some(prog_tx) = &prog_tx { prog_tx.send(bytes_out)?; } } - if o.cflags.fsync || o.cflags.fdatasync + if o.cflags.fsync { - o.flush()?; + o.fsync()?; + } + else if o.cflags.fdatasync + { + o.fdatasync()?; } Ok((bytes_in, bytes_out)) @@ -483,12 +756,13 @@ fn dd_stdout(mut i: Input, mut o: Output) -> Result<(usi // and should be fixed in the future. fn dd_fileout(mut i: Input, mut o: Output) -> Result<(usize, usize), Box> { + let mut bytes_in = 0; + let mut bytes_out = 0; + let prog_tx = if i.xfer_stats == StatusLevel::Progress { let (prog_tx, prog_rx) = mpsc::channel(); - thread::spawn(gen_prog_updater(prog_rx)); - Some(prog_tx) } else @@ -496,43 +770,22 @@ fn dd_fileout(mut i: Input, mut o: Output) -> Result<(usize, u None }; - let mut bytes_in = 0; - let mut bytes_out = 0; - loop { - let mut buf = vec![DEFAULT_FILL_BYTE; o.obs]; - - // Read - let r_len = match dd_read_helper(&mut buf, &mut i, &o)? + match read_write_helper(&mut i, &mut o)? { - SrcStat::Read(0) => - continue, - SrcStat::Read(len) => - { - bytes_in += len; - len - }, - SrcStat::EOF => + (0, _) => break, - }; + (rlen, buf) => + { + let wlen = o.write(&buf)?; - - // Write - let w_len = if o.cflags.sparse && is_sparse(&buf) - { - let seek_amt: i64 = r_len.try_into()?; - o.seek(io::SeekFrom::Current(seek_amt))?; - r_len - } - else - { - o.write(&buf[..r_len])? + bytes_in += rlen; + bytes_out += wlen; + }, }; // Prog - bytes_out += w_len; - if let Some(prog_tx) = &prog_tx { prog_tx.send(bytes_out)?; @@ -541,13 +794,11 @@ fn dd_fileout(mut i: Input, mut o: Output) -> Result<(usize, u if o.cflags.fsync { - o.flush()?; - o.dst.sync_all()?; + o.fsync()?; } else if o.cflags.fdatasync { - o.flush()?; - o.dst.sync_data()?; + o.fdatasync()?; } Ok((bytes_in, bytes_out)) diff --git a/src/uu/dd/src/dd_test.rs b/src/uu/dd/src/dd_test.rs index ee30b8907..6d697b5ec 100644 --- a/src/uu/dd/src/dd_test.rs +++ b/src/uu/dd/src/dd_test.rs @@ -65,9 +65,8 @@ macro_rules! icf ( { IConvFlags { ctable: $ctable, - cbs: None, - block: false, - unblock: false, + block: None, + unblock: None, swab: false, sync: false, noerror: false, @@ -87,6 +86,7 @@ macro_rules! make_spec_test ( $test_name, Input { src: $src, + non_ascii: false, ibs: 512, xfer_stats: StatusLevel::None, cflags: icf!(), @@ -132,6 +132,7 @@ macro_rules! make_conv_test ( $test_name, Input { src: $src, + non_ascii: false, ibs: 512, xfer_stats: StatusLevel::None, cflags: icf!($ctable), @@ -156,6 +157,7 @@ macro_rules! make_icf_test ( $test_name, Input { src: $src, + non_ascii: false, ibs: 512, xfer_stats: StatusLevel::None, cflags: $icf, @@ -282,6 +284,7 @@ fn all_valid_ascii_ebcdic_ascii_roundtrip_conv_test() let i = Input { src: File::open("./test-resources/all-valid-ascii-chars-37eff01866ba3f538421b30b7cbefcac.test").unwrap(), + non_ascii: false, ibs: 128, xfer_stats: StatusLevel::None, cflags: icf!(Some(&ASCII_TO_EBCDIC)), @@ -303,6 +306,7 @@ fn all_valid_ascii_ebcdic_ascii_roundtrip_conv_test() let i = Input { src: File::open(&tmp_fname_ae).unwrap(), + non_ascii: false, ibs: 256, xfer_stats: StatusLevel::None, cflags: icf!(Some(&EBCDIC_TO_ASCII)), @@ -343,9 +347,8 @@ make_icf_test!( File::open("./test-resources/seq-byte-values.test").unwrap(), IConvFlags { ctable: None, - cbs: None, - block: false, - unblock: false, + block: None, + unblock: None, swab: true, sync: false, noerror: false, @@ -359,12 +362,19 @@ make_icf_test!( File::open("./test-resources/seq-byte-values-odd.test").unwrap(), IConvFlags { ctable: None, - cbs: None, - block: false, - unblock: false, + block: None, + unblock: None, swab: true, sync: false, noerror: false, }, File::open("./test-resources/seq-byte-values-odd.spec").unwrap() ); + +fn block_test_basic() +{ + let mut buf = vec![0u8, 1u8, 2u8, 3u8]; + let res = block(&buf, 4); + + assert_eq!(res, vec![buf]); +} diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index b18054f14..12bb1aa89 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -25,6 +25,7 @@ pub enum ParseError NoMatchingMultiplier(String), ByteStringContainsNoValue(String), MultiplierStringWouldOverflow(String), + BlockUnblockWithoutCBS, } impl std::fmt::Display for ParseError @@ -298,7 +299,6 @@ fn parse_cbs(matches: &getopts::Matches) -> Result, ParseError> pub fn parse_status_level(matches: &getopts::Matches) -> Result { - // TODO: Impl unimplemented!() } @@ -322,7 +322,7 @@ fn parse_ctable(fmt: Option, case: Option) -> Option<&'stati { match (fmt, case) { - // Both specified + // Both [ascii | ebcdic | ibm] and [lcase | ucase] specified (Some(fmt), Some(case)) => match (fmt, case) { @@ -341,7 +341,7 @@ fn parse_ctable(fmt: Option, case: Option) -> Option<&'stati (_, _) => None, }, - // Only one of {ascii, ebcdic, ibm} specified + // Only [ascii | ebcdic | ibm] specified (Some(fmt), None) => match fmt { @@ -354,7 +354,7 @@ fn parse_ctable(fmt: Option, case: Option) -> Option<&'stati _ => None, }, - // Only one of {ucase, lcase} specified + // Only [lcase | ucase] specified (None, Some(ConvFlag::UCase)) => Some(&ASCII_LCASE_TO_UCASE), (None, Some(ConvFlag::LCase)) => @@ -389,8 +389,8 @@ pub fn parse_conv_flag_input(matches: &getopts::Matches) -> Result Result - if !unblock + match (cbs, unblock) { - block = true; - } - else - { - return Err(ParseError::MultipleBlockUnblock); + (Some(cbs), None) => + block = Some(cbs), + (None, _) => + return Err(ParseError::BlockUnblockWithoutCBS), + (_, Some(_)) => + return Err(ParseError::MultipleBlockUnblock), }, ConvFlag::Unblock => - if !block + match (cbs, block) { - unblock = true; - } - else - { - return Err(ParseError::MultipleBlockUnblock); + (Some(cbs), None) => + unblock = Some(cbs), + (None, _) => + return Err(ParseError::BlockUnblockWithoutCBS), + (_, Some(_)) => + return Err(ParseError::MultipleBlockUnblock), }, ConvFlag::Swab => swab = true, @@ -476,7 +478,6 @@ pub fn parse_conv_flag_input(matches: &getopts::Matches) -> Result Result sync = true, Flag::NoCache => nocache = true, - Flag::NoCache => - nocache = true, Flag::NonBlock => nonblock = true, Flag::NoATime => @@ -665,8 +664,6 @@ pub fn parse_oflags(matches: &getopts::Matches) -> Result sync = true, Flag::NoCache => nocache = true, - Flag::NoCache => - nocache = true, Flag::NonBlock => nonblock = true, Flag::NoATime => @@ -718,7 +715,7 @@ pub fn parse_skip_amt(ibs: &usize, iflags: &IFlags, matches: &getopts::Matches) } else { - let n = parse_bytes_only(&amt)?; + let n = parse_bytes_with_opt_multiplier(amt)?; Ok(Some(ibs*n)) } } @@ -740,7 +737,7 @@ pub fn parse_seek_amt(obs: &usize, oflags: &OFlags, matches: &getopts::Matches) } else { - let n = parse_bytes_only(&amt)?; + let n = parse_bytes_with_opt_multiplier(amt)?; Ok(Some(obs*n)) } } @@ -749,3 +746,16 @@ pub fn parse_seek_amt(obs: &usize, oflags: &OFlags, matches: &getopts::Matches) Ok(None) } } + +/// Parse whether the args indicate the input is not ascii +pub fn parse_input_non_ascii(matches: &getopts::Matches) -> Result +{ + if let Some(conv_opts) = matches.opt_str("conv") + { + Ok(conv_opts.contains("ascii")) + } + else + { + Ok(false) + } +} diff --git a/src/uu/dd/src/parseargs/test.rs b/src/uu/dd/src/parseargs/test.rs index 5587c1c4b..a902a9f5e 100644 --- a/src/uu/dd/src/parseargs/test.rs +++ b/src/uu/dd/src/parseargs/test.rs @@ -14,9 +14,8 @@ fn build_icf() { let icf_expd = IConvFlags { ctable: Some(&ASCII_TO_IBM), - cbs: None, - block: false, - unblock: false, + block: None, + unblock: None, swab: false, sync: false, noerror: false,