mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #3293 from jfinkels/dd-conv-block-helper-remove-input
dd: create ConversionMode to simplify conversion, blocking, and unblocking
This commit is contained in:
commit
5fba2a78da
5 changed files with 152 additions and 130 deletions
|
@ -6,10 +6,8 @@
|
|||
// spell-checker:ignore datastructures rstat rposition cflags ctable
|
||||
|
||||
use crate::conversion_tables::ConversionTable;
|
||||
use crate::datastructures::InternalError;
|
||||
use crate::datastructures::ConversionMode;
|
||||
use crate::progress::ReadStat;
|
||||
use crate::Input;
|
||||
use std::io::Read;
|
||||
|
||||
const NEWLINE: u8 = b'\n';
|
||||
const SPACE: u8 = b' ';
|
||||
|
@ -65,105 +63,68 @@ fn unblock(buf: &[u8], cbs: usize) -> Vec<u8> {
|
|||
})
|
||||
}
|
||||
|
||||
/// A helper for teasing out which options must be applied and in which order.
|
||||
/// Some user options, such as the presence of conversion tables, will determine whether the input is assumed to be ascii. The parser sets the Input::non_ascii flag accordingly.
|
||||
/// Examples:
|
||||
/// - If conv=ebcdic or conv=ibm is specified then block, unblock or swab must be performed before the conversion happens since the source will start in ascii.
|
||||
/// - If conv=ascii is specified then block, unblock or swab must be performed after the conversion since the source starts in ebcdic.
|
||||
/// - If no conversion is specified then the source is assumed to be in ascii.
|
||||
/// For more info see `info dd`
|
||||
pub(crate) fn conv_block_unblock_helper<R: Read>(
|
||||
/// Apply the specified conversion, blocking, and/or unblocking in the right order.
|
||||
///
|
||||
/// The `mode` specifies the combination of conversion, blocking, and
|
||||
/// unblocking to apply and the order in which to apply it. This
|
||||
/// function is responsible only for applying the operations.
|
||||
///
|
||||
/// `buf` is the buffer of input bytes to transform. This function
|
||||
/// mutates this input and also returns a new buffer of bytes
|
||||
/// representing the result of the transformation.
|
||||
///
|
||||
/// `rstat` maintains a running total of the number of partial and
|
||||
/// complete blocks read before calling this function. In certain
|
||||
/// settings of `mode`, this function will update the number of
|
||||
/// records truncated; that's why `rstat` is borrowed mutably.
|
||||
pub(crate) fn conv_block_unblock_helper(
|
||||
mut buf: Vec<u8>,
|
||||
i: &mut Input<R>,
|
||||
mode: &ConversionMode,
|
||||
rstat: &mut ReadStat,
|
||||
) -> Result<Vec<u8>, InternalError> {
|
||||
// Local Predicate Fns -------------------------------------------------
|
||||
fn should_block_then_conv<R: Read>(i: &Input<R>) -> bool {
|
||||
!i.non_ascii && i.cflags.block.is_some()
|
||||
}
|
||||
fn should_conv_then_block<R: Read>(i: &Input<R>) -> bool {
|
||||
i.non_ascii && i.cflags.block.is_some()
|
||||
}
|
||||
fn should_unblock_then_conv<R: Read>(i: &Input<R>) -> bool {
|
||||
!i.non_ascii && i.cflags.unblock.is_some()
|
||||
}
|
||||
fn should_conv_then_unblock<R: Read>(i: &Input<R>) -> bool {
|
||||
i.non_ascii && i.cflags.unblock.is_some()
|
||||
}
|
||||
fn conv_only<R: Read>(i: &Input<R>) -> bool {
|
||||
i.cflags.ctable.is_some() && i.cflags.block.is_none() && i.cflags.unblock.is_none()
|
||||
}
|
||||
// Local Helper Fns ----------------------------------------------------
|
||||
) -> Vec<u8> {
|
||||
// TODO This function has a mutable input `buf` but also returns a
|
||||
// completely new `Vec`; that seems fishy. Could we either make
|
||||
// the input immutable or make the function not return anything?
|
||||
|
||||
fn apply_conversion(buf: &mut [u8], ct: &ConversionTable) {
|
||||
for idx in 0..buf.len() {
|
||||
buf[idx] = ct[buf[idx] as usize];
|
||||
}
|
||||
}
|
||||
// --------------------------------------------------------------------
|
||||
if conv_only(i) {
|
||||
// no block/unblock
|
||||
let ct = i.cflags.ctable.unwrap();
|
||||
apply_conversion(&mut buf, ct);
|
||||
|
||||
Ok(buf)
|
||||
} else if should_block_then_conv(i) {
|
||||
// ascii input so perform the block first
|
||||
let cbs = i.cflags.block.unwrap();
|
||||
|
||||
let mut blocks = block(&buf, cbs, i.cflags.sync.is_some(), rstat);
|
||||
|
||||
if let Some(ct) = i.cflags.ctable {
|
||||
match mode {
|
||||
ConversionMode::ConvertOnly(ct) => {
|
||||
apply_conversion(&mut buf, ct);
|
||||
buf
|
||||
}
|
||||
ConversionMode::BlockThenConvert(ct, cbs, sync) => {
|
||||
let mut blocks = block(&buf, *cbs, *sync, rstat);
|
||||
for buf in &mut blocks {
|
||||
apply_conversion(buf, ct);
|
||||
}
|
||||
blocks.into_iter().flatten().collect()
|
||||
}
|
||||
|
||||
let blocks = blocks.into_iter().flatten().collect();
|
||||
|
||||
Ok(blocks)
|
||||
} else if should_conv_then_block(i) {
|
||||
// Non-ascii so perform the conversion first
|
||||
let cbs = i.cflags.block.unwrap();
|
||||
|
||||
if let Some(ct) = i.cflags.ctable {
|
||||
ConversionMode::ConvertThenBlock(ct, cbs, sync) => {
|
||||
apply_conversion(&mut buf, ct);
|
||||
block(&buf, *cbs, *sync, rstat)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect()
|
||||
}
|
||||
|
||||
let blocks = block(&buf, cbs, i.cflags.sync.is_some(), rstat)
|
||||
ConversionMode::BlockOnly(cbs, sync) => block(&buf, *cbs, *sync, rstat)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect();
|
||||
|
||||
Ok(blocks)
|
||||
} else if should_unblock_then_conv(i) {
|
||||
// ascii input so perform the unblock first
|
||||
let cbs = i.cflags.unblock.unwrap();
|
||||
|
||||
let mut buf = unblock(&buf, cbs);
|
||||
|
||||
if let Some(ct) = i.cflags.ctable {
|
||||
.collect(),
|
||||
ConversionMode::UnblockThenConvert(ct, cbs) => {
|
||||
let mut buf = unblock(&buf, *cbs);
|
||||
apply_conversion(&mut buf, ct);
|
||||
buf
|
||||
}
|
||||
|
||||
Ok(buf)
|
||||
} else if should_conv_then_unblock(i) {
|
||||
// Non-ascii input so perform the conversion first
|
||||
let cbs = i.cflags.unblock.unwrap();
|
||||
|
||||
if let Some(ct) = i.cflags.ctable {
|
||||
ConversionMode::ConvertThenUnblock(ct, cbs) => {
|
||||
apply_conversion(&mut buf, ct);
|
||||
unblock(&buf, *cbs)
|
||||
}
|
||||
|
||||
let buf = unblock(&buf, cbs);
|
||||
|
||||
Ok(buf)
|
||||
} else {
|
||||
// The following error should not happen, as it results from
|
||||
// insufficient command line data. This case should be caught
|
||||
// by the parser before making it this far.
|
||||
// Producing this error is an alternative to risking an unwrap call
|
||||
// on 'cbs' if the required data is not provided.
|
||||
Err(InternalError::InvalidConvBlockUnblockCase)
|
||||
ConversionMode::UnblockOnly(cbs) => unblock(&buf, *cbs),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -14,12 +14,27 @@ use crate::conversion_tables::*;
|
|||
|
||||
type Cbs = usize;
|
||||
|
||||
/// How to apply conversion, blocking, and/or unblocking.
|
||||
///
|
||||
/// Certain settings of the `conv` parameter to `dd` require a
|
||||
/// combination of conversion, blocking, or unblocking, applied in a
|
||||
/// certain order. The variants of this enumeration give the different
|
||||
/// ways of combining those three operations.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) enum ConversionMode<'a> {
|
||||
ConvertOnly(&'a ConversionTable),
|
||||
BlockOnly(Cbs, bool),
|
||||
UnblockOnly(Cbs),
|
||||
BlockThenConvert(&'a ConversionTable, Cbs, bool),
|
||||
ConvertThenBlock(&'a ConversionTable, Cbs, bool),
|
||||
UnblockThenConvert(&'a ConversionTable, Cbs),
|
||||
ConvertThenUnblock(&'a ConversionTable, Cbs),
|
||||
}
|
||||
|
||||
/// Stores all Conv Flags that apply to the input
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
pub struct IConvFlags {
|
||||
pub ctable: Option<&'static ConversionTable>,
|
||||
pub block: Option<Cbs>,
|
||||
pub unblock: Option<Cbs>,
|
||||
pub(crate) struct IConvFlags {
|
||||
pub mode: Option<ConversionMode<'static>>,
|
||||
pub swab: bool,
|
||||
pub sync: Option<u8>,
|
||||
pub noerror: bool,
|
||||
|
@ -91,19 +106,11 @@ pub enum CountType {
|
|||
pub enum InternalError {
|
||||
WrongInputType,
|
||||
WrongOutputType,
|
||||
InvalidConvBlockUnblockCase,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for InternalError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::WrongInputType | Self::WrongOutputType => {
|
||||
write!(f, "Internal dd error: Wrong Input/Output data type")
|
||||
}
|
||||
Self::InvalidConvBlockUnblockCase => {
|
||||
write!(f, "Invalid Conversion, Block, or Unblock data")
|
||||
}
|
||||
}
|
||||
write!(f, "Internal dd error: Wrong Input/Output data type")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -45,7 +45,6 @@ const BUF_INIT_BYTE: u8 = 0xDD;
|
|||
|
||||
struct Input<R: Read> {
|
||||
src: R,
|
||||
non_ascii: bool,
|
||||
ibs: usize,
|
||||
print_level: Option<StatusLevel>,
|
||||
count: Option<CountType>,
|
||||
|
@ -56,7 +55,6 @@ struct Input<R: Read> {
|
|||
impl Input<io::Stdin> {
|
||||
fn new(matches: &Matches) -> UResult<Self> {
|
||||
let ibs = parseargs::parse_ibs(matches)?;
|
||||
let non_ascii = parseargs::parse_input_non_ascii(matches)?;
|
||||
let print_level = parseargs::parse_status_level(matches)?;
|
||||
let cflags = parseargs::parse_conv_flag_input(matches)?;
|
||||
let iflags = parseargs::parse_iflags(matches)?;
|
||||
|
@ -67,7 +65,6 @@ impl Input<io::Stdin> {
|
|||
|
||||
let mut i = Self {
|
||||
src: io::stdin(),
|
||||
non_ascii,
|
||||
ibs,
|
||||
print_level,
|
||||
count,
|
||||
|
@ -131,7 +128,6 @@ fn make_linux_iflags(iflags: &IFlags) -> Option<libc::c_int> {
|
|||
impl Input<File> {
|
||||
fn new(matches: &Matches) -> UResult<Self> {
|
||||
let ibs = parseargs::parse_ibs(matches)?;
|
||||
let non_ascii = parseargs::parse_input_non_ascii(matches)?;
|
||||
let print_level = parseargs::parse_status_level(matches)?;
|
||||
let cflags = parseargs::parse_conv_flag_input(matches)?;
|
||||
let iflags = parseargs::parse_iflags(matches)?;
|
||||
|
@ -163,7 +159,6 @@ impl Input<File> {
|
|||
|
||||
let i = Self {
|
||||
src,
|
||||
non_ascii,
|
||||
ibs,
|
||||
print_level,
|
||||
count,
|
||||
|
@ -607,16 +602,6 @@ impl Write for Output<io::Stdout> {
|
|||
|
||||
/// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user.
|
||||
fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> std::io::Result<(ReadStat, Vec<u8>)> {
|
||||
// Local Predicate Fns -----------------------------------------------
|
||||
fn is_conv<R: Read>(i: &Input<R>) -> bool {
|
||||
i.cflags.ctable.is_some()
|
||||
}
|
||||
fn is_block<R: Read>(i: &Input<R>) -> bool {
|
||||
i.cflags.block.is_some()
|
||||
}
|
||||
fn is_unblock<R: Read>(i: &Input<R>) -> bool {
|
||||
i.cflags.unblock.is_some()
|
||||
}
|
||||
// Local Helper Fns -------------------------------------------------
|
||||
fn perform_swab(buf: &mut [u8]) {
|
||||
for base in (1..buf.len()).step_by(2) {
|
||||
|
@ -639,11 +624,13 @@ fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> std::io::Result<(Read
|
|||
if i.cflags.swab {
|
||||
perform_swab(&mut buf);
|
||||
}
|
||||
if is_conv(i) || is_block(i) || is_unblock(i) {
|
||||
let buf = conv_block_unblock_helper(buf, i, &mut rstat).unwrap();
|
||||
Ok((rstat, buf))
|
||||
} else {
|
||||
Ok((rstat, buf))
|
||||
|
||||
match i.cflags.mode {
|
||||
Some(ref mode) => {
|
||||
let buf = conv_block_unblock_helper(buf, mode, &mut rstat);
|
||||
Ok((rstat, buf))
|
||||
}
|
||||
None => Ok((rstat, buf)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1089,7 +1076,6 @@ mod tests {
|
|||
src: LazyReader {
|
||||
src: File::open("./test-resources/deadbeef-16.test").unwrap(),
|
||||
},
|
||||
non_ascii: false,
|
||||
ibs: 16,
|
||||
print_level: None,
|
||||
count: None,
|
||||
|
@ -1136,7 +1122,6 @@ mod tests {
|
|||
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test")
|
||||
.unwrap(),
|
||||
},
|
||||
non_ascii: false,
|
||||
ibs: 521,
|
||||
print_level: None,
|
||||
count: None,
|
||||
|
|
|
@ -535,9 +535,50 @@ fn parse_flag_list<T: std::str::FromStr<Err = ParseError>>(
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Given the various command-line parameters, determine the conversion mode.
|
||||
///
|
||||
/// The `conv` command-line option can take many different values,
|
||||
/// each of which may combine with others. For example, `conv=ascii`,
|
||||
/// `conv=lcase`, `conv=sync`, and so on. The arguments to this
|
||||
/// function represent the settings of those various command-line
|
||||
/// parameters. This function translates those settings to a
|
||||
/// [`ConversionMode`].
|
||||
fn conversion_mode(
|
||||
ctable: Option<&ConversionTable>,
|
||||
block: Option<usize>,
|
||||
unblock: Option<usize>,
|
||||
non_ascii: bool,
|
||||
is_sync: bool,
|
||||
) -> Option<ConversionMode> {
|
||||
match (ctable, block, unblock) {
|
||||
(Some(ct), None, None) => Some(ConversionMode::ConvertOnly(ct)),
|
||||
(Some(ct), Some(cbs), None) => {
|
||||
if non_ascii {
|
||||
Some(ConversionMode::ConvertThenBlock(ct, cbs, is_sync))
|
||||
} else {
|
||||
Some(ConversionMode::BlockThenConvert(ct, cbs, is_sync))
|
||||
}
|
||||
}
|
||||
(Some(ct), None, Some(cbs)) => {
|
||||
if non_ascii {
|
||||
Some(ConversionMode::ConvertThenUnblock(ct, cbs))
|
||||
} else {
|
||||
Some(ConversionMode::UnblockThenConvert(ct, cbs))
|
||||
}
|
||||
}
|
||||
(None, Some(cbs), None) => Some(ConversionMode::BlockOnly(cbs, is_sync)),
|
||||
(None, None, Some(cbs)) => Some(ConversionMode::UnblockOnly(cbs)),
|
||||
(None, None, None) => None,
|
||||
// The remaining variants should never happen because the
|
||||
// argument parsing above should result in an error before
|
||||
// getting to this line of code.
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse Conversion Options (Input Variety)
|
||||
/// Construct and validate an `IConvFlags`
|
||||
pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError> {
|
||||
pub(crate) fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError> {
|
||||
let mut iconvflags = IConvFlags::default();
|
||||
let mut fmt = None;
|
||||
let mut case = None;
|
||||
|
@ -546,6 +587,9 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
|
|||
let flags = parse_flag_list(options::CONV, matches)?;
|
||||
let cbs = parse_cbs(matches)?;
|
||||
|
||||
let mut block = None;
|
||||
let mut unblock = None;
|
||||
|
||||
for flag in flags {
|
||||
match flag {
|
||||
ConvFlag::FmtEtoA => {
|
||||
|
@ -565,7 +609,7 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
|
|||
//
|
||||
// -- https://www.gnu.org/software/coreutils/manual/html_node/dd-invocation.html
|
||||
if cbs.is_some() {
|
||||
iconvflags.unblock = cbs;
|
||||
unblock = cbs;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -585,7 +629,7 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
|
|||
//
|
||||
// -- https://www.gnu.org/software/coreutils/manual/html_node/dd-invocation.html
|
||||
if cbs.is_some() {
|
||||
iconvflags.block = cbs;
|
||||
block = cbs;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -603,13 +647,13 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
|
|||
case = Some(flag);
|
||||
}
|
||||
}
|
||||
ConvFlag::Block => match (cbs, iconvflags.unblock) {
|
||||
(Some(cbs), None) => iconvflags.block = Some(cbs),
|
||||
ConvFlag::Block => match (cbs, unblock) {
|
||||
(Some(cbs), None) => block = Some(cbs),
|
||||
(None, _) => return Err(ParseError::BlockUnblockWithoutCBS),
|
||||
(_, Some(_)) => return Err(ParseError::MultipleBlockUnblock),
|
||||
},
|
||||
ConvFlag::Unblock => match (cbs, iconvflags.block) {
|
||||
(Some(cbs), None) => iconvflags.unblock = Some(cbs),
|
||||
ConvFlag::Unblock => match (cbs, block) {
|
||||
(Some(cbs), None) => unblock = Some(cbs),
|
||||
(None, _) => return Err(ParseError::BlockUnblockWithoutCBS),
|
||||
(_, Some(_)) => return Err(ParseError::MultipleBlockUnblock),
|
||||
},
|
||||
|
@ -630,7 +674,7 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
|
|||
// block implies sync with ' '
|
||||
// unblock implies sync with 0
|
||||
// So the final value can't be set until all flags are parsed.
|
||||
let sync = if is_sync && (iconvflags.block.is_some() || iconvflags.unblock.is_some()) {
|
||||
let sync = if is_sync && (block.is_some() || unblock.is_some()) {
|
||||
Some(b' ')
|
||||
} else if is_sync {
|
||||
Some(0u8)
|
||||
|
@ -638,8 +682,27 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
|
|||
None
|
||||
};
|
||||
|
||||
// Some user options, such as the presence of conversion tables,
|
||||
// will determine whether the input is assumed to be ascii. This
|
||||
// parser sets the non_ascii flag accordingly.
|
||||
//
|
||||
// Examples:
|
||||
//
|
||||
// - If conv=ebcdic or conv=ibm is specified then block,
|
||||
// unblock or swab must be performed before the conversion
|
||||
// happens since the source will start in ascii.
|
||||
// - If conv=ascii is specified then block, unblock or swab
|
||||
// must be performed after the conversion since the source
|
||||
// starts in ebcdic.
|
||||
// - If no conversion is specified then the source is assumed
|
||||
// to be in ascii.
|
||||
//
|
||||
// For more info see `info dd`.
|
||||
let non_ascii = parseargs::parse_input_non_ascii(matches)?;
|
||||
let mode = conversion_mode(ctable, block, unblock, non_ascii, is_sync);
|
||||
|
||||
Ok(IConvFlags {
|
||||
ctable,
|
||||
mode,
|
||||
sync,
|
||||
..iconvflags
|
||||
})
|
||||
|
|
|
@ -170,8 +170,11 @@ fn test_all_top_level_args_no_leading_dashes() {
|
|||
);
|
||||
assert_eq!(
|
||||
IConvFlags {
|
||||
ctable: Some(&EBCDIC_TO_ASCII_LCASE_TO_UCASE),
|
||||
unblock: Some(1), // because ascii implies unblock
|
||||
// ascii implies unblock
|
||||
mode: Some(ConversionMode::ConvertThenUnblock(
|
||||
&EBCDIC_TO_ASCII_LCASE_TO_UCASE,
|
||||
1
|
||||
)),
|
||||
..IConvFlags::default()
|
||||
},
|
||||
parse_conv_flag_input(&matches).unwrap()
|
||||
|
@ -269,8 +272,11 @@ fn test_all_top_level_args_with_leading_dashes() {
|
|||
);
|
||||
assert_eq!(
|
||||
IConvFlags {
|
||||
ctable: Some(&EBCDIC_TO_ASCII_LCASE_TO_UCASE),
|
||||
unblock: Some(1), // because ascii implies unblock
|
||||
// ascii implies unblock
|
||||
mode: Some(ConversionMode::ConvertThenUnblock(
|
||||
&EBCDIC_TO_ASCII_LCASE_TO_UCASE,
|
||||
1
|
||||
)),
|
||||
..IConvFlags::default()
|
||||
},
|
||||
parse_conv_flag_input(&matches).unwrap()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue