1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #3293 from jfinkels/dd-conv-block-helper-remove-input

dd: create ConversionMode to simplify conversion, blocking, and unblocking
This commit is contained in:
Sylvestre Ledru 2022-03-26 10:10:48 +01:00 committed by GitHub
commit 5fba2a78da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 152 additions and 130 deletions

View file

@ -6,10 +6,8 @@
// spell-checker:ignore datastructures rstat rposition cflags ctable
use crate::conversion_tables::ConversionTable;
use crate::datastructures::InternalError;
use crate::datastructures::ConversionMode;
use crate::progress::ReadStat;
use crate::Input;
use std::io::Read;
const NEWLINE: u8 = b'\n';
const SPACE: u8 = b' ';
@ -65,105 +63,68 @@ fn unblock(buf: &[u8], cbs: usize) -> Vec<u8> {
})
}
/// A helper for teasing out which options must be applied and in which order.
/// Some user options, such as the presence of conversion tables, will determine whether the input is assumed to be ascii. The parser sets the Input::non_ascii flag accordingly.
/// Examples:
/// - If conv=ebcdic or conv=ibm is specified then block, unblock or swab must be performed before the conversion happens since the source will start in ascii.
/// - If conv=ascii is specified then block, unblock or swab must be performed after the conversion since the source starts in ebcdic.
/// - If no conversion is specified then the source is assumed to be in ascii.
/// For more info see `info dd`
pub(crate) fn conv_block_unblock_helper<R: Read>(
/// Apply the specified conversion, blocking, and/or unblocking in the right order.
///
/// The `mode` specifies the combination of conversion, blocking, and
/// unblocking to apply and the order in which to apply it. This
/// function is responsible only for applying the operations.
///
/// `buf` is the buffer of input bytes to transform. This function
/// mutates this input and also returns a new buffer of bytes
/// representing the result of the transformation.
///
/// `rstat` maintains a running total of the number of partial and
/// complete blocks read before calling this function. In certain
/// settings of `mode`, this function will update the number of
/// records truncated; that's why `rstat` is borrowed mutably.
pub(crate) fn conv_block_unblock_helper(
mut buf: Vec<u8>,
i: &mut Input<R>,
mode: &ConversionMode,
rstat: &mut ReadStat,
) -> Result<Vec<u8>, InternalError> {
// Local Predicate Fns -------------------------------------------------
fn should_block_then_conv<R: Read>(i: &Input<R>) -> bool {
!i.non_ascii && i.cflags.block.is_some()
}
fn should_conv_then_block<R: Read>(i: &Input<R>) -> bool {
i.non_ascii && i.cflags.block.is_some()
}
fn should_unblock_then_conv<R: Read>(i: &Input<R>) -> bool {
!i.non_ascii && i.cflags.unblock.is_some()
}
fn should_conv_then_unblock<R: Read>(i: &Input<R>) -> bool {
i.non_ascii && i.cflags.unblock.is_some()
}
fn conv_only<R: Read>(i: &Input<R>) -> bool {
i.cflags.ctable.is_some() && i.cflags.block.is_none() && i.cflags.unblock.is_none()
}
// Local Helper Fns ----------------------------------------------------
) -> Vec<u8> {
// TODO This function has a mutable input `buf` but also returns a
// completely new `Vec`; that seems fishy. Could we either make
// the input immutable or make the function not return anything?
fn apply_conversion(buf: &mut [u8], ct: &ConversionTable) {
for idx in 0..buf.len() {
buf[idx] = ct[buf[idx] as usize];
}
}
// --------------------------------------------------------------------
if conv_only(i) {
// no block/unblock
let ct = i.cflags.ctable.unwrap();
apply_conversion(&mut buf, ct);
Ok(buf)
} else if should_block_then_conv(i) {
// ascii input so perform the block first
let cbs = i.cflags.block.unwrap();
let mut blocks = block(&buf, cbs, i.cflags.sync.is_some(), rstat);
if let Some(ct) = i.cflags.ctable {
match mode {
ConversionMode::ConvertOnly(ct) => {
apply_conversion(&mut buf, ct);
buf
}
ConversionMode::BlockThenConvert(ct, cbs, sync) => {
let mut blocks = block(&buf, *cbs, *sync, rstat);
for buf in &mut blocks {
apply_conversion(buf, ct);
}
blocks.into_iter().flatten().collect()
}
let blocks = blocks.into_iter().flatten().collect();
Ok(blocks)
} else if should_conv_then_block(i) {
// Non-ascii so perform the conversion first
let cbs = i.cflags.block.unwrap();
if let Some(ct) = i.cflags.ctable {
ConversionMode::ConvertThenBlock(ct, cbs, sync) => {
apply_conversion(&mut buf, ct);
block(&buf, *cbs, *sync, rstat)
.into_iter()
.flatten()
.collect()
}
let blocks = block(&buf, cbs, i.cflags.sync.is_some(), rstat)
ConversionMode::BlockOnly(cbs, sync) => block(&buf, *cbs, *sync, rstat)
.into_iter()
.flatten()
.collect();
Ok(blocks)
} else if should_unblock_then_conv(i) {
// ascii input so perform the unblock first
let cbs = i.cflags.unblock.unwrap();
let mut buf = unblock(&buf, cbs);
if let Some(ct) = i.cflags.ctable {
.collect(),
ConversionMode::UnblockThenConvert(ct, cbs) => {
let mut buf = unblock(&buf, *cbs);
apply_conversion(&mut buf, ct);
buf
}
Ok(buf)
} else if should_conv_then_unblock(i) {
// Non-ascii input so perform the conversion first
let cbs = i.cflags.unblock.unwrap();
if let Some(ct) = i.cflags.ctable {
ConversionMode::ConvertThenUnblock(ct, cbs) => {
apply_conversion(&mut buf, ct);
unblock(&buf, *cbs)
}
let buf = unblock(&buf, cbs);
Ok(buf)
} else {
// The following error should not happen, as it results from
// insufficient command line data. This case should be caught
// by the parser before making it this far.
// Producing this error is an alternative to risking an unwrap call
// on 'cbs' if the required data is not provided.
Err(InternalError::InvalidConvBlockUnblockCase)
ConversionMode::UnblockOnly(cbs) => unblock(&buf, *cbs),
}
}

View file

@ -14,12 +14,27 @@ use crate::conversion_tables::*;
type Cbs = usize;
/// How to apply conversion, blocking, and/or unblocking.
///
/// Certain settings of the `conv` parameter to `dd` require a
/// combination of conversion, blocking, or unblocking, applied in a
/// certain order. The variants of this enumeration give the different
/// ways of combining those three operations.
///
/// Variants that carry a `Cbs` hold the `cbs` value passed to the
/// blocking or unblocking step. The `bool` on the blocking variants is
/// the sync flag that is forwarded to the blocking step.
#[derive(Debug, PartialEq)]
pub(crate) enum ConversionMode<'a> {
/// Only translate the bytes through the conversion table.
ConvertOnly(&'a ConversionTable),
/// Only block the input; fields are `(cbs, sync)`.
BlockOnly(Cbs, bool),
/// Only unblock the input using the given `cbs`.
UnblockOnly(Cbs),
/// Block the input first, then translate each resulting block
/// through the conversion table; fields are `(table, cbs, sync)`.
BlockThenConvert(&'a ConversionTable, Cbs, bool),
/// Translate the input through the conversion table first, then
/// block the translated bytes; fields are `(table, cbs, sync)`.
ConvertThenBlock(&'a ConversionTable, Cbs, bool),
/// Unblock the input first, then translate the unblocked bytes
/// through the conversion table; fields are `(table, cbs)`.
UnblockThenConvert(&'a ConversionTable, Cbs),
/// Translate the input through the conversion table first, then
/// unblock the translated bytes; fields are `(table, cbs)`.
ConvertThenUnblock(&'a ConversionTable, Cbs),
}
/// Stores all Conv Flags that apply to the input
#[derive(Debug, Default, PartialEq)]
pub struct IConvFlags {
pub ctable: Option<&'static ConversionTable>,
pub block: Option<Cbs>,
pub unblock: Option<Cbs>,
pub(crate) struct IConvFlags {
pub mode: Option<ConversionMode<'static>>,
pub swab: bool,
pub sync: Option<u8>,
pub noerror: bool,
@ -91,19 +106,11 @@ pub enum CountType {
pub enum InternalError {
WrongInputType,
WrongOutputType,
InvalidConvBlockUnblockCase,
}
impl std::fmt::Display for InternalError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::WrongInputType | Self::WrongOutputType => {
write!(f, "Internal dd error: Wrong Input/Output data type")
}
Self::InvalidConvBlockUnblockCase => {
write!(f, "Invalid Conversion, Block, or Unblock data")
}
}
write!(f, "Internal dd error: Wrong Input/Output data type")
}
}

View file

@ -45,7 +45,6 @@ const BUF_INIT_BYTE: u8 = 0xDD;
struct Input<R: Read> {
src: R,
non_ascii: bool,
ibs: usize,
print_level: Option<StatusLevel>,
count: Option<CountType>,
@ -56,7 +55,6 @@ struct Input<R: Read> {
impl Input<io::Stdin> {
fn new(matches: &Matches) -> UResult<Self> {
let ibs = parseargs::parse_ibs(matches)?;
let non_ascii = parseargs::parse_input_non_ascii(matches)?;
let print_level = parseargs::parse_status_level(matches)?;
let cflags = parseargs::parse_conv_flag_input(matches)?;
let iflags = parseargs::parse_iflags(matches)?;
@ -67,7 +65,6 @@ impl Input<io::Stdin> {
let mut i = Self {
src: io::stdin(),
non_ascii,
ibs,
print_level,
count,
@ -131,7 +128,6 @@ fn make_linux_iflags(iflags: &IFlags) -> Option<libc::c_int> {
impl Input<File> {
fn new(matches: &Matches) -> UResult<Self> {
let ibs = parseargs::parse_ibs(matches)?;
let non_ascii = parseargs::parse_input_non_ascii(matches)?;
let print_level = parseargs::parse_status_level(matches)?;
let cflags = parseargs::parse_conv_flag_input(matches)?;
let iflags = parseargs::parse_iflags(matches)?;
@ -163,7 +159,6 @@ impl Input<File> {
let i = Self {
src,
non_ascii,
ibs,
print_level,
count,
@ -607,16 +602,6 @@ impl Write for Output<io::Stdout> {
/// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user.
fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> std::io::Result<(ReadStat, Vec<u8>)> {
// Local Predicate Fns -----------------------------------------------
fn is_conv<R: Read>(i: &Input<R>) -> bool {
i.cflags.ctable.is_some()
}
fn is_block<R: Read>(i: &Input<R>) -> bool {
i.cflags.block.is_some()
}
fn is_unblock<R: Read>(i: &Input<R>) -> bool {
i.cflags.unblock.is_some()
}
// Local Helper Fns -------------------------------------------------
fn perform_swab(buf: &mut [u8]) {
for base in (1..buf.len()).step_by(2) {
@ -639,11 +624,13 @@ fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> std::io::Result<(Read
if i.cflags.swab {
perform_swab(&mut buf);
}
if is_conv(i) || is_block(i) || is_unblock(i) {
let buf = conv_block_unblock_helper(buf, i, &mut rstat).unwrap();
Ok((rstat, buf))
} else {
Ok((rstat, buf))
match i.cflags.mode {
Some(ref mode) => {
let buf = conv_block_unblock_helper(buf, mode, &mut rstat);
Ok((rstat, buf))
}
None => Ok((rstat, buf)),
}
}
@ -1089,7 +1076,6 @@ mod tests {
src: LazyReader {
src: File::open("./test-resources/deadbeef-16.test").unwrap(),
},
non_ascii: false,
ibs: 16,
print_level: None,
count: None,
@ -1136,7 +1122,6 @@ mod tests {
src: File::open("./test-resources/random-5828891cb1230748e146f34223bbd3b5.test")
.unwrap(),
},
non_ascii: false,
ibs: 521,
print_level: None,
count: None,

View file

@ -535,9 +535,50 @@ fn parse_flag_list<T: std::str::FromStr<Err = ParseError>>(
.collect()
}
/// Translate the parsed `conv` settings into a [`ConversionMode`].
///
/// The three optional inputs say which operations were requested:
///
/// - `ctable`: the conversion table to apply, if any;
/// - `block`: the `cbs` value to block with, if blocking was requested;
/// - `unblock`: the `cbs` value to unblock with, if unblocking was
///   requested.
///
/// When a conversion table is combined with blocking or unblocking,
/// `non_ascii` decides the order: if it is `true` the conversion is
/// applied first, otherwise the blocking/unblocking is applied first.
/// `is_sync` is stored unchanged in the blocking variants.
///
/// Returns `None` when none of the three operations was requested.
///
/// # Panics
///
/// Panics via `unreachable!()` if both `block` and `unblock` are set;
/// the argument parsing that feeds this function is expected to have
/// rejected that combination already.
fn conversion_mode(
    ctable: Option<&ConversionTable>,
    block: Option<usize>,
    unblock: Option<usize>,
    non_ascii: bool,
    is_sync: bool,
) -> Option<ConversionMode> {
    let selected = match (ctable, block, unblock) {
        // Nothing requested: there is no mode to build.
        (None, None, None) => return None,

        // A single operation on its own.
        (Some(table), None, None) => ConversionMode::ConvertOnly(table),
        (None, Some(cbs), None) => ConversionMode::BlockOnly(cbs, is_sync),
        (None, None, Some(cbs)) => ConversionMode::UnblockOnly(cbs),

        // Conversion combined with blocking: `non_ascii` picks the order.
        (Some(table), Some(cbs), None) if non_ascii => {
            ConversionMode::ConvertThenBlock(table, cbs, is_sync)
        }
        (Some(table), Some(cbs), None) => ConversionMode::BlockThenConvert(table, cbs, is_sync),

        // Conversion combined with unblocking: `non_ascii` picks the order.
        (Some(table), None, Some(cbs)) if non_ascii => {
            ConversionMode::ConvertThenUnblock(table, cbs)
        }
        (Some(table), None, Some(cbs)) => ConversionMode::UnblockThenConvert(table, cbs),

        // Blocking and unblocking together should never happen because
        // the argument parsing should result in an error before getting
        // to this line of code.
        (_, Some(_), Some(_)) => unreachable!(),
    };
    Some(selected)
}
/// Parse Conversion Options (Input Variety)
/// Construct and validate an IConvFlags
pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError> {
pub(crate) fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError> {
let mut iconvflags = IConvFlags::default();
let mut fmt = None;
let mut case = None;
@ -546,6 +587,9 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
let flags = parse_flag_list(options::CONV, matches)?;
let cbs = parse_cbs(matches)?;
let mut block = None;
let mut unblock = None;
for flag in flags {
match flag {
ConvFlag::FmtEtoA => {
@ -565,7 +609,7 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
//
// -- https://www.gnu.org/software/coreutils/manual/html_node/dd-invocation.html
if cbs.is_some() {
iconvflags.unblock = cbs;
unblock = cbs;
}
}
}
@ -585,7 +629,7 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
//
// -- https://www.gnu.org/software/coreutils/manual/html_node/dd-invocation.html
if cbs.is_some() {
iconvflags.block = cbs;
block = cbs;
}
}
}
@ -603,13 +647,13 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
case = Some(flag);
}
}
ConvFlag::Block => match (cbs, iconvflags.unblock) {
(Some(cbs), None) => iconvflags.block = Some(cbs),
ConvFlag::Block => match (cbs, unblock) {
(Some(cbs), None) => block = Some(cbs),
(None, _) => return Err(ParseError::BlockUnblockWithoutCBS),
(_, Some(_)) => return Err(ParseError::MultipleBlockUnblock),
},
ConvFlag::Unblock => match (cbs, iconvflags.block) {
(Some(cbs), None) => iconvflags.unblock = Some(cbs),
ConvFlag::Unblock => match (cbs, block) {
(Some(cbs), None) => unblock = Some(cbs),
(None, _) => return Err(ParseError::BlockUnblockWithoutCBS),
(_, Some(_)) => return Err(ParseError::MultipleBlockUnblock),
},
@ -630,7 +674,7 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
// block implies sync with ' '
// unblock implies sync with 0
// So the final value can't be set until all flags are parsed.
let sync = if is_sync && (iconvflags.block.is_some() || iconvflags.unblock.is_some()) {
let sync = if is_sync && (block.is_some() || unblock.is_some()) {
Some(b' ')
} else if is_sync {
Some(0u8)
@ -638,8 +682,27 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result<IConvFlags, ParseError
None
};
// Some user options, such as the presence of conversion tables,
// will determine whether the input is assumed to be ascii. This
// parser sets the non_ascii flag accordingly.
//
// Examples:
//
// - If conv=ebcdic or conv=ibm is specified then block,
// unblock or swab must be performed before the conversion
// happens since the source will start in ascii.
// - If conv=ascii is specified then block, unblock or swab
// must be performed after the conversion since the source
// starts in ebcdic.
// - If no conversion is specified then the source is assumed
// to be in ascii.
//
// For more info see `info dd`.
let non_ascii = parseargs::parse_input_non_ascii(matches)?;
let mode = conversion_mode(ctable, block, unblock, non_ascii, is_sync);
Ok(IConvFlags {
ctable,
mode,
sync,
..iconvflags
})

View file

@ -170,8 +170,11 @@ fn test_all_top_level_args_no_leading_dashes() {
);
assert_eq!(
IConvFlags {
ctable: Some(&EBCDIC_TO_ASCII_LCASE_TO_UCASE),
unblock: Some(1), // because ascii implies unblock
// ascii implies unblock
mode: Some(ConversionMode::ConvertThenUnblock(
&EBCDIC_TO_ASCII_LCASE_TO_UCASE,
1
)),
..IConvFlags::default()
},
parse_conv_flag_input(&matches).unwrap()
@ -269,8 +272,11 @@ fn test_all_top_level_args_with_leading_dashes() {
);
assert_eq!(
IConvFlags {
ctable: Some(&EBCDIC_TO_ASCII_LCASE_TO_UCASE),
unblock: Some(1), // because ascii implies unblock
// ascii implies unblock
mode: Some(ConversionMode::ConvertThenUnblock(
&EBCDIC_TO_ASCII_LCASE_TO_UCASE,
1
)),
..IConvFlags::default()
},
parse_conv_flag_input(&matches).unwrap()