From 3fbaa79359f1712489f13f5d4273caed36bce40e Mon Sep 17 00:00:00 2001
From: Jeffrey Finkelstein <jeffrey.finkelstein@protonmail.com>
Date: Tue, 1 Feb 2022 22:53:19 -0500
Subject: [PATCH] dd: add support for 'b' and 'x' multipliers

Support the suffix 'b' (multiply by 512) and 'x' (multiply by an
arbitrary amount) when specifying numeric arguments to dd.
---
 src/uu/dd/src/parseargs.rs | 124 ++++++++++++++++++++++++++++++++-----
 tests/by-util/test_dd.rs   |  22 ++++++-
 2 files changed, 128 insertions(+), 18 deletions(-)

diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs
index 492ab70cb..915a99344 100644
--- a/src/uu/dd/src/parseargs.rs
+++ b/src/uu/dd/src/parseargs.rs
@@ -4,7 +4,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore ctty, ctable, iconvflags, oconvflags
+// spell-checker:ignore ctty, ctable, iconvflags, oconvflags parseargs
 
 #[cfg(test)]
 mod unit_tests;
@@ -12,6 +12,7 @@ mod unit_tests;
 use super::*;
 use std::error::Error;
 use uucore::error::UError;
+use uucore::parse_size::ParseSizeError;
 
 pub type Matches = ArgMatches;
 
@@ -31,6 +32,25 @@ pub enum ParseError {
     Unimplemented(String),
 }
 
+impl ParseError {
+    /// Replace the argument, if any, with the given string, consuming self.
+    fn with_arg(self, s: String) -> Self {
+        match self {
+            Self::MultipleFmtTable => Self::MultipleFmtTable,
+            Self::MultipleUCaseLCase => Self::MultipleUCaseLCase,
+            Self::MultipleBlockUnblock => Self::MultipleBlockUnblock,
+            Self::MultipleExclNoCreate => Self::MultipleExclNoCreate,
+            Self::FlagNoMatch(_) => Self::FlagNoMatch(s),
+            Self::ConvFlagNoMatch(_) => Self::ConvFlagNoMatch(s),
+            Self::MultiplierStringParseFailure(_) => Self::MultiplierStringParseFailure(s),
+            Self::MultiplierStringOverflow(_) => Self::MultiplierStringOverflow(s),
+            Self::BlockUnblockWithoutCBS => Self::BlockUnblockWithoutCBS,
+            Self::StatusLevelNotRecognized(_) => Self::StatusLevelNotRecognized(s),
+            Self::Unimplemented(_) => Self::Unimplemented(s),
+        }
+    }
+}
+
 impl std::fmt::Display for ParseError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -310,28 +330,76 @@ fn parse_bytes_only(s: &str) -> Result<usize, ParseError> {
         .map_err(|_| ParseError::MultiplierStringParseFailure(s.to_string()))
 }
 
+/// Parse a number of bytes from the given string, assuming no `'x'` characters.
+///
+/// The `'x'` character means "multiply the number before the `'x'` by
+/// the number after the `'x'`". In order to compute the numbers
+/// before and after the `'x'`, use this function, which assumes there
+/// are no `'x'` characters in the string.
+///
+/// A suffix `'c'` means multiply by 1, `'w'` by 2, and `'b'` by
+/// 512. You can also use standard block size suffixes like `'k'` for
+/// 1024.
+///
+/// # Errors
+///
+/// If a number cannot be parsed or if the multiplication would cause
+/// an overflow.
+///
+/// # Examples
+///
+/// ```rust,ignore
+/// assert_eq!(parse_bytes_no_x("123").unwrap(), 123);
+/// assert_eq!(parse_bytes_no_x("2c").unwrap(), 2 * 1);
+/// assert_eq!(parse_bytes_no_x("3w").unwrap(), 3 * 2);
+/// assert_eq!(parse_bytes_no_x("2b").unwrap(), 2 * 512);
+/// assert_eq!(parse_bytes_no_x("2k").unwrap(), 2 * 1024);
+/// ```
+fn parse_bytes_no_x(s: &str) -> Result<usize, ParseError> {
+    let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) {
+        (None, None, None) => match uucore::parse_size::parse_size(s) {
+            Ok(n) => (n, 1),
+            Err(ParseSizeError::ParseFailure(s)) => {
+                return Err(ParseError::MultiplierStringParseFailure(s))
+            }
+            Err(ParseSizeError::SizeTooBig(s)) => {
+                return Err(ParseError::MultiplierStringOverflow(s))
+            }
+        },
+        (Some(i), None, None) => (parse_bytes_only(&s[..i])?, 1),
+        (None, Some(i), None) => (parse_bytes_only(&s[..i])?, 2),
+        (None, None, Some(i)) => (parse_bytes_only(&s[..i])?, 512),
+        _ => return Err(ParseError::MultiplierStringParseFailure(s.to_string())),
+    };
+    num.checked_mul(multiplier)
+        .ok_or_else(|| ParseError::MultiplierStringOverflow(s.to_string()))
+}
+
 /// Parse byte and multiplier like 512, 5KiB, or 1G.
 /// Uses uucore::parse_size, and adds the 'w' and 'c' suffixes which are mentioned
 /// in dd's info page.
 fn parse_bytes_with_opt_multiplier(s: &str) -> Result<usize, ParseError> {
-    if let Some(idx) = s.rfind('c') {
-        parse_bytes_only(&s[..idx])
-    } else if let Some(idx) = s.rfind('w') {
-        let partial = parse_bytes_only(&s[..idx])?;
+    // TODO On my Linux system, there seems to be a maximum block size of 4096 bytes:
+    //
+    //     $ printf "%0.sa" {1..10000} | dd bs=4095 count=1 status=none | wc -c
+    //     4095
+    //     $ printf "%0.sa" {1..10000} | dd bs=4k count=1 status=none | wc -c
+    //     4096
+    //     $ printf "%0.sa" {1..10000} | dd bs=4097 count=1 status=none | wc -c
+    //     4096
+    //     $ printf "%0.sa" {1..10000} | dd bs=5k count=1 status=none | wc -c
+    //     4096
+    //
 
-        partial
-            .checked_mul(2)
-            .ok_or_else(|| ParseError::MultiplierStringOverflow(s.to_string()))
-    } else {
-        uucore::parse_size::parse_size(s).map_err(|e| match e {
-            uucore::parse_size::ParseSizeError::ParseFailure(s) => {
-                ParseError::MultiplierStringParseFailure(s)
-            }
-            uucore::parse_size::ParseSizeError::SizeTooBig(s) => {
-                ParseError::MultiplierStringOverflow(s)
-            }
-        })
+    // Split on the 'x' characters. Each component will be parsed
+    // individually, then multiplied together.
+    let mut total = 1;
+    for part in s.split('x') {
+        let num = parse_bytes_no_x(part).map_err(|e| e.with_arg(s.to_string()))?;
+        total *= num;
     }
+
+    Ok(total)
 }
 
 pub fn parse_ibs(matches: &Matches) -> Result<usize, ParseError> {
@@ -689,3 +757,25 @@ pub fn parse_input_non_ascii(matches: &Matches) -> Result<bool, ParseError> {
         Ok(false)
     }
 }
+
+#[cfg(test)]
+mod tests {
+
+    use crate::parseargs::parse_bytes_with_opt_multiplier;
+
+    #[test]
+    fn test_parse_bytes_with_opt_multiplier() {
+        assert_eq!(parse_bytes_with_opt_multiplier("123").unwrap(), 123);
+        assert_eq!(parse_bytes_with_opt_multiplier("123c").unwrap(), 123 * 1);
+        assert_eq!(parse_bytes_with_opt_multiplier("123w").unwrap(), 123 * 2);
+        assert_eq!(parse_bytes_with_opt_multiplier("123b").unwrap(), 123 * 512);
+        assert_eq!(parse_bytes_with_opt_multiplier("123x3").unwrap(), 123 * 3);
+        assert_eq!(parse_bytes_with_opt_multiplier("123k").unwrap(), 123 * 1024);
+        assert_eq!(parse_bytes_with_opt_multiplier("1x2x3").unwrap(), 1 * 2 * 3);
+        assert_eq!(
+            parse_bytes_with_opt_multiplier("1wx2cx3w").unwrap(),
+            (1 * 2) * (2 * 1) * (3 * 2)
+        );
+        assert!(parse_bytes_with_opt_multiplier("123asdf").is_err());
+    }
+}
diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs
index e73fe0673..e9a1f9468 100644
--- a/tests/by-util/test_dd.rs
+++ b/tests/by-util/test_dd.rs
@@ -1,4 +1,4 @@
-// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm
+// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi
 
 use crate::common::util::*;
 
@@ -178,6 +178,26 @@ fn test_stdin_stdout_count_w_multiplier() {
         .success();
 }
 
+#[test]
+fn test_b_multiplier() {
+    // "2b" means 2 * 512, which is 1024.
+    new_ucmd!()
+        .args(&["bs=2b", "count=1"])
+        .pipe_in("a".repeat(1025))
+        .succeeds()
+        .stdout_is("a".repeat(1024));
+}
+
+#[test]
+fn test_x_multiplier() {
+    // "2x3" means 2 * 3, which is 6.
+    new_ucmd!()
+        .args(&["bs=2x3", "count=1"])
+        .pipe_in("abcdefghi")
+        .succeeds()
+        .stdout_is("abcdef");
+}
+
 #[test]
 fn test_final_stats_noxfer() {
     new_ucmd!()