From 9f367b72e61b2267a68d041867d09205c341c208 Mon Sep 17 00:00:00 2001
From: Jeffrey Finkelstein <jeffrey.finkelstein@protonmail.com>
Date: Thu, 17 Feb 2022 18:58:25 -0500
Subject: [PATCH] dd: pad partial record with spaces in some cases

If `conv=block,sync` command-line arguments are given and there is at
least one partial record read from the input (for example, if the
length of the input is not divisible by the value of the `ibs`
argument), then output an extra block of `cbs` spaces.

For instance, no extra spaces are printed in the following invocation
because the input is of length 10, a multiple of `ibs`:

    $ printf "012\nabcde\n" \
    > | dd ibs=5 cbs=5 conv=block,sync status=noxfer \
    > && echo $
    012  abcde$
    2+0 records in
    0+1 records out

But in this example, 5 extra spaces are printed because the length of
the input is not a multiple of `ibs`:

    $ printf "012\nabcdefg\n" \
    > | dd ibs=5 cbs=5 conv=block,sync status=noxfer \
    > && echo $
    012  abcde     $
    2+1 records in
    0+1 records out
    1 truncated record

The number of spaces printed is the size of the conversion block,
given by `cbs`.
---
 src/uu/dd/src/blocks.rs  | 53 +++++++++++++++++++++++++---------------
 tests/by-util/test_dd.rs | 25 ++++++++++++++++++-
 2 files changed, 57 insertions(+), 21 deletions(-)
diff --git a/src/uu/dd/src/blocks.rs b/src/uu/dd/src/blocks.rs
index 61a2a6675..331bad56b 100644
--- a/src/uu/dd/src/blocks.rs
+++ b/src/uu/dd/src/blocks.rs
@@ -14,10 +14,17 @@ use std::io::Read;
 const NEWLINE: u8 = b'\n';
 const SPACE: u8 = b' ';
 
-/// Splits the content of buf into cbs-length blocks
-/// Appends padding as specified by conv=block and cbs=N
-/// Expects ascii encoded data
-fn block(buf: &[u8], cbs: usize, rstat: &mut ReadStat) -> Vec<Vec<u8>> {
+/// Split a slice into chunks, padding or truncating as necessary.
+///
+/// The slice `buf` is split on newlines, then each block is resized
+/// to `cbs` bytes, padding with spaces if necessary. This function
+/// expects the input bytes to be ASCII-encoded.
+///
+/// If the last block consists entirely of spaces, it is removed,
+/// unless `sync` is true and at least one partial record has been
+/// read from the input (`rstat.reads_partial` is nonzero), in which
+/// case the trailing all-spaces block is kept.
+fn block(buf: &[u8], cbs: usize, sync: bool, rstat: &mut ReadStat) -> Vec<Vec<u8>> {
     let mut blocks = buf
         .split(|&e| e == NEWLINE)
         .map(|split| split.to_vec())
@@ -31,8 +38,11 @@ fn block(buf: &[u8], cbs: usize, rstat: &mut ReadStat) -> Vec<Vec<u8>> {
             blocks
         });
 
+    // If `sync` is true and there has been at least one partial
+    // record read from the input, then leave the all-spaces block at
+    // the end. Otherwise, remove it.
     if let Some(last) = blocks.last() {
-        if last.iter().all(|&e| e == SPACE) {
+        if (!sync || rstat.reads_partial == 0) && last.iter().all(|&e| e == SPACE) {
             blocks.pop();
         }
     }
@@ -100,7 +110,7 @@ pub(crate) fn conv_block_unblock_helper<R: Read>(
         // ascii input so perform the block first
         let cbs = i.cflags.block.unwrap();
 
-        let mut blocks = block(&buf, cbs, rstat);
+        let mut blocks = block(&buf, cbs, i.cflags.sync.is_some(), rstat);
 
         if let Some(ct) = i.cflags.ctable {
             for buf in &mut blocks {
@@ -119,7 +129,10 @@ pub(crate) fn conv_block_unblock_helper<R: Read>(
             apply_conversion(&mut buf, ct);
         }
 
-        let blocks = block(&buf, cbs, rstat).into_iter().flatten().collect();
+        let blocks = block(&buf, cbs, i.cflags.sync.is_some(), rstat)
+            .into_iter()
+            .flatten()
+            .collect();
 
         Ok(blocks)
     } else if should_unblock_then_conv(i) {
@@ -167,7 +180,7 @@ mod tests {
     fn block_test_no_nl() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, 3u8];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(res, vec![vec![0u8, 1u8, 2u8, 3u8],]);
     }
@@ -176,7 +189,7 @@ mod tests {
     fn block_test_no_nl_short_record() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, 3u8];
-        let res = block(&buf, 8, &mut rs);
+        let res = block(&buf, 8, false, &mut rs);
 
         assert_eq!(
             res,
@@ -188,7 +201,7 @@ mod tests {
     fn block_test_no_nl_trunc() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, 3u8, 4u8];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         // Commented section(s) should be truncated and appear for reference only.
         assert_eq!(res, vec![vec![0u8, 1u8, 2u8, 3u8 /*, 4u8*/],]);
@@ -201,7 +214,7 @@ mod tests {
         let buf = [
             0u8, 1u8, 2u8, 3u8, 4u8, NEWLINE, 0u8, 1u8, 2u8, 3u8, 4u8, NEWLINE, 5u8, 6u8, 7u8, 8u8,
         ];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(
             res,
@@ -221,7 +234,7 @@ mod tests {
     fn block_test_surrounded_nl() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, 3u8, NEWLINE, 4u8, 5u8, 6u8, 7u8, 8u8];
-        let res = block(&buf, 8, &mut rs);
+        let res = block(&buf, 8, false, &mut rs);
 
         assert_eq!(
             res,
@@ -238,7 +251,7 @@ mod tests {
         let buf = [
             0u8, 1u8, 2u8, 3u8, NEWLINE, 4u8, NEWLINE, 5u8, 6u8, 7u8, 8u8, 9u8,
         ];
-        let res = block(&buf, 8, &mut rs);
+        let res = block(&buf, 8, false, &mut rs);
 
         assert_eq!(
             res,
@@ -256,7 +269,7 @@ mod tests {
         let buf = [
             0u8, 1u8, 2u8, 3u8, NEWLINE, 4u8, 5u8, 6u8, 7u8, NEWLINE, 8u8, 9u8,
         ];
-        let res = block(&buf, 8, &mut rs);
+        let res = block(&buf, 8, false, &mut rs);
 
         assert_eq!(
             res,
@@ -272,7 +285,7 @@ mod tests {
     fn block_test_end_nl_diff_cbs_block() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, 3u8, NEWLINE];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(res, vec![vec![0u8, 1u8, 2u8, 3u8],]);
     }
@@ -281,7 +294,7 @@ mod tests {
     fn block_test_end_nl_same_cbs_block() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, NEWLINE];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(res, vec![vec![0u8, 1u8, 2u8, SPACE]]);
     }
@@ -290,7 +303,7 @@ mod tests {
     fn block_test_double_end_nl() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, NEWLINE, NEWLINE];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(
             res,
@@ -302,7 +315,7 @@ mod tests {
     fn block_test_start_nl() {
         let mut rs = ReadStat::default();
         let buf = [NEWLINE, 0u8, 1u8, 2u8, 3u8];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(
             res,
@@ -314,7 +327,7 @@ mod tests {
     fn block_test_double_surrounded_nl_no_trunc() {
         let mut rs = ReadStat::default();
         let buf = [0u8, 1u8, 2u8, 3u8, NEWLINE, NEWLINE, 4u8, 5u8, 6u8, 7u8];
-        let res = block(&buf, 8, &mut rs);
+        let res = block(&buf, 8, false, &mut rs);
 
         assert_eq!(
             res,
@@ -332,7 +345,7 @@ mod tests {
         let buf = [
             0u8, 1u8, 2u8, 3u8, NEWLINE, NEWLINE, 4u8, 5u8, 6u8, 7u8, 8u8,
         ];
-        let res = block(&buf, 4, &mut rs);
+        let res = block(&buf, 4, false, &mut rs);
 
         assert_eq!(
             res,
diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs
index 04f5490ec..ddc0939a8 100644
--- a/tests/by-util/test_dd.rs
+++ b/tests/by-util/test_dd.rs
@@ -1,4 +1,4 @@
-// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi
+// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi nabcde nabcdefg abcdefg
 
 use crate::common::util::*;
 
@@ -1102,3 +1102,26 @@ fn test_truncated_record() {
 fn test_outfile_dev_null() {
     new_ucmd!().arg("of=/dev/null").succeeds().no_stdout();
 }
+
+#[test]
+fn test_block_sync() {
+    new_ucmd!()
+        .args(&["ibs=5", "cbs=5", "conv=block,sync", "status=noxfer"])
+        .pipe_in("012\nabcde\n")
+        .succeeds()
+        // blocks:    1    2
+        .stdout_is("012  abcde")
+        .stderr_is("2+0 records in\n0+1 records out\n");
+
+    // The input is 12 bytes, so with `ibs=5` the last read is a
+    // partial record, which shows up as an extra all-spaces block at
+    // the end of the output. The "1 truncated record" line appears
+    // because "abcdefg" is truncated to "abcde" (`cbs=5`).
+    new_ucmd!()
+        .args(&["ibs=5", "cbs=5", "conv=block,sync", "status=noxfer"])
+        .pipe_in("012\nabcdefg\n")
+        .succeeds()
+        // blocks:    1    2    3
+        .stdout_is("012  abcde     ")
+        .stderr_is("2+1 records in\n0+1 records out\n1 truncated record\n");
+}