Merge branch 'main' into dd-skip-beyond-file

2025-09-16 19:56:17 +00:00 · 2022-02-08 20:46:07 +01:00 · 2022-02-08 20:46:07 +01:00 · daaae90113
commit daaae90113
parent 9f8ec676c5 936ac0db38
8 changed files with 286 additions and 147 deletions
--- a/1
+++ b/1
@ -62,6 +62,7 @@ PROGS       := \
 	csplit \
 	cut \
 	date \
+	dd \
 	df \
 	dircolors \
 	dirname \
--- a/src/uu/dd/src/datastructures.rs
+++ b/src/uu/dd/src/datastructures.rs
@ -19,12 +19,34 @@ pub struct ProgUpdate {
    pub duration: time::Duration,
 }

+impl ProgUpdate {
+    pub(crate) fn new(
+        read_stat: ReadStat,
+        write_stat: WriteStat,
+        duration: time::Duration,
+    ) -> Self {
+        Self {
+            read_stat,
+            write_stat,
+            duration,
+        }
+    }
+}
+
 #[derive(Clone, Copy, Default)]
 pub struct ReadStat {
    pub reads_complete: u64,
    pub reads_partial: u64,
    pub records_truncated: u32,
 }
+
+impl ReadStat {
+    /// Whether this counter has zero complete reads and zero partial reads.
+    pub(crate) fn is_empty(&self) -> bool {
+        self.reads_complete == 0 && self.reads_partial == 0
+    }
+}
+
 impl std::ops::AddAssign for ReadStat {
    fn add_assign(&mut self, other: Self) {
        *self = Self {
@ -35,7 +57,7 @@ impl std::ops::AddAssign for ReadStat {
    }
 }

-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Default)]
 pub struct WriteStat {
    pub writes_complete: u64,
    pub writes_partial: u64,
--- a/src/uu/dd/src/dd.rs
+++ b/src/uu/dd/src/dd.rs
@ -353,78 +353,111 @@ where
        })
    }

-    fn dd_out<R: Read>(mut self, mut i: Input<R>) -> UResult<()> {
-        let mut rstat = ReadStat {
-            reads_complete: 0,
-            reads_partial: 0,
-            records_truncated: 0,
-        };
-        let mut wstat = WriteStat {
-            writes_complete: 0,
-            writes_partial: 0,
-            bytes_total: 0,
-        };
-        let start = time::Instant::now();
-        let bsize = calc_bsize(i.ibs, self.obs);
-
-        let prog_tx = {
-            let (tx, rx) = mpsc::channel();
-            thread::spawn(gen_prog_updater(rx, i.print_level));
-            tx
-        };
-
-        while below_count_limit(&i.count, &rstat, &wstat) {
-            // Read/Write
-            let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize);
-            match read_helper(&mut i, loop_bsize)? {
-                (
-                    ReadStat {
-                        reads_complete: 0,
-                        reads_partial: 0,
-                        ..
-                    },
-                    _,
-                ) => break,
-                (rstat_update, buf) => {
-                    let wstat_update = self
-                        .write_blocks(&buf)
-                        .map_err_context(|| "failed to write output".to_string())?;
-
-                    rstat += rstat_update;
-                    wstat += wstat_update;
-                }
-            };
-            // Update Prog
-            prog_tx
-                .send(ProgUpdate {
-                    read_stat: rstat,
-                    write_stat: wstat,
-                    duration: start.elapsed(),
-                })
-                .map_err(|_| USimpleError::new(1, "failed to write output"))?;
-        }
-
-        if self.cflags.fsync {
-            self.fsync()
-                .map_err_context(|| "failed to write output".to_string())?;
-        } else if self.cflags.fdatasync {
-            self.fdatasync()
-                .map_err_context(|| "failed to write output".to_string())?;
-        }
-
+    /// Print the read/write statistics.
+    fn print_stats<R: Read>(&self, i: &Input<R>, prog_update: &ProgUpdate) {
        match i.print_level {
            Some(StatusLevel::None) => {}
-            Some(StatusLevel::Noxfer) => print_io_lines(&ProgUpdate {
-                read_stat: rstat,
-                write_stat: wstat,
-                duration: start.elapsed(),
-            }),
-            Some(StatusLevel::Progress) | None => print_transfer_stats(&ProgUpdate {
-                read_stat: rstat,
-                write_stat: wstat,
-                duration: start.elapsed(),
-            }),
+            Some(StatusLevel::Noxfer) => print_io_lines(prog_update),
+            Some(StatusLevel::Progress) | None => print_transfer_stats(prog_update),
        }
+    }
+
+    /// Flush the output to disk, if configured to do so.
+    fn sync(&mut self) -> std::io::Result<()> {
+        if self.cflags.fsync {
+            self.fsync()
+        } else if self.cflags.fdatasync {
+            self.fdatasync()
+        } else {
+            // Intentionally do nothing in this case.
+            Ok(())
+        }
+    }
+
+    /// Copy the given input data to this output, consuming both.
+    ///
+    /// This method contains the main loop for the `dd` program. Bytes
+    /// are read in blocks from `i` and written in blocks to this
+    /// output. Read/write statistics are reported to stderr as
+    /// configured by the `status` command-line argument.
+    ///
+    /// # Errors
+    ///
+    /// If there is a problem reading from the input or writing to
+    /// this output.
+    fn dd_out<R: Read>(mut self, mut i: Input<R>) -> std::io::Result<()> {
+        // The read and write statistics.
+        //
+        // These objects are counters, initialized to zero. After each
+        // iteration of the main loop, each will be incremented by the
+        // number of blocks read and written, respectively.
+        let mut rstat = Default::default();
+        let mut wstat = Default::default();
+
+        // The time at which the main loop starts executing.
+        //
+        // When `status=progress` is given on the command-line, the
+        // `dd` program reports its progress every second or so. Part
+        // of its report includes the throughput in bytes per second,
+        // which requires knowing how long the process has been
+        // running.
+        let start = time::Instant::now();
+
+        // A good buffer size for reading.
+        //
+        // This is an educated guess about a good buffer size based on
+        // the input and output block sizes.
+        let bsize = calc_bsize(i.ibs, self.obs);
+
+        // Start a thread that reports transfer progress.
+        //
+        // When `status=progress` is given on the command-line, the
+        // `dd` program reports its progress every second or so. We
+        // perform this reporting in a new thread so as not to take
+        // any CPU time away from the actual reading and writing of
+        // data. We send a `ProgUpdate` from the transmitter `prog_tx`
+        // to the receiver `rx`, and the receiver prints the transfer
+        // information.
+        let (prog_tx, rx) = mpsc::channel();
+        thread::spawn(gen_prog_updater(rx, i.print_level));
+
+        // The main read/write loop.
+        //
+        // Each iteration reads blocks from the input and writes
+        // blocks to this output. Read/write statistics are updated on
+        // each iteration and cumulative statistics are reported to
+        // the progress reporting thread.
+        while below_count_limit(&i.count, &rstat, &wstat) {
+            // Read a block from the input then write the block to the output.
+            //
+            // As an optimization, make an educated guess about the
+            // best buffer size for reading based on the number of
+            // blocks already read and the number of blocks remaining.
+            let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize);
+            let (rstat_update, buf) = read_helper(&mut i, loop_bsize)?;
+            if rstat_update.is_empty() {
+                break;
+            }
+            let wstat_update = self.write_blocks(&buf)?;
+
+            // Update the read/write stats and inform the progress thread.
+            //
+            // If the receiver is disconnected, `send()` returns an
+            // error. Since it is just reporting progress and is not
+            // crucial to the operation of `dd`, let's just ignore the
+            // error.
+            rstat += rstat_update;
+            wstat += wstat_update;
+            let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed());
+            prog_tx.send(prog_update).unwrap_or(());
+        }
+
+        // Flush the output, if configured to do so.
+        self.sync()?;
+
+        // Print the final read/write statistics.
+        let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed());
+        self.print_stats(&i, &prog_update);
        Ok(())
    }
 }
@ -475,7 +508,6 @@ impl OutputTrait for Output<File> {
            let mut opts = OpenOptions::new();
            opts.write(true)
                .create(!cflags.nocreat)
-                .truncate(!cflags.notrunc)
                .create_new(cflags.excl)
                .append(oflags.append);

@ -495,13 +527,13 @@ impl OutputTrait for Output<File> {
            let mut dst = open_dst(Path::new(&fname), &cflags, &oflags)
                .map_err_context(|| format!("failed to open {}", fname.quote()))?;

-            if let Some(amt) = seek {
-                let amt: u64 = amt
-                    .try_into()
-                    .map_err(|_| USimpleError::new(1, "failed to parse seek amount"))?;
-                dst.seek(io::SeekFrom::Start(amt))
-                    .map_err_context(|| "failed to seek in output file".to_string())?;
+            let i = seek.unwrap_or(0).try_into().unwrap();
+            if !cflags.notrunc {
+                dst.set_len(i)
+                    .map_err_context(|| "failed to truncate output file".to_string())?;
            }
+            dst.seek(io::SeekFrom::Start(i))
+                .map_err_context(|| "failed to seek in output file".to_string())?;

            Ok(Self { dst, obs, cflags })
        } else {
@ -704,7 +736,7 @@ fn conv_block_unblock_helper<R: Read>(
 }

 /// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user.
-fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> UResult<(ReadStat, Vec<u8>)> {
+fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> std::io::Result<(ReadStat, Vec<u8>)> {
    // Local Predicate Fns -----------------------------------------------
    fn is_conv<R: Read>(i: &Input<R>) -> bool {
        i.cflags.ctable.is_some()
@ -725,12 +757,8 @@ fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> UResult<(ReadStat, Ve
    // Read
    let mut buf = vec![BUF_INIT_BYTE; bsize];
    let mut rstat = match i.cflags.sync {
-        Some(ch) => i
-            .fill_blocks(&mut buf, ch)
-            .map_err_context(|| "failed to write output".to_string())?,
-        _ => i
-            .fill_consecutive(&mut buf)
-            .map_err_context(|| "failed to write output".to_string())?,
+        Some(ch) => i.fill_blocks(&mut buf, ch)?,
+        _ => i.fill_consecutive(&mut buf)?,
    };
    // Return early if no data
    if rstat.reads_complete == 0 && rstat.reads_partial == 0 {
@ -742,7 +770,7 @@ fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> UResult<(ReadStat, Ve
        perform_swab(&mut buf);
    }
    if is_conv(i) || is_block(i) || is_unblock(i) {
-        let buf = conv_block_unblock_helper(buf, i, &mut rstat)?;
+        let buf = conv_block_unblock_helper(buf, i, &mut rstat).unwrap();
        Ok((rstat, buf))
    } else {
        Ok((rstat, buf))
@ -932,22 +960,22 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
        (true, true) => {
            let i = Input::<File>::new(&matches)?;
            let o = Output::<File>::new(&matches)?;
-            o.dd_out(i)
+            o.dd_out(i).map_err_context(|| "IO error".to_string())
        }
        (false, true) => {
            let i = Input::<io::Stdin>::new(&matches)?;
            let o = Output::<File>::new(&matches)?;
-            o.dd_out(i)
+            o.dd_out(i).map_err_context(|| "IO error".to_string())
        }
        (true, false) => {
            let i = Input::<File>::new(&matches)?;
            let o = Output::<io::Stdout>::new(&matches)?;
-            o.dd_out(i)
+            o.dd_out(i).map_err_context(|| "IO error".to_string())
        }
        (false, false) => {
            let i = Input::<io::Stdin>::new(&matches)?;
            let o = Output::<io::Stdout>::new(&matches)?;
-            o.dd_out(i)
+            o.dd_out(i).map_err_context(|| "IO error".to_string())
        }
    }
 }
--- a/src/uu/dd/src/parseargs.rs
+++ b/src/uu/dd/src/parseargs.rs
@ -13,6 +13,7 @@ use super::*;
 use std::error::Error;
 use uucore::error::UError;
 use uucore::parse_size::ParseSizeError;
+use uucore::show_warning;

 pub type Matches = ArgMatches;

@ -356,6 +357,13 @@ fn parse_bytes_only(s: &str) -> Result<usize, ParseError> {
 /// assert_eq!(parse_bytes_no_x("2k").unwrap(), 2 * 1024);
 /// ```
 fn parse_bytes_no_x(s: &str) -> Result<usize, ParseError> {
+    if s == "0" {
+        show_warning!(
+            "{} is a zero multiplier; use {} if that is intended",
+            "0x".quote(),
+            "00x".quote()
+        );
+    }
    let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) {
        (None, None, None) => match uucore::parse_size::parse_size(s) {
            Ok(n) => (n, 1),
--- a/src/uu/df/src/df.rs
+++ b/src/uu/df/src/df.rs
@ -161,64 +161,79 @@ impl Filesystem {
    }
 }

+/// Keep only the specified subset of [`MountInfo`] instances.
+///
+/// If `paths` is non-empty, this function excludes any [`MountInfo`]
+/// that is not mounted at one of the specified paths.
+///
+/// The `opt` argument specifies a variety of ways of excluding
+/// [`MountInfo`] instances; see [`Options`] for more information.
+///
+/// Finally, if there are duplicate entries, the one with the shorter
+/// path is kept.
 fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> {
-    vmi.into_iter()
-        .filter_map(|mi| {
-            if (mi.remote && opt.show_local_fs)
-                || (mi.dummy && !opt.show_all_fs && !opt.show_listed_fs)
-                || !opt.fs_selector.should_select(&mi.fs_type)
-            {
-                None
-            } else {
-                if paths.is_empty() {
-                    // No path specified
-                    return Some((mi.dev_id.clone(), mi));
-                }
-                if paths.contains(&mi.mount_dir) {
-                    // One or more paths have been provided
-                    Some((mi.dev_id.clone(), mi))
-                } else {
-                    // Not a path we want to see
-                    None
-                }
-            }
-        })
-        .fold(
-            HashMap::<String, Cell<MountInfo>>::new(),
-            |mut acc, (id, mi)| {
-                #[allow(clippy::map_entry)]
-                {
-                    if acc.contains_key(&id) {
-                        let seen = acc[&id].replace(mi.clone());
-                        let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len();
-                        // With bind mounts, prefer items nearer the root of the source
-                        let source_below_root = !seen.mount_root.is_empty()
-                            && !mi.mount_root.is_empty()
-                            && seen.mount_root.len() < mi.mount_root.len();
-                        // let "real" devices with '/' in the name win.
-                        if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/'))
-                            // let points towards the root of the device win.
-                            && (!target_nearer_root || source_below_root)
-                            // let an entry over-mounted on a new device win...
-                            && (seen.dev_name == mi.dev_name
-                            /* ... but only when matching an existing mnt point,
-                            to avoid problematic replacement when given
-                            inaccurate mount lists, seen with some chroot
-                            environments for example.  */
-                            || seen.mount_dir != mi.mount_dir)
-                        {
-                            acc[&id].replace(seen);
-                        }
-                    } else {
-                        acc.insert(id, Cell::new(mi));
-                    }
-                    acc
-                }
-            },
-        )
-        .into_iter()
-        .map(|ent| ent.1.into_inner())
-        .collect::<Vec<_>>()
+    let mut mount_info_by_id = HashMap::<String, Cell<MountInfo>>::new();
+    for mi in vmi {
+        // Don't show remote filesystems if `--local` has been given.
+        if mi.remote && opt.show_local_fs {
+            continue;
+        }
+
+        // Don't show pseudo filesystems unless `--all` has been given or `opt.show_listed_fs` is set.
+        if mi.dummy && !opt.show_all_fs && !opt.show_listed_fs {
+            continue;
+        }
+
+        // Don't show filesystems if they have been explicitly excluded.
+        if !opt.fs_selector.should_select(&mi.fs_type) {
+            continue;
+        }
+
+        // Don't show filesystems other than the ones specified on the
+        // command line, if any.
+        if !paths.is_empty() && !paths.contains(&mi.mount_dir) {
+            continue;
+        }
+
+        // If the device ID has not been encountered yet, just store it.
+        let id = mi.dev_id.clone();
+        #[allow(clippy::map_entry)]
+        if !mount_info_by_id.contains_key(&id) {
+            mount_info_by_id.insert(id, Cell::new(mi));
+            continue;
+        }
+
+        // Otherwise, if we have seen the current device ID before,
+        // then check if we need to update it or keep the previously
+        // seen one.
+        let seen = mount_info_by_id[&id].replace(mi.clone());
+        let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len();
+        // With bind mounts, prefer items nearer the root of the source
+        let source_below_root = !seen.mount_root.is_empty()
+            && !mi.mount_root.is_empty()
+            && seen.mount_root.len() < mi.mount_root.len();
+        // let "real" devices with '/' in the name win.
+        if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/'))
+            // let points towards the root of the device win.
+            && (!target_nearer_root || source_below_root)
+            // let an entry over-mounted on a new device win...
+            && (seen.dev_name == mi.dev_name
+                /* ... but only when matching an existing mnt point,
+                to avoid problematic replacement when given
+                inaccurate mount lists, seen with some chroot
+                environments for example.  */
+                || seen.mount_dir != mi.mount_dir)
+        {
+            mount_info_by_id[&id].replace(seen);
+        }
+    }
+
+    // Take ownership of the `MountInfo` instances and collect them
+    // into a `Vec`.
+    mount_info_by_id
+        .into_values()
+        .map(|m| m.into_inner())
+        .collect()
 }

 /// Convert `value` to a human readable string based on `base`.
--- a/src/uu/stat/src/stat.rs
+++ b/src/uu/stat/src/stat.rs
@ -193,11 +193,19 @@ impl ScanUtil for str {
 }

 pub fn group_num(s: &str) -> Cow<str> {
-    assert!(s.chars().all(char::is_numeric));
+    let is_negative = s.starts_with('-');
+    assert!(is_negative || s.chars().take(1).all(|c| c.is_digit(10)));
+    assert!(s.chars().skip(1).all(|c| c.is_digit(10)));
    if s.len() < 4 {
        return s.into();
    }
    let mut res = String::with_capacity((s.len() - 1) / 3);
+    let s = if is_negative {
+        res.push('-');
+        &s[1..]
+    } else {
+        s
+    };
    let mut alone = (s.len() - 1) % 3 + 1;
    res.push_str(&s[..alone]);
    while alone != s.len() {
--- a/tests/by-util/test_dd.rs
+++ b/tests/by-util/test_dd.rs
@ -198,6 +198,39 @@ fn test_x_multiplier() {
        .stdout_is("abcdef");
 }

+#[test]
+fn test_zero_multiplier_warning() {
+    for arg in ["count", "seek", "skip"] {
+        new_ucmd!()
+            .args(&[format!("{}=00x1", arg).as_str(), "status=none"])
+            .pipe_in("")
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&[format!("{}=0x1", arg).as_str(), "status=none"])
+            .pipe_in("")
+            .succeeds()
+            .no_stdout()
+            .stderr_contains("warning: '0x' is a zero multiplier; use '00x' if that is intended");
+
+        new_ucmd!()
+            .args(&[format!("{}=0x0x1", arg).as_str(), "status=none"])
+            .pipe_in("")
+            .succeeds()
+            .no_stdout()
+            .stderr_is("dd: warning: '0x' is a zero multiplier; use '00x' if that is intended\ndd: warning: '0x' is a zero multiplier; use '00x' if that is intended\n");
+
+        new_ucmd!()
+            .args(&[format!("{}=1x0x1", arg).as_str(), "status=none"])
+            .pipe_in("")
+            .succeeds()
+            .no_stdout()
+            .stderr_contains("warning: '0x' is a zero multiplier; use '00x' if that is intended");
+    }
+}
+
 #[test]
 fn test_final_stats_noxfer() {
    new_ucmd!()
@ -637,5 +670,27 @@ fn test_skip_beyond_file() {
        );
 }

+#[test]
+fn test_seek_do_not_overwrite() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let mut outfile = at.make_file("outfile");
+    outfile.write_all(b"abc").unwrap();
+    // Skip the first byte of the input, seek past the first byte of
+    // the output, and write only one byte to the output.
+    ucmd.args(&[
+        "bs=1",
+        "skip=1",
+        "seek=1",
+        "count=1",
+        "status=noxfer",
+        "of=outfile",
+    ])
+    .pipe_in("123")
+    .succeeds()
+    .stderr_is("1+0 records in\n1+0 records out\n")
+    .no_stdout();
+    assert_eq!(at.read("outfile"), "a2");
+}
+
 // conv=[ascii,ebcdic,ibm], conv=[ucase,lcase], conv=[block,unblock], conv=sync
 // TODO: Move conv tests from unit test module
--- a/tests/by-util/test_stat.rs
+++ b/tests/by-util/test_stat.rs
@ -34,6 +34,8 @@ fn test_group_num() {
    assert_eq!("24", group_num("24"));
    assert_eq!("4", group_num("4"));
    assert_eq!("", group_num(""));
+    assert_eq!("-5", group_num("-5"));
+    assert_eq!("-1,234", group_num("-1234"));
 }

 #[test]