diff --git a/GNUmakefile b/GNUmakefile index 8f9a8cae4..b3278f9ee 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -62,6 +62,7 @@ PROGS := \ csplit \ cut \ date \ + dd \ df \ dircolors \ dirname \ diff --git a/src/uu/dd/src/datastructures.rs b/src/uu/dd/src/datastructures.rs index 8fab1ffec..8380965a9 100644 --- a/src/uu/dd/src/datastructures.rs +++ b/src/uu/dd/src/datastructures.rs @@ -19,12 +19,34 @@ pub struct ProgUpdate { pub duration: time::Duration, } +impl ProgUpdate { + pub(crate) fn new( + read_stat: ReadStat, + write_stat: WriteStat, + duration: time::Duration, + ) -> Self { + Self { + read_stat, + write_stat, + duration, + } + } +} + #[derive(Clone, Copy, Default)] pub struct ReadStat { pub reads_complete: u64, pub reads_partial: u64, pub records_truncated: u32, } + +impl ReadStat { + /// Whether this counter has zero complete reads and zero partial reads. + pub(crate) fn is_empty(&self) -> bool { + self.reads_complete == 0 && self.reads_partial == 0 + } +} + impl std::ops::AddAssign for ReadStat { fn add_assign(&mut self, other: Self) { *self = Self { @@ -35,7 +57,7 @@ impl std::ops::AddAssign for ReadStat { } } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Default)] pub struct WriteStat { pub writes_complete: u64, pub writes_partial: u64, diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 13bacd946..4f02e3a3a 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -353,78 +353,111 @@ where }) } - fn dd_out(mut self, mut i: Input) -> UResult<()> { - let mut rstat = ReadStat { - reads_complete: 0, - reads_partial: 0, - records_truncated: 0, - }; - let mut wstat = WriteStat { - writes_complete: 0, - writes_partial: 0, - bytes_total: 0, - }; - let start = time::Instant::now(); - let bsize = calc_bsize(i.ibs, self.obs); - - let prog_tx = { - let (tx, rx) = mpsc::channel(); - thread::spawn(gen_prog_updater(rx, i.print_level)); - tx - }; - - while below_count_limit(&i.count, &rstat, &wstat) { - // Read/Write - let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize); - match read_helper(&mut i, loop_bsize)? { - ( - ReadStat { - reads_complete: 0, - reads_partial: 0, - .. - }, - _, - ) => break, - (rstat_update, buf) => { - let wstat_update = self - .write_blocks(&buf) - .map_err_context(|| "failed to write output".to_string())?; - - rstat += rstat_update; - wstat += wstat_update; - } - }; - // Update Prog - prog_tx - .send(ProgUpdate { - read_stat: rstat, - write_stat: wstat, - duration: start.elapsed(), - }) - .map_err(|_| USimpleError::new(1, "failed to write output"))?; - } - - if self.cflags.fsync { - self.fsync() - .map_err_context(|| "failed to write output".to_string())?; - } else if self.cflags.fdatasync { - self.fdatasync() - .map_err_context(|| "failed to write output".to_string())?; - } - + /// Print the read/write statistics. + fn print_stats(&self, i: &Input, prog_update: &ProgUpdate) { match i.print_level { Some(StatusLevel::None) => {} - Some(StatusLevel::Noxfer) => print_io_lines(&ProgUpdate { - read_stat: rstat, - write_stat: wstat, - duration: start.elapsed(), - }), - Some(StatusLevel::Progress) | None => print_transfer_stats(&ProgUpdate { - read_stat: rstat, - write_stat: wstat, - duration: start.elapsed(), - }), + Some(StatusLevel::Noxfer) => print_io_lines(prog_update), + Some(StatusLevel::Progress) | None => print_transfer_stats(prog_update), } + } + + /// Flush the output to disk, if configured to do so. + fn sync(&mut self) -> std::io::Result<()> { + if self.cflags.fsync { + self.fsync() + } else if self.cflags.fdatasync { + self.fdatasync() + } else { + // Intentionally do nothing in this case. + Ok(()) + } + } + + /// Copy the given input data to this output, consuming both. + /// + /// This method contains the main loop for the `dd` program. Bytes + /// are read in blocks from `i` and written in blocks to this + /// output. Read/write statistics are reported to stderr as + /// configured by the `status` command-line argument. + /// + /// # Errors + /// + /// If there is a problem reading from the input or writing to + /// this output. + fn dd_out(mut self, mut i: Input) -> std::io::Result<()> { + // The read and write statistics. + // + // These objects are counters, initialized to zero. After each + // iteration of the main loop, each will be incremented by the + // number of blocks read and written, respectively. + let mut rstat = Default::default(); + let mut wstat = Default::default(); + + // The time at which the main loop starts executing. + // + // When `status=progress` is given on the command-line, the + // `dd` program reports its progress every second or so. Part + // of its report includes the throughput in bytes per second, + // which requires knowing how long the process has been + // running. + let start = time::Instant::now(); + + // A good buffer size for reading. + // + // This is an educated guess about a good buffer size based on + // the input and output block sizes. + let bsize = calc_bsize(i.ibs, self.obs); + + // Start a thread that reports transfer progress. + // + // When `status=progress` is given on the command-line, the + // `dd` program reports its progress every second or so. We + // perform this reporting in a new thread so as not to take + // any CPU time away from the actual reading and writing of + // data. We send a `ProgUpdate` from the transmitter `prog_tx` + // to the receives `rx`, and the receiver prints the transfer + // information. + let (prog_tx, rx) = mpsc::channel(); + thread::spawn(gen_prog_updater(rx, i.print_level)); + + // The main read/write loop. + // + // Each iteration reads blocks from the input and writes + // blocks to this output. Read/write statistics are updated on + // each iteration and cumulative statistics are reported to + // the progress reporting thread. + while below_count_limit(&i.count, &rstat, &wstat) { + // Read a block from the input then write the block to the output. + // + // As an optimization, make an educated guess about the + // best buffer size for reading based on the number of + // blocks already read and the number of blocks remaining. + let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize); + let (rstat_update, buf) = read_helper(&mut i, loop_bsize)?; + if rstat_update.is_empty() { + break; + } + let wstat_update = self.write_blocks(&buf)?; + + // Update the read/write stats and inform the progress thread. + // + // If the receiver is disconnected, `send()` returns an + // error. Since it is just reporting progress and is not + // crucial to the operation of `dd`, let's just ignore the + // error. + rstat += rstat_update; + wstat += wstat_update; + let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed()); + prog_tx.send(prog_update).unwrap_or(()); + } + + // Flush the output, if configured to do so. + self.sync()?; + + // Print the final read/write statistics. + let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed()); + self.print_stats(&i, &prog_update); Ok(()) } } @@ -475,7 +508,6 @@ impl OutputTrait for Output { let mut opts = OpenOptions::new(); opts.write(true) .create(!cflags.nocreat) - .truncate(!cflags.notrunc) .create_new(cflags.excl) .append(oflags.append); @@ -495,13 +527,13 @@ impl OutputTrait for Output { let mut dst = open_dst(Path::new(&fname), &cflags, &oflags) .map_err_context(|| format!("failed to open {}", fname.quote()))?; - if let Some(amt) = seek { - let amt: u64 = amt - .try_into() - .map_err(|_| USimpleError::new(1, "failed to parse seek amount"))?; - dst.seek(io::SeekFrom::Start(amt)) - .map_err_context(|| "failed to seek in output file".to_string())?; + let i = seek.unwrap_or(0).try_into().unwrap(); + if !cflags.notrunc { + dst.set_len(i) + .map_err_context(|| "failed to truncate output file".to_string())?; } + dst.seek(io::SeekFrom::Start(i)) + .map_err_context(|| "failed to seek in output file".to_string())?; Ok(Self { dst, obs, cflags }) } else { @@ -704,7 +736,7 @@ fn conv_block_unblock_helper( } /// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user. -fn read_helper(i: &mut Input, bsize: usize) -> UResult<(ReadStat, Vec)> { +fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(ReadStat, Vec)> { // Local Predicate Fns ----------------------------------------------- fn is_conv(i: &Input) -> bool { i.cflags.ctable.is_some() @@ -725,12 +757,8 @@ fn read_helper(i: &mut Input, bsize: usize) -> UResult<(ReadStat, Ve // Read let mut buf = vec![BUF_INIT_BYTE; bsize]; let mut rstat = match i.cflags.sync { - Some(ch) => i - .fill_blocks(&mut buf, ch) - .map_err_context(|| "failed to write output".to_string())?, - _ => i - .fill_consecutive(&mut buf) - .map_err_context(|| "failed to write output".to_string())?, + Some(ch) => i.fill_blocks(&mut buf, ch)?, + _ => i.fill_consecutive(&mut buf)?, }; // Return early if no data if rstat.reads_complete == 0 && rstat.reads_partial == 0 { @@ -742,7 +770,7 @@ fn read_helper(i: &mut Input, bsize: usize) -> UResult<(ReadStat, Ve perform_swab(&mut buf); } if is_conv(i) || is_block(i) || is_unblock(i) { - let buf = conv_block_unblock_helper(buf, i, &mut rstat)?; + let buf = conv_block_unblock_helper(buf, i, &mut rstat).unwrap(); Ok((rstat, buf)) } else { Ok((rstat, buf)) @@ -932,22 +960,22 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { (true, true) => { let i = Input::::new(&matches)?; let o = Output::::new(&matches)?; - o.dd_out(i) + o.dd_out(i).map_err_context(|| "IO error".to_string()) } (false, true) => { let i = Input::::new(&matches)?; let o = Output::::new(&matches)?; - o.dd_out(i) + o.dd_out(i).map_err_context(|| "IO error".to_string()) } (true, false) => { let i = Input::::new(&matches)?; let o = Output::::new(&matches)?; - o.dd_out(i) + o.dd_out(i).map_err_context(|| "IO error".to_string()) } (false, false) => { let i = Input::::new(&matches)?; let o = Output::::new(&matches)?; - o.dd_out(i) + o.dd_out(i).map_err_context(|| "IO error".to_string()) } } } diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index 915a99344..6bc7bcfd9 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -13,6 +13,7 @@ use super::*; use std::error::Error; use uucore::error::UError; use uucore::parse_size::ParseSizeError; +use uucore::show_warning; pub type Matches = ArgMatches; @@ -356,6 +357,13 @@ fn parse_bytes_only(s: &str) -> Result { /// assert_eq!(parse_bytes_no_x("2k").unwrap(), 2 * 1024); /// ``` fn parse_bytes_no_x(s: &str) -> Result { + if s == "0" { + show_warning!( + "{} is a zero multiplier; use {} if that is intended", + "0x".quote(), + "00x".quote() + ); + } let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) { (None, None, None) => match uucore::parse_size::parse_size(s) { Ok(n) => (n, 1), diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index 07aa82dc1..e856a6b1e 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -161,64 +161,79 @@ impl Filesystem { } } +/// Keep only the specified subset of [`MountInfo`] instances. +/// +/// If `paths` is non-empty, this function excludes any [`MountInfo`] +/// that is not mounted at the specified path. +/// +/// The `opt` argument specifies a variety of ways of excluding +/// [`MountInfo`] instances; see [`Options`] for more information. +/// +/// Finally, if there are duplicate entries, the one with the shorter +/// path is kept. fn filter_mount_list(vmi: Vec, paths: &[String], opt: &Options) -> Vec { - vmi.into_iter() - .filter_map(|mi| { - if (mi.remote && opt.show_local_fs) - || (mi.dummy && !opt.show_all_fs && !opt.show_listed_fs) - || !opt.fs_selector.should_select(&mi.fs_type) - { - None - } else { - if paths.is_empty() { - // No path specified - return Some((mi.dev_id.clone(), mi)); - } - if paths.contains(&mi.mount_dir) { - // One or more paths have been provided - Some((mi.dev_id.clone(), mi)) - } else { - // Not a path we want to see - None - } - } - }) - .fold( - HashMap::>::new(), - |mut acc, (id, mi)| { - #[allow(clippy::map_entry)] - { - if acc.contains_key(&id) { - let seen = acc[&id].replace(mi.clone()); - let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len(); - // With bind mounts, prefer items nearer the root of the source - let source_below_root = !seen.mount_root.is_empty() - && !mi.mount_root.is_empty() - && seen.mount_root.len() < mi.mount_root.len(); - // let "real" devices with '/' in the name win. - if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/')) - // let points towards the root of the device win. - && (!target_nearer_root || source_below_root) - // let an entry over-mounted on a new device win... - && (seen.dev_name == mi.dev_name - /* ... but only when matching an existing mnt point, - to avoid problematic replacement when given - inaccurate mount lists, seen with some chroot - environments for example. */ - || seen.mount_dir != mi.mount_dir) - { - acc[&id].replace(seen); - } - } else { - acc.insert(id, Cell::new(mi)); - } - acc - } - }, - ) - .into_iter() - .map(|ent| ent.1.into_inner()) - .collect::>() + let mut mount_info_by_id = HashMap::>::new(); + for mi in vmi { + // Don't show remote filesystems if `--local` has been given. + if mi.remote && opt.show_local_fs { + continue; + } + + // Don't show pseudo filesystems unless `--all` has been given. + if mi.dummy && !opt.show_all_fs && !opt.show_listed_fs { + continue; + } + + // Don't show filesystems if they have been explicitly excluded. + if !opt.fs_selector.should_select(&mi.fs_type) { + continue; + } + + // Don't show filesystems other than the ones specified on the + // command line, if any. + if !paths.is_empty() && !paths.contains(&mi.mount_dir) { + continue; + } + + // If the device ID has not been encountered yet, just store it. + let id = mi.dev_id.clone(); + #[allow(clippy::map_entry)] + if !mount_info_by_id.contains_key(&id) { + mount_info_by_id.insert(id, Cell::new(mi)); + continue; + } + + // Otherwise, if we have seen the current device ID before, + // then check if we need to update it or keep the previously + // seen one. + let seen = mount_info_by_id[&id].replace(mi.clone()); + let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len(); + // With bind mounts, prefer items nearer the root of the source + let source_below_root = !seen.mount_root.is_empty() + && !mi.mount_root.is_empty() + && seen.mount_root.len() < mi.mount_root.len(); + // let "real" devices with '/' in the name win. + if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/')) + // let points towards the root of the device win. + && (!target_nearer_root || source_below_root) + // let an entry over-mounted on a new device win... + && (seen.dev_name == mi.dev_name + /* ... but only when matching an existing mnt point, + to avoid problematic replacement when given + inaccurate mount lists, seen with some chroot + environments for example. */ + || seen.mount_dir != mi.mount_dir) + { + mount_info_by_id[&id].replace(seen); + } + } + + // Take ownership of the `MountInfo` instances and collect them + // into a `Vec`. + mount_info_by_id + .into_values() + .map(|m| m.into_inner()) + .collect() } /// Convert `value` to a human readable string based on `base`. diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index e2a0f57ef..38fbc0fec 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -193,11 +193,19 @@ impl ScanUtil for str { } pub fn group_num(s: &str) -> Cow { - assert!(s.chars().all(char::is_numeric)); + let is_negative = s.starts_with('-'); + assert!(is_negative || s.chars().take(1).all(|c| c.is_digit(10))); + assert!(s.chars().skip(1).all(|c| c.is_digit(10))); if s.len() < 4 { return s.into(); } let mut res = String::with_capacity((s.len() - 1) / 3); + let s = if is_negative { + res.push('-'); + &s[1..] + } else { + s + }; let mut alone = (s.len() - 1) % 3 + 1; res.push_str(&s[..alone]); while alone != s.len() { diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index 30adb05fc..70153621f 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -198,6 +198,39 @@ fn test_x_multiplier() { .stdout_is("abcdef"); } +#[test] +fn test_zero_multiplier_warning() { + for arg in ["count", "seek", "skip"] { + new_ucmd!() + .args(&[format!("{}=00x1", arg).as_str(), "status=none"]) + .pipe_in("") + .succeeds() + .no_stdout() + .no_stderr(); + + new_ucmd!() + .args(&[format!("{}=0x1", arg).as_str(), "status=none"]) + .pipe_in("") + .succeeds() + .no_stdout() + .stderr_contains("warning: '0x' is a zero multiplier; use '00x' if that is intended"); + + new_ucmd!() + .args(&[format!("{}=0x0x1", arg).as_str(), "status=none"]) + .pipe_in("") + .succeeds() + .no_stdout() + .stderr_is("dd: warning: '0x' is a zero multiplier; use '00x' if that is intended\ndd: warning: '0x' is a zero multiplier; use '00x' if that is intended\n"); + + new_ucmd!() + .args(&[format!("{}=1x0x1", arg).as_str(), "status=none"]) + .pipe_in("") + .succeeds() + .no_stdout() + .stderr_contains("warning: '0x' is a zero multiplier; use '00x' if that is intended"); + } +} + #[test] fn test_final_stats_noxfer() { new_ucmd!() @@ -637,5 +670,27 @@ fn test_skip_beyond_file() { ); } +#[test] +fn test_seek_do_not_overwrite() { + let (at, mut ucmd) = at_and_ucmd!(); + let mut outfile = at.make_file("outfile"); + outfile.write_all(b"abc").unwrap(); + // Skip the first byte of the input, seek past the first byte of + // the output, and write only one byte to the output. + ucmd.args(&[ + "bs=1", + "skip=1", + "seek=1", + "count=1", + "status=noxfer", + "of=outfile", + ]) + .pipe_in("123") + .succeeds() + .stderr_is("1+0 records in\n1+0 records out\n") + .no_stdout(); + assert_eq!(at.read("outfile"), "a2"); +} + // conv=[ascii,ebcdic,ibm], conv=[ucase,lcase], conv=[block,unblock], conv=sync // TODO: Move conv tests from unit test module diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 8c1255a88..65db4804e 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -34,6 +34,8 @@ fn test_group_num() { assert_eq!("24", group_num("24")); assert_eq!("4", group_num("4")); assert_eq!("", group_num("")); + assert_eq!("-5", group_num("-5")); + assert_eq!("-1,234", group_num("-1234")); } #[test]