From 12e4acaec3740a19cef2c804b25f8a92725ef8a5 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 23 Feb 2023 22:03:36 -0500 Subject: [PATCH 1/4] dd: add bytes_total to ReadStat Add the `bytes_total` field to the `ReadStat` struct. This lets the main loop of `dd` keep track of the total number of bytes read. --- src/uu/dd/src/dd.rs | 8 ++++++-- src/uu/dd/src/progress.rs | 19 ++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 836456635..ea353d625 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -317,13 +317,13 @@ impl<'a> Input<'a> { _ => break, } } - buf.truncate(bytes_total); Ok(ReadStat { reads_complete, reads_partial, // Records are not truncated when filling. records_truncated: 0, + bytes_total: bytes_total.try_into().unwrap(), }) } @@ -334,6 +334,7 @@ impl<'a> Input<'a> { let mut reads_complete = 0; let mut reads_partial = 0; let mut base_idx = 0; + let mut bytes_total = 0; while base_idx < buf.len() { let next_blk = cmp::min(base_idx + self.settings.ibs, buf.len()); @@ -342,11 +343,13 @@ impl<'a> Input<'a> { match self.read(&mut buf[base_idx..next_blk])? { 0 => break, rlen if rlen < target_len => { + bytes_total += rlen; reads_partial += 1; let padding = vec![pad; target_len - rlen]; buf.splice(base_idx + rlen..next_blk, padding.into_iter()); } - _ => { + rlen => { + bytes_total += rlen; reads_complete += 1; } } @@ -359,6 +362,7 @@ impl<'a> Input<'a> { reads_complete, reads_partial, records_truncated: 0, + bytes_total: bytes_total.try_into().unwrap(), }) } } diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs index 51cfa92ef..65af053b8 100644 --- a/src/uu/dd/src/progress.rs +++ b/src/uu/dd/src/progress.rs @@ -79,9 +79,9 @@ impl ProgUpdate { /// ```rust,ignore /// use std::io::Cursor; /// use std::time::Duration; - /// use crate::progress::{ProgUpdate, ReadState, WriteStat}; + /// use crate::progress::{ProgUpdate, ReadStat, WriteStat}; /// - /// let read_stat = ReadStat::new(1, 2, 3); + /// let read_stat = ReadStat::new(1, 2, 3, 999); /// let write_stat = WriteStat::new(4, 5, 6); /// let duration = Duration::new(789, 0); /// let prog_update = ProgUpdate { @@ -121,7 +121,7 @@ impl ProgUpdate { /// ```rust,ignore /// use std::io::Cursor; /// use std::time::Duration; - /// use crate::progress::{ProgUpdate, ReadState, WriteStat}; + /// use crate::progress::ProgUpdate; /// /// let prog_update = ProgUpdate { /// read_stat: Default::default(), @@ -191,7 +191,7 @@ impl ProgUpdate { /// ```rust,ignore /// use std::io::Cursor; /// use std::time::Duration; - /// use crate::progress::{ProgUpdate, ReadState, WriteStat}; + /// use crate::progress::ProgUpdate; /// /// let prog_update = ProgUpdate { /// read_stat: Default::default(), @@ -276,16 +276,20 @@ pub(crate) struct ReadStat { /// /// A truncated record can only occur in `conv=block` mode. pub(crate) records_truncated: u32, + + /// The total number of bytes read. + pub(crate) bytes_total: u64, } impl ReadStat { /// Create a new instance. #[allow(dead_code)] - fn new(complete: u64, partial: u64, truncated: u32) -> Self { + fn new(complete: u64, partial: u64, truncated: u32, bytes_total: u64) -> Self { Self { reads_complete: complete, reads_partial: partial, records_truncated: truncated, + bytes_total, } } @@ -315,6 +319,7 @@ impl std::ops::AddAssign for ReadStat { reads_complete: self.reads_complete + other.reads_complete, reads_partial: self.reads_partial + other.reads_partial, records_truncated: self.records_truncated + other.records_truncated, + bytes_total: self.bytes_total + other.bytes_total, } } } @@ -514,7 +519,7 @@ mod tests { #[test] fn test_read_stat_report() { - let read_stat = ReadStat::new(1, 2, 3); + let read_stat = ReadStat::new(1, 2, 3, 4); let mut cursor = Cursor::new(vec![]); read_stat.report(&mut cursor).unwrap(); assert_eq!(cursor.get_ref(), b"1+2 records in\n"); @@ -530,7 +535,7 @@ mod tests { #[test] fn test_prog_update_write_io_lines() { - let read_stat = ReadStat::new(1, 2, 3); + let read_stat = ReadStat::new(1, 2, 3, 4); let write_stat = WriteStat::new(4, 5, 6); let duration = Duration::new(789, 0); let complete = false; From cc2f97ba0dc61aa4cc3119f55c38e89e5808cb56 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 23 Feb 2023 22:05:16 -0500 Subject: [PATCH 2/4] dd: add discard_cache() funcs for Input, Output Add the `Input::discard_cache()` and `Output::discard_cache()` functions. These allow discarding the filesystem cache when `dd` no longer needs to access a specified portion of the input or output file, respectively. --- Cargo.lock | 1 + src/uu/dd/Cargo.toml | 3 ++ src/uu/dd/src/dd.rs | 109 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 9827a7893..caf494625 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2552,6 +2552,7 @@ dependencies = [ "clap", "gcd", "libc", + "nix", "signal-hook", "uucore", ] diff --git a/src/uu/dd/Cargo.toml b/src/uu/dd/Cargo.toml index b81ef1a8e..04b60dc31 100644 --- a/src/uu/dd/Cargo.toml +++ b/src/uu/dd/Cargo.toml @@ -20,6 +20,9 @@ gcd = { workspace=true } libc = { workspace=true } uucore = { workspace=true, features=["memo"] } +[target.'cfg(any(target_os = "linux"))'.dependencies] +nix = { workspace=true, features = ["fs"] } + [target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] signal-hook = { workspace=true } diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index ea353d625..43f0da5df 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -5,7 +5,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized +// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE mod datastructures; use datastructures::*; @@ -42,9 +42,16 @@ use std::time; use clap::{crate_version, Arg, Command}; use gcd::Gcd; +#[cfg(target_os = "linux")] +use nix::{ + errno::Errno, + fcntl::{posix_fadvise, PosixFadviseAdvice}, +}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult}; use uucore::{format_usage, help_about, help_section, help_usage, show_error}; +#[cfg(target_os = "linux")] +use uucore::{show, show_if_err}; const ABOUT: &str = help_about!("dd.md"); const AFTER_HELP: &str = help_section!("after help", "dd.md"); @@ -131,6 +138,16 @@ impl Source { Self::StdinFile(f) } + /// The length of the data source in number of bytes. + /// + /// If it cannot be determined, then this function returns 0. + fn len(&self) -> std::io::Result { + match self { + Self::File(f) => Ok(f.metadata()?.len().try_into().unwrap_or(i64::MAX)), + _ => Ok(0), + } + } + fn skip(&mut self, n: u64) -> io::Result { match self { #[cfg(not(unix))] @@ -156,6 +173,23 @@ impl Source { Self::Fifo(f) => io::copy(&mut f.take(n), &mut io::sink()), } } + + /// Discard the system file cache for the given portion of the data source. + /// + /// `offset` and `len` specify a contiguous portion of the data + /// source. This function informs the kernel that the specified + /// portion of the source is no longer needed. If not possible, + /// then this function returns an error. + #[cfg(target_os = "linux")] + fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) -> nix::Result<()> { + match self { + Self::File(f) => { + let advice = PosixFadviseAdvice::POSIX_FADV_DONTNEED; + posix_fadvise(f.as_raw_fd(), offset, len, advice) + } + _ => Err(Errno::ESPIPE), // "Illegal seek" + } + } } impl Read for Source { @@ -296,6 +330,29 @@ impl<'a> Read for Input<'a> { } impl<'a> Input<'a> { + /// Discard the system file cache for the given portion of the input. + /// + /// `offset` and `len` specify a contiguous portion of the input. + /// This function informs the kernel that the specified portion of + /// the input file is no longer needed. If not possible, then this + /// function prints an error message to stderr and sets the exit + /// status code to 1. + #[allow(unused_variables)] + fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) { + #[cfg(target_os = "linux")] + { + show_if_err!(self + .src + .discard_cache(offset, len) + .map_err_context(|| "failed to discard cache for: 'standard input'".to_string())); + } + #[cfg(not(target_os = "linux"))] + { + // TODO Is there a way to discard filesystem cache on + // these other operating systems? + } + } + /// Fills a given buffer. /// Reads in increments of 'self.ibs'. /// The start of each ibs-sized read follows the previous one. @@ -451,6 +508,33 @@ impl Dest { _ => Ok(()), } } + + /// Discard the system file cache for the given portion of the destination. + /// + /// `offset` and `len` specify a contiguous portion of the + /// destination. This function informs the kernel that the + /// specified portion of the destination is no longer needed. If + /// not possible, then this function returns an error. + #[cfg(target_os = "linux")] + fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) -> nix::Result<()> { + match self { + Self::File(f, _) => { + let advice = PosixFadviseAdvice::POSIX_FADV_DONTNEED; + posix_fadvise(f.as_raw_fd(), offset, len, advice) + } + _ => Err(Errno::ESPIPE), // "Illegal seek" + } + } + + /// The length of the data destination in number of bytes. + /// + /// If it cannot be determined, then this function returns 0. + fn len(&self) -> std::io::Result { + match self { + Self::File(f, _) => Ok(f.metadata()?.len().try_into().unwrap_or(i64::MAX)), + _ => Ok(0), + } + } } /// Decide whether the given buffer is all zeros. @@ -584,6 +668,29 @@ impl<'a> Output<'a> { Ok(Self { dst, settings }) } + /// Discard the system file cache for the given portion of the output. + /// + /// `offset` and `len` specify a contiguous portion of the output. + /// This function informs the kernel that the specified portion of + /// the output file is no longer needed. If not possible, then + /// this function prints an error message to stderr and sets the + /// exit status code to 1. + #[allow(unused_variables)] + fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) { + #[cfg(target_os = "linux")] + { + show_if_err!(self + .dst + .discard_cache(offset, len) + .map_err_context(|| "failed to discard cache for: 'standard output'".to_string())); + } + #[cfg(target_os = "linux")] + { + // TODO Is there a way to discard filesystem cache on + // these other operating systems? + } + } + /// Write the given bytes one block at a time. /// /// This may write partial blocks (for example, if the underlying From bd18a2a344d67a8b63746cdfa0c9eeba929ada32 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 23 Feb 2023 22:05:47 -0500 Subject: [PATCH 3/4] dd: support the [io]flag=nocache option Add support for the `iflag=nocache` and `oflag=nocache` to make `dd` discard the filesystem cache for the processed portion of the input or output file. --- src/uu/dd/src/dd.rs | 50 +++++++++++++++++++++++++++ src/uu/dd/src/parseargs.rs | 4 +-- src/uu/dd/src/parseargs/unit_tests.rs | 2 +- tests/by-util/test_dd.rs | 22 ++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 43f0da5df..3b8b3addb 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -785,6 +785,25 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // Optimization: if no blocks are to be written, then don't // bother allocating any buffers. if let Some(Num::Blocks(0) | Num::Bytes(0)) = i.settings.count { + // Even though we are not reading anything from the input + // file, we still need to honor the `nocache` flag, which + // requests that we inform the system that we no longer + // need the contents of the input file in a system cache. + // + // TODO Better error handling for overflowing `len`. + if i.settings.iflags.nocache { + let offset = 0; + let len = i.src.len()?.try_into().unwrap(); + i.discard_cache(offset, len); + } + // Similarly, discard the system cache for the output file. + // + // TODO Better error handling for overflowing `len`. + if i.settings.oflags.nocache { + let offset = 0; + let len = o.dst.len()?.try_into().unwrap(); + o.discard_cache(offset, len); + } return finalize(&mut o, rstat, wstat, start, &prog_tx, output_thread); }; @@ -792,6 +811,13 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // This is the max size needed. let mut buf = vec![BUF_INIT_BYTE; bsize]; + // Index in the input file where we are reading bytes and in + // the output file where we are writing bytes. + // + // These are updated on each iteration of the main loop. + let mut read_offset = 0; + let mut write_offset = 0; + // The main read/write loop. // // Each iteration reads blocks from the input and writes @@ -811,6 +837,30 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { } let wstat_update = o.write_blocks(&buf)?; + // Discard the system file cache for the read portion of + // the input file. + // + // TODO Better error handling for overflowing `offset` and `len`. + let read_len = rstat_update.bytes_total; + if i.settings.iflags.nocache { + let offset = read_offset.try_into().unwrap(); + let len = read_len.try_into().unwrap(); + i.discard_cache(offset, len); + } + read_offset += read_len; + + // Discard the system file cache for the written portion + // of the output file. + // + // TODO Better error handling for overflowing `offset` and `len`. + let write_len = wstat_update.bytes_total; + if o.settings.oflags.nocache { + let offset = write_offset.try_into().unwrap(); + let len = write_len.try_into().unwrap(); + o.discard_cache(offset, len); + } + write_offset += write_len; + // Update the read/write stats and inform the progress thread once per second. // // If the receiver is disconnected, `send()` returns an diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index b9a3baf09..781934a83 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -304,7 +304,7 @@ impl Parser { "directory" => linux_only!(f, i.directory = true), "dsync" => linux_only!(f, i.dsync = true), "sync" => linux_only!(f, i.sync = true), - "nocache" => return Err(ParseError::Unimplemented(f.to_string())), + "nocache" => linux_only!(f, i.nocache = true), "nonblock" => linux_only!(f, i.nonblock = true), "noatime" => linux_only!(f, i.noatime = true), "noctty" => linux_only!(f, i.noctty = true), @@ -336,7 +336,7 @@ impl Parser { "directory" => linux_only!(f, o.directory = true), "dsync" => linux_only!(f, o.dsync = true), "sync" => linux_only!(f, o.sync = true), - "nocache" => return Err(ParseError::Unimplemented(f.to_string())), + "nocache" => linux_only!(f, o.nocache = true), "nonblock" => linux_only!(f, o.nonblock = true), "noatime" => linux_only!(f, o.noatime = true), "noctty" => linux_only!(f, o.noctty = true), diff --git a/src/uu/dd/src/parseargs/unit_tests.rs b/src/uu/dd/src/parseargs/unit_tests.rs index a135c3572..54e17b882 100644 --- a/src/uu/dd/src/parseargs/unit_tests.rs +++ b/src/uu/dd/src/parseargs/unit_tests.rs @@ -55,7 +55,7 @@ fn unimplemented_flags_should_error() { let mut succeeded = Vec::new(); // The following flags are not implemented - for flag in ["cio", "nocache", "nolinks", "text", "binary"] { + for flag in ["cio", "nolinks", "text", "binary"] { let args = vec![format!("iflag={flag}")]; if Parser::new() diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index 99eae4809..a54c473ef 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -1536,3 +1536,25 @@ fn test_multiple_processes_reading_stdin() { .succeeds() .stdout_only("def\n"); } + +/// Test that discarding system file cache fails for stdin. +#[test] +#[cfg(target_os = "linux")] +fn test_nocache_stdin_error() { + new_ucmd!() + .args(&["iflag=nocache", "count=0", "status=noxfer"]) + .fails() + .code_is(1) + .stderr_only("dd: failed to discard cache for: 'standard input': Illegal seek\n0+0 records in\n0+0 records out\n"); +} + +/// Test for discarding system file cache. +#[test] +#[cfg(target_os = "linux")] +fn test_nocache_file() { + let (at, mut ucmd) = at_and_ucmd!(); + at.write_bytes("f", b"a".repeat(1 << 20).as_slice()); + ucmd.args(&["if=f", "of=/dev/null", "iflag=nocache", "status=noxfer"]) + .succeeds() + .stderr_only("2048+0 records in\n2048+0 records out\n"); +} From 4ff318aee1c28a4a959a35917ca1fa7cec703669 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 7 May 2023 16:33:21 -0400 Subject: [PATCH 4/4] fixup! dd: support the [io]flag=nocache option --- src/uu/dd/src/dd.rs | 2 ++ tests/by-util/test_dd.rs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 3b8b3addb..6727f4bb0 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -793,6 +793,7 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // TODO Better error handling for overflowing `len`. if i.settings.iflags.nocache { let offset = 0; + #[allow(clippy::useless_conversion)] let len = i.src.len()?.try_into().unwrap(); i.discard_cache(offset, len); } @@ -801,6 +802,7 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> { // TODO Better error handling for overflowing `len`. if i.settings.oflags.nocache { let offset = 0; + #[allow(clippy::useless_conversion)] let len = o.dst.len()?.try_into().unwrap(); o.discard_cache(offset, len); } diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index a54c473ef..79b139602 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -1541,11 +1541,15 @@ fn test_multiple_processes_reading_stdin() { #[test] #[cfg(target_os = "linux")] fn test_nocache_stdin_error() { + #[cfg(not(target_env = "musl"))] + let detail = "Illegal seek"; + #[cfg(target_env = "musl")] + let detail = "Invalid seek"; new_ucmd!() .args(&["iflag=nocache", "count=0", "status=noxfer"]) .fails() .code_is(1) - .stderr_only("dd: failed to discard cache for: 'standard input': Illegal seek\n0+0 records in\n0+0 records out\n"); + .stderr_only(format!("dd: failed to discard cache for: 'standard input': {detail}\n0+0 records in\n0+0 records out\n")); } /// Test for discarding system file cache.