1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge branch 'main' into dd-skip-beyond-file

This commit is contained in:
Sylvestre Ledru 2022-02-08 20:46:07 +01:00 committed by GitHub
commit daaae90113
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 286 additions and 147 deletions

View file

@ -62,6 +62,7 @@ PROGS := \
csplit \
cut \
date \
dd \
df \
dircolors \
dirname \

View file

@ -19,12 +19,34 @@ pub struct ProgUpdate {
pub duration: time::Duration,
}
impl ProgUpdate {
pub(crate) fn new(
read_stat: ReadStat,
write_stat: WriteStat,
duration: time::Duration,
) -> Self {
Self {
read_stat,
write_stat,
duration,
}
}
}
/// Counters describing the reads performed so far.
///
/// The derived `Default` yields a value with every counter at zero.
#[derive(Clone, Copy, Default)]
pub struct ReadStat {
    /// Count of complete reads.
    pub reads_complete: u64,
    /// Count of partial reads.
    pub reads_partial: u64,
    /// Count of truncated records.
    // NOTE(review): this counter is updated outside the visible code;
    // confirm its exact meaning against the conversion helpers.
    pub records_truncated: u32,
}

impl ReadStat {
    /// Report whether both the complete-read and the partial-read
    /// counters are zero.
    ///
    /// `records_truncated` is deliberately not consulted.
    pub(crate) fn is_empty(&self) -> bool {
        (self.reads_complete, self.reads_partial) == (0, 0)
    }
}
impl std::ops::AddAssign for ReadStat {
fn add_assign(&mut self, other: Self) {
*self = Self {
@ -35,7 +57,7 @@ impl std::ops::AddAssign for ReadStat {
}
}
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Default)]
pub struct WriteStat {
pub writes_complete: u64,
pub writes_partial: u64,

View file

@ -353,78 +353,111 @@ where
})
}
fn dd_out<R: Read>(mut self, mut i: Input<R>) -> UResult<()> {
let mut rstat = ReadStat {
reads_complete: 0,
reads_partial: 0,
records_truncated: 0,
};
let mut wstat = WriteStat {
writes_complete: 0,
writes_partial: 0,
bytes_total: 0,
};
let start = time::Instant::now();
let bsize = calc_bsize(i.ibs, self.obs);
let prog_tx = {
let (tx, rx) = mpsc::channel();
thread::spawn(gen_prog_updater(rx, i.print_level));
tx
};
while below_count_limit(&i.count, &rstat, &wstat) {
// Read/Write
let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize);
match read_helper(&mut i, loop_bsize)? {
(
ReadStat {
reads_complete: 0,
reads_partial: 0,
..
},
_,
) => break,
(rstat_update, buf) => {
let wstat_update = self
.write_blocks(&buf)
.map_err_context(|| "failed to write output".to_string())?;
rstat += rstat_update;
wstat += wstat_update;
}
};
// Update Prog
prog_tx
.send(ProgUpdate {
read_stat: rstat,
write_stat: wstat,
duration: start.elapsed(),
})
.map_err(|_| USimpleError::new(1, "failed to write output"))?;
}
if self.cflags.fsync {
self.fsync()
.map_err_context(|| "failed to write output".to_string())?;
} else if self.cflags.fdatasync {
self.fdatasync()
.map_err_context(|| "failed to write output".to_string())?;
}
/// Print the read/write statistics.
fn print_stats<R: Read>(&self, i: &Input<R>, prog_update: &ProgUpdate) {
match i.print_level {
Some(StatusLevel::None) => {}
Some(StatusLevel::Noxfer) => print_io_lines(&ProgUpdate {
read_stat: rstat,
write_stat: wstat,
duration: start.elapsed(),
}),
Some(StatusLevel::Progress) | None => print_transfer_stats(&ProgUpdate {
read_stat: rstat,
write_stat: wstat,
duration: start.elapsed(),
}),
Some(StatusLevel::Noxfer) => print_io_lines(prog_update),
Some(StatusLevel::Progress) | None => print_transfer_stats(prog_update),
}
}
/// Flush the output to disk, if configured to do so.
///
/// The `fsync` flag takes precedence: when it is set, `fdatasync` is
/// not consulted. When neither flag is set this is a no-op that
/// returns `Ok(())`.
///
/// # Errors
///
/// Propagates whatever error the selected flush operation returns.
fn sync(&mut self) -> std::io::Result<()> {
    match (self.cflags.fsync, self.cflags.fdatasync) {
        (true, _) => self.fsync(),
        (false, true) => self.fdatasync(),
        // Neither flag was requested, so there is nothing to flush.
        (false, false) => Ok(()),
    }
}
/// Copy the given input data to this output, consuming both.
///
/// This method contains the main loop for the `dd` program. Bytes
/// are read in blocks from `i` and written in blocks to this
/// output. Read/write statistics are reported to stderr as
/// configured by the `status` command-line argument.
///
/// # Errors
///
/// If there is a problem reading from the input or writing to
/// this output.
fn dd_out<R: Read>(mut self, mut i: Input<R>) -> std::io::Result<()> {
// The read and write statistics.
//
// These objects are counters, initialized to zero. After each
// iteration of the main loop, each will be incremented by the
// number of blocks read and written, respectively.
let mut rstat = Default::default();
let mut wstat = Default::default();
// The time at which the main loop starts executing.
//
// When `status=progress` is given on the command-line, the
// `dd` program reports its progress every second or so. Part
// of its report includes the throughput in bytes per second,
// which requires knowing how long the process has been
// running.
let start = time::Instant::now();
// A good buffer size for reading.
//
// This is an educated guess about a good buffer size based on
// the input and output block sizes.
let bsize = calc_bsize(i.ibs, self.obs);
// Start a thread that reports transfer progress.
//
// When `status=progress` is given on the command-line, the
// `dd` program reports its progress every second or so. We
// perform this reporting in a new thread so as not to take
// any CPU time away from the actual reading and writing of
// data. We send a `ProgUpdate` from the transmitter `prog_tx`
// to the receives `rx`, and the receiver prints the transfer
// information.
let (prog_tx, rx) = mpsc::channel();
thread::spawn(gen_prog_updater(rx, i.print_level));
// The main read/write loop.
//
// Each iteration reads blocks from the input and writes
// blocks to this output. Read/write statistics are updated on
// each iteration and cumulative statistics are reported to
// the progress reporting thread.
while below_count_limit(&i.count, &rstat, &wstat) {
// Read a block from the input then write the block to the output.
//
// As an optimization, make an educated guess about the
// best buffer size for reading based on the number of
// blocks already read and the number of blocks remaining.
let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize);
let (rstat_update, buf) = read_helper(&mut i, loop_bsize)?;
if rstat_update.is_empty() {
break;
}
let wstat_update = self.write_blocks(&buf)?;
// Update the read/write stats and inform the progress thread.
//
// If the receiver is disconnected, `send()` returns an
// error. Since it is just reporting progress and is not
// crucial to the operation of `dd`, let's just ignore the
// error.
rstat += rstat_update;
wstat += wstat_update;
let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed());
prog_tx.send(prog_update).unwrap_or(());
}
// Flush the output, if configured to do so.
self.sync()?;
// Print the final read/write statistics.
let prog_update = ProgUpdate::new(rstat, wstat, start.elapsed());
self.print_stats(&i, &prog_update);
Ok(())
}
}
@ -475,7 +508,6 @@ impl OutputTrait for Output<File> {
let mut opts = OpenOptions::new();
opts.write(true)
.create(!cflags.nocreat)
.truncate(!cflags.notrunc)
.create_new(cflags.excl)
.append(oflags.append);
@ -495,13 +527,13 @@ impl OutputTrait for Output<File> {
let mut dst = open_dst(Path::new(&fname), &cflags, &oflags)
.map_err_context(|| format!("failed to open {}", fname.quote()))?;
if let Some(amt) = seek {
let amt: u64 = amt
.try_into()
.map_err(|_| USimpleError::new(1, "failed to parse seek amount"))?;
dst.seek(io::SeekFrom::Start(amt))
.map_err_context(|| "failed to seek in output file".to_string())?;
let i = seek.unwrap_or(0).try_into().unwrap();
if !cflags.notrunc {
dst.set_len(i)
.map_err_context(|| "failed to truncate output file".to_string())?;
}
dst.seek(io::SeekFrom::Start(i))
.map_err_context(|| "failed to seek in output file".to_string())?;
Ok(Self { dst, obs, cflags })
} else {
@ -704,7 +736,7 @@ fn conv_block_unblock_helper<R: Read>(
}
/// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user.
fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> UResult<(ReadStat, Vec<u8>)> {
fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> std::io::Result<(ReadStat, Vec<u8>)> {
// Local Predicate Fns -----------------------------------------------
fn is_conv<R: Read>(i: &Input<R>) -> bool {
i.cflags.ctable.is_some()
@ -725,12 +757,8 @@ fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> UResult<(ReadStat, Ve
// Read
let mut buf = vec![BUF_INIT_BYTE; bsize];
let mut rstat = match i.cflags.sync {
Some(ch) => i
.fill_blocks(&mut buf, ch)
.map_err_context(|| "failed to write output".to_string())?,
_ => i
.fill_consecutive(&mut buf)
.map_err_context(|| "failed to write output".to_string())?,
Some(ch) => i.fill_blocks(&mut buf, ch)?,
_ => i.fill_consecutive(&mut buf)?,
};
// Return early if no data
if rstat.reads_complete == 0 && rstat.reads_partial == 0 {
@ -742,7 +770,7 @@ fn read_helper<R: Read>(i: &mut Input<R>, bsize: usize) -> UResult<(ReadStat, Ve
perform_swab(&mut buf);
}
if is_conv(i) || is_block(i) || is_unblock(i) {
let buf = conv_block_unblock_helper(buf, i, &mut rstat)?;
let buf = conv_block_unblock_helper(buf, i, &mut rstat).unwrap();
Ok((rstat, buf))
} else {
Ok((rstat, buf))
@ -932,22 +960,22 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
(true, true) => {
let i = Input::<File>::new(&matches)?;
let o = Output::<File>::new(&matches)?;
o.dd_out(i)
o.dd_out(i).map_err_context(|| "IO error".to_string())
}
(false, true) => {
let i = Input::<io::Stdin>::new(&matches)?;
let o = Output::<File>::new(&matches)?;
o.dd_out(i)
o.dd_out(i).map_err_context(|| "IO error".to_string())
}
(true, false) => {
let i = Input::<File>::new(&matches)?;
let o = Output::<io::Stdout>::new(&matches)?;
o.dd_out(i)
o.dd_out(i).map_err_context(|| "IO error".to_string())
}
(false, false) => {
let i = Input::<io::Stdin>::new(&matches)?;
let o = Output::<io::Stdout>::new(&matches)?;
o.dd_out(i)
o.dd_out(i).map_err_context(|| "IO error".to_string())
}
}
}

View file

@ -13,6 +13,7 @@ use super::*;
use std::error::Error;
use uucore::error::UError;
use uucore::parse_size::ParseSizeError;
use uucore::show_warning;
pub type Matches = ArgMatches;
@ -356,6 +357,13 @@ fn parse_bytes_only(s: &str) -> Result<usize, ParseError> {
/// assert_eq!(parse_bytes_no_x("2k").unwrap(), 2 * 1024);
/// ```
fn parse_bytes_no_x(s: &str) -> Result<usize, ParseError> {
if s == "0" {
show_warning!(
"{} is a zero multiplier; use {} if that is intended",
"0x".quote(),
"00x".quote()
);
}
let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) {
(None, None, None) => match uucore::parse_size::parse_size(s) {
Ok(n) => (n, 1),

View file

@ -161,64 +161,79 @@ impl Filesystem {
}
}
/// Keep only the specified subset of [`MountInfo`] instances.
///
/// If `paths` is non-empty, this function excludes any [`MountInfo`]
/// that is not mounted at the specified path.
///
/// The `opt` argument specifies a variety of ways of excluding
/// [`MountInfo`] instances; see [`Options`] for more information.
///
/// Finally, if there are duplicate entries, the one with the shorter
/// path is kept.
fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> {
vmi.into_iter()
.filter_map(|mi| {
if (mi.remote && opt.show_local_fs)
|| (mi.dummy && !opt.show_all_fs && !opt.show_listed_fs)
|| !opt.fs_selector.should_select(&mi.fs_type)
{
None
} else {
if paths.is_empty() {
// No path specified
return Some((mi.dev_id.clone(), mi));
}
if paths.contains(&mi.mount_dir) {
// One or more paths have been provided
Some((mi.dev_id.clone(), mi))
} else {
// Not a path we want to see
None
}
}
})
.fold(
HashMap::<String, Cell<MountInfo>>::new(),
|mut acc, (id, mi)| {
#[allow(clippy::map_entry)]
{
if acc.contains_key(&id) {
let seen = acc[&id].replace(mi.clone());
let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len();
// With bind mounts, prefer items nearer the root of the source
let source_below_root = !seen.mount_root.is_empty()
&& !mi.mount_root.is_empty()
&& seen.mount_root.len() < mi.mount_root.len();
// let "real" devices with '/' in the name win.
if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/'))
// let points towards the root of the device win.
&& (!target_nearer_root || source_below_root)
// let an entry over-mounted on a new device win...
&& (seen.dev_name == mi.dev_name
/* ... but only when matching an existing mnt point,
to avoid problematic replacement when given
inaccurate mount lists, seen with some chroot
environments for example. */
|| seen.mount_dir != mi.mount_dir)
{
acc[&id].replace(seen);
}
} else {
acc.insert(id, Cell::new(mi));
}
acc
}
},
)
.into_iter()
.map(|ent| ent.1.into_inner())
.collect::<Vec<_>>()
let mut mount_info_by_id = HashMap::<String, Cell<MountInfo>>::new();
for mi in vmi {
// Don't show remote filesystems if `--local` has been given.
if mi.remote && opt.show_local_fs {
continue;
}
// Don't show pseudo filesystems unless `--all` has been given.
if mi.dummy && !opt.show_all_fs && !opt.show_listed_fs {
continue;
}
// Don't show filesystems if they have been explicitly excluded.
if !opt.fs_selector.should_select(&mi.fs_type) {
continue;
}
// Don't show filesystems other than the ones specified on the
// command line, if any.
if !paths.is_empty() && !paths.contains(&mi.mount_dir) {
continue;
}
// If the device ID has not been encountered yet, just store it.
let id = mi.dev_id.clone();
#[allow(clippy::map_entry)]
if !mount_info_by_id.contains_key(&id) {
mount_info_by_id.insert(id, Cell::new(mi));
continue;
}
// Otherwise, if we have seen the current device ID before,
// then check if we need to update it or keep the previously
// seen one.
let seen = mount_info_by_id[&id].replace(mi.clone());
let target_nearer_root = seen.mount_dir.len() > mi.mount_dir.len();
// With bind mounts, prefer items nearer the root of the source
let source_below_root = !seen.mount_root.is_empty()
&& !mi.mount_root.is_empty()
&& seen.mount_root.len() < mi.mount_root.len();
// let "real" devices with '/' in the name win.
if (!mi.dev_name.starts_with('/') || seen.dev_name.starts_with('/'))
// let points towards the root of the device win.
&& (!target_nearer_root || source_below_root)
// let an entry over-mounted on a new device win...
&& (seen.dev_name == mi.dev_name
/* ... but only when matching an existing mnt point,
to avoid problematic replacement when given
inaccurate mount lists, seen with some chroot
environments for example. */
|| seen.mount_dir != mi.mount_dir)
{
mount_info_by_id[&id].replace(seen);
}
}
// Take ownership of the `MountInfo` instances and collect them
// into a `Vec`.
mount_info_by_id
.into_values()
.map(|m| m.into_inner())
.collect()
}
/// Convert `value` to a human readable string based on `base`.

View file

@ -193,11 +193,19 @@ impl ScanUtil for str {
}
pub fn group_num(s: &str) -> Cow<str> {
assert!(s.chars().all(char::is_numeric));
let is_negative = s.starts_with('-');
assert!(is_negative || s.chars().take(1).all(|c| c.is_digit(10)));
assert!(s.chars().skip(1).all(|c| c.is_digit(10)));
if s.len() < 4 {
return s.into();
}
let mut res = String::with_capacity((s.len() - 1) / 3);
let s = if is_negative {
res.push('-');
&s[1..]
} else {
s
};
let mut alone = (s.len() - 1) % 3 + 1;
res.push_str(&s[..alone]);
while alone != s.len() {

View file

@ -198,6 +198,39 @@ fn test_x_multiplier() {
.stdout_is("abcdef");
}
/// A bare `0x` multiplier in `count`, `seek` or `skip` must produce a
/// warning on stderr, while the explicit `00x` spelling must not.
#[test]
fn test_zero_multiplier_warning() {
    // Text expected on stderr whenever a bare `0x` multiplier is used.
    const WARNING: &str = "warning: '0x' is a zero multiplier; use '00x' if that is intended";

    for arg in ["count", "seek", "skip"] {
        // Explicit `00x`: nothing is expected on stderr.
        let explicit_zero = format!("{}=00x1", arg);
        new_ucmd!()
            .args(&[explicit_zero.as_str(), "status=none"])
            .pipe_in("")
            .succeeds()
            .no_stdout()
            .no_stderr();

        // A single leading `0x`: the warning is expected.
        let leading_zero = format!("{}=0x1", arg);
        new_ucmd!()
            .args(&[leading_zero.as_str(), "status=none"])
            .pipe_in("")
            .succeeds()
            .no_stdout()
            .stderr_contains(WARNING);

        // Two `0x` factors: exactly two warning lines are expected.
        let double_zero = format!("{}=0x0x1", arg);
        new_ucmd!()
            .args(&[double_zero.as_str(), "status=none"])
            .pipe_in("")
            .succeeds()
            .no_stdout()
            .stderr_is("dd: warning: '0x' is a zero multiplier; use '00x' if that is intended\ndd: warning: '0x' is a zero multiplier; use '00x' if that is intended\n");

        // A `0x` factor that is not in the leading position: the
        // warning is still expected.
        let inner_zero = format!("{}=1x0x1", arg);
        new_ucmd!()
            .args(&[inner_zero.as_str(), "status=none"])
            .pipe_in("")
            .succeeds()
            .no_stdout()
            .stderr_contains(WARNING);
    }
}
#[test]
fn test_final_stats_noxfer() {
new_ucmd!()
@ -637,5 +670,27 @@ fn test_skip_beyond_file() {
);
}
/// Seeking in the output must leave the bytes before the seek offset
/// untouched.
#[test]
fn test_seek_do_not_overwrite() {
    let (at, mut ucmd) = at_and_ucmd!();

    // Seed the output file so that any change to its first byte is
    // detectable afterwards.
    let mut seeded = at.make_file("outfile");
    seeded.write_all(b"abc").unwrap();

    // Skip the first byte of the input, seek past the first byte of
    // the output, and write only one byte to the output.
    let dd_args = [
        "bs=1",
        "skip=1",
        "seek=1",
        "count=1",
        "status=noxfer",
        "of=outfile",
    ];
    ucmd.args(&dd_args)
        .pipe_in("123")
        .succeeds()
        .stderr_is("1+0 records in\n1+0 records out\n")
        .no_stdout();

    // The original first byte survives and is followed by the single
    // byte copied from the input.
    assert_eq!(at.read("outfile"), "a2");
}
// conv=[ascii,ebcdic,ibm], conv=[ucase,lcase], conv=[block,unblock], conv=sync
// TODO: Move conv tests from unit test module

View file

@ -34,6 +34,8 @@ fn test_group_num() {
assert_eq!("24", group_num("24"));
assert_eq!("4", group_num("4"));
assert_eq!("", group_num(""));
assert_eq!("-5", group_num("-5"));
assert_eq!("-1,234", group_num("-1234"));
}
#[test]