From 97a0c06ff46b8bf05039830f6e4ff2411cfbbe44 Mon Sep 17 00:00:00 2001 From: Mahmoud Soltan Date: Mon, 30 Aug 2021 23:09:16 +0200 Subject: [PATCH 01/22] Proper columns for `ls -l` (#2623) * Used .as_path() and .as_str() when required: when the argument required is a Path and not a PathBuf, or an str and not a Path, respectively. * Changed display_items to take Vec, which is passed, instead of [PathData] * Added a pad_right function. * Implemented column-formating to mimic the behavior of GNU coreutils's ls Added returns in display_dir_entry_size that keep track of uname and group lengths. Renamed variables to make more sense. Changed display_item_long to take all the lengths it needs to render correctly. Implemented owner, group, and author padding right to mimic GNU ls. * Added a todo for future quality-of-life cache addition. * Documented display_item_long, as a first step in documenting all functions. * Revert "Used .as_path() and .as_str() when required:" This reverts commit b88db6a8170f827a7adc58de14acb59f19be2db1. * Revert "Changed display_items to take Vec, which is passed, instead of [PathData]" This reverts commit 0c690dda8d5eb1b257afb4c74d9c2dfc1f7cc97b. * Ran cargo fmt to get rid of Style/format `fmt` testing error. * Added a test for `ls -l` and `ls -lan` line formats. * Changed uname -> username for cspell. Removed extra blank line for rustfmt. --- src/uu/ls/src/ls.rs | 102 +++++++++++++++++++++++++++++++++------ tests/by-util/test_ls.rs | 55 +++++++++++++++++++++ 2 files changed, 143 insertions(+), 14 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 3d957474c..bf449d96b 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1394,14 +1394,17 @@ fn get_metadata(entry: &Path, dereference: bool) -> std::io::Result { } } -fn display_dir_entry_size(entry: &PathData, config: &Config) -> (usize, usize) { +fn display_dir_entry_size(entry: &PathData, config: &Config) -> (usize, usize, usize, usize) { + // TODO: Cache/memoize the display_* results so we don't have to recalculate them. if let Some(md) = entry.md() { ( display_symlink_count(md).len(), + display_uname(md, config).len(), + display_group(md, config).len(), display_size_or_rdev(md, config).len(), ) } else { - (0, 0) + (0, 0, 0, 0) } } @@ -1409,15 +1412,28 @@ fn pad_left(string: String, count: usize) -> String { format!("{:>width$}", string, width = count) } +fn pad_right(string: String, count: usize) -> String { + format!("{:) { if config.format == Format::Long { - let (mut max_links, mut max_width) = (1, 1); + let ( + mut longest_link_count_len, + mut longest_uname_len, + mut longest_group_len, + mut longest_size_len, + ) = (1, 1, 1, 1); let mut total_size = 0; for item in items { - let (links, width) = display_dir_entry_size(item, config); - max_links = links.max(max_links); - max_width = width.max(max_width); + let (link_count_len, uname_len, group_len, size_len) = + display_dir_entry_size(item, config); + longest_link_count_len = link_count_len.max(longest_link_count_len); + longest_size_len = size_len.max(longest_size_len); + longest_uname_len = uname_len.max(longest_uname_len); + longest_group_len = group_len.max(longest_group_len); + longest_size_len = size_len.max(longest_size_len); total_size += item.md().map_or(0, |md| get_block_size(md, config)); } @@ -1426,7 +1442,15 @@ fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter, ) { @@ -1557,27 +1619,39 @@ fn display_item_long( out, "{} {}", display_permissions(md, true), - pad_left(display_symlink_count(md), max_links), + pad_left(display_symlink_count(md), longest_link_count_len), ); if config.long.owner { - let _ = write!(out, " {}", display_uname(md, config)); + let _ = write!( + out, + " {}", + pad_right(display_uname(md, config), longest_uname_len) + ); } if config.long.group { - let _ = write!(out, " {}", display_group(md, config)); + let _ = write!( + out, + " {}", + pad_right(display_group(md, config), longest_group_len) + ); } // Author is only different from owner on GNU/Hurd, so we reuse // the owner, since GNU/Hurd is not currently supported by Rust. if config.long.author { - let _ = write!(out, " {}", display_uname(md, config)); + let _ = write!( + out, + " {}", + pad_right(display_uname(md, config), longest_uname_len) + ); } let _ = writeln!( out, " {} {} {}", - pad_left(display_size_or_rdev(md, config), max_size), + pad_left(display_size_or_rdev(md, config), longest_size_len), display_date(md, config), // unwrap is fine because it fails when metadata is not available // but we already know that it is because it's checked at the diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index a3372050a..546fb86a8 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -333,6 +333,61 @@ fn test_ls_long() { } } +#[test] +fn test_ls_long_format() { + #[cfg(not(windows))] + let last; + #[cfg(not(windows))] + { + let _guard = UMASK_MUTEX.lock(); + last = unsafe { umask(0) }; + + unsafe { + umask(0o002); + } + } + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.mkdir(&at.plus_as_string("test-long-dir")); + at.touch(&at.plus_as_string("test-long-dir/test-long-file")); + at.mkdir(&at.plus_as_string("test-long-dir/test-long-dir")); + + for arg in &["-l", "--long", "--format=long", "--format=verbose"] { + let result = scene.ucmd().arg(arg).arg("test-long-dir").succeeds(); + // Assuming sane username do not have spaces within them. + // A line of the output should be: + // One of the characters -bcCdDlMnpPsStTx? + // rwx, with - for missing permissions, thrice. + // A number, preceded by column whitespace, and followed by a single space. + // A username, currently [^ ], followed by column whitespace, twice (or thrice for Hurd). + // A number, followed by a single space. + // A month, followed by a single space. + // A day, preceded by column whitespace, and followed by a single space. + // Either a year or a time, currently [0-9:]+, preceded by column whitespace, + // and followed by a single space. + // Whatever comes after is irrelevant to this specific test. + #[cfg(not(windows))] + result.stdout_matches(&Regex::new( + r"\n[-bcCdDlMnpPsStTx?]([r-][w-][xt-]){3} +\d+ [^ ]+ +[^ ]+( +[^ ]+)? +\d+ [A-Z][a-z]{2} {0,2}\d{0,2} {0,2}[0-9:]+ " + ).unwrap()); + } + + let result = scene.ucmd().arg("-lan").arg("test-long-dir").succeeds(); + // This checks for the line with the .. entry. The uname and group should be digits. + #[cfg(not(windows))] + result.stdout_matches(&Regex::new( + r"\nd([r-][w-][xt-]){3} +\d+ \d+ +\d+( +\d+)? +\d+ [A-Z][a-z]{2} {0,2}\d{0,2} {0,2}[0-9:]+ \.\." + ).unwrap()); + + #[cfg(not(windows))] + { + unsafe { + umask(last); + } + } +} + #[test] fn test_ls_long_total_size() { let scene = TestScenario::new(util_name!()); From 1e78a40e20603f66e6d3da9b58bf346a3d3087f0 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Mon, 30 Aug 2021 23:13:31 +0200 Subject: [PATCH 02/22] CICD: use nightly rust for code coverage --- .github/workflows/CICD.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index d7dd4ad97..4f92d7d73 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -14,7 +14,6 @@ env: PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_AUTH: "uutils" RUST_MIN_SRV: "1.47.0" ## MSRV v1.47.0 - RUST_COV_SRV: "2021-05-06" ## (~v1.52.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel on: [push, pull_request] @@ -606,7 +605,7 @@ jobs: ## VARs setup outputs() { step_id="vars"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo ::set-output name=${var}::${!var}; done; } # toolchain - TOOLCHAIN="nightly-${{ env.RUST_COV_SRV }}" ## default to "nightly" toolchain (required for certain required unstable compiler flags) ## !maint: refactor when stable channel has needed support + TOOLCHAIN="nightly" ## default to "nightly" toolchain (required for certain required unstable compiler flags) ## !maint: refactor when stable channel has needed support # * specify gnu-type TOOLCHAIN for windows; `grcov` requires gnu-style code coverage data files case ${{ matrix.job.os }} in windows-*) TOOLCHAIN="$TOOLCHAIN-x86_64-pc-windows-gnu" ;; esac; # * use requested TOOLCHAIN if specified From b82401e744e0b588c8c86dad19633c0dd5c946e2 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Tue, 31 Aug 2021 00:36:35 +0200 Subject: [PATCH 03/22] uucore: fall back to stripping the error message for unexpected io error kinds Rust recently added new error kinds, which causes tests to fail on beta/nightly. However, we can't match for these new error kinds because they are marked as unstable. As a workaround we call `.to_string()` on the original error and strip the ` (os error XX)` bit. The downside of this is that the error messages are not capitalized. --- src/uucore/src/lib/mods/error.rs | 58 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/src/uucore/src/lib/mods/error.rs b/src/uucore/src/lib/mods/error.rs index 6af934d3e..9a697d822 100644 --- a/src/uucore/src/lib/mods/error.rs +++ b/src/uucore/src/lib/mods/error.rs @@ -368,7 +368,7 @@ impl UIoError { pub fn new>(kind: std::io::ErrorKind, context: S) -> Box { Box::new(Self { context: context.into(), - inner: std::io::Error::new(kind, ""), + inner: kind.into(), }) } } @@ -380,32 +380,36 @@ impl Error for UIoError {} impl Display for UIoError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { use std::io::ErrorKind::*; - write!( - f, - "{}: {}", - self.context, - match self.inner.kind() { - NotFound => "No such file or directory", - PermissionDenied => "Permission denied", - ConnectionRefused => "Connection refused", - ConnectionReset => "Connection reset", - ConnectionAborted => "Connection aborted", - NotConnected => "Not connected", - AddrInUse => "Address in use", - AddrNotAvailable => "Address not available", - BrokenPipe => "Broken pipe", - AlreadyExists => "Already exists", - WouldBlock => "Would block", - InvalidInput => "Invalid input", - InvalidData => "Invalid data", - TimedOut => "Timed out", - WriteZero => "Write zero", - Interrupted => "Interrupted", - Other => "Other", - UnexpectedEof => "Unexpected end of file", - _ => panic!("Unexpected io error: {}", self.inner), - }, - ) + + let message; + let message = match self.inner.kind() { + NotFound => "No such file or directory", + PermissionDenied => "Permission denied", + ConnectionRefused => "Connection refused", + ConnectionReset => "Connection reset", + ConnectionAborted => "Connection aborted", + NotConnected => "Not connected", + AddrInUse => "Address in use", + AddrNotAvailable => "Address not available", + BrokenPipe => "Broken pipe", + AlreadyExists => "Already exists", + WouldBlock => "Would block", + InvalidInput => "Invalid input", + InvalidData => "Invalid data", + TimedOut => "Timed out", + WriteZero => "Write zero", + Interrupted => "Interrupted", + Other => "Other", + UnexpectedEof => "Unexpected end of file", + _ => { + // TODO: using `strip_errno()` causes the error message + // to not be capitalized. When the new error variants (https://github.com/rust-lang/rust/issues/86442) + // are stabilized, we should add them to the match statement. + message = strip_errno(&self.inner); + &message + } + }; + write!(f, "{}: {}", self.context, message,) } } From 4f891add5a77c7a8d15cfbec13c9577b243ed88a Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Sun, 29 Aug 2021 20:08:43 +0200 Subject: [PATCH 04/22] uucore: Add a Quotable extension trait for displaying filenames --- .../cspell.dictionaries/jargon.wordlist.txt | 1 + .../cspell.dictionaries/shell.wordlist.txt | 1 + src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/mods.rs | 1 + src/uucore/src/lib/mods/display.rs | 357 ++++++++++++++++++ 5 files changed, 361 insertions(+) create mode 100644 src/uucore/src/lib/mods/display.rs diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 34abfc511..089adffa3 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -1,3 +1,4 @@ +AFAICT arity autogenerate autogenerated diff --git a/.vscode/cspell.dictionaries/shell.wordlist.txt b/.vscode/cspell.dictionaries/shell.wordlist.txt index 07c2364ac..4ed281efb 100644 --- a/.vscode/cspell.dictionaries/shell.wordlist.txt +++ b/.vscode/cspell.dictionaries/shell.wordlist.txt @@ -8,6 +8,7 @@ csh globstar inotify localtime +mksh mountinfo mountpoint mtab diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index a39834ec1..5352a6356 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -19,6 +19,7 @@ mod parser; // string parsing modules // * cross-platform modules pub use crate::mods::backup_control; pub use crate::mods::coreopts; +pub use crate::mods::display; pub use crate::mods::error; pub use crate::mods::os; pub use crate::mods::panic; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index b0235832b..8f6d14976 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -2,6 +2,7 @@ pub mod backup_control; pub mod coreopts; +pub mod display; pub mod error; pub mod os; pub mod panic; diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs new file mode 100644 index 000000000..42e9fceba --- /dev/null +++ b/src/uucore/src/lib/mods/display.rs @@ -0,0 +1,357 @@ +/// Utilities for printing paths, with special attention paid to special +/// characters and invalid unicode. +/// +/// For displaying paths in informational messages use `Quotable::quote`. This +/// will wrap quotes around the filename and add the necessary escapes to make +/// it copy/paste-able into a shell. +/// +/// # Examples +/// ``` +/// use std::path::Path; +/// use uucore::display::{Quotable, println_verbatim}; +/// +/// let path = Path::new("foo/bar.baz"); +/// +/// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" +/// # Ok::<(), std::io::Error>(()) +/// ``` +// spell-checker:ignore Fbar +use std::ffi::OsStr; +#[cfg(any(unix, target_os = "wasi", windows))] +use std::fmt::Write as FmtWrite; +use std::fmt::{self, Display, Formatter}; + +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; +#[cfg(target_os = "wasi")] +use std::os::wasi::ffi::OsStrExt; +#[cfg(any(unix, target_os = "wasi"))] +use std::str::from_utf8; + +/// An extension trait for displaying filenames to users. +pub trait Quotable { + /// Returns an object that implements [`Display`] for printing filenames with + /// proper quoting and escaping for the platform. + /// + /// On Unix this corresponds to sh/bash syntax, on Windows Powershell syntax + /// is used. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// use uucore::display::Quotable; + /// + /// let path = Path::new("foo/bar.baz"); + /// + /// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" + /// ``` + fn quote(&self) -> Quoted<'_>; +} + +impl Quotable for T +where + T: AsRef, +{ + fn quote(&self) -> Quoted<'_> { + Quoted(self.as_ref()) + } +} + +/// A wrapper around [`OsStr`] for printing paths with quoting and escaping applied. +#[derive(Debug)] +pub struct Quoted<'a>(&'a OsStr); + +impl Display for Quoted<'_> { + #[cfg(any(unix, target_os = "wasi"))] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let text = self.0.as_bytes(); + + let mut is_single_safe = true; + let mut is_double_safe = true; + for &ch in text { + match ch { + ch if ch.is_ascii_control() => return write_escaped(f, text), + b'\'' => is_single_safe = false, + // Unsafe characters according to: + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03 + b'"' | b'`' | b'$' | b'\\' => is_double_safe = false, + _ => (), + } + } + let text = match from_utf8(text) { + Err(_) => return write_escaped(f, text), + Ok(text) => text, + }; + if is_single_safe { + return write_simple(f, text, '\''); + } else if is_double_safe { + return write_simple(f, text, '\"'); + } else { + return write_single_escaped(f, text); + } + + fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { + f.write_char(quote)?; + f.write_str(text)?; + f.write_char(quote)?; + Ok(()) + } + + fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { + let mut iter = text.split('\''); + if let Some(chunk) = iter.next() { + if !chunk.is_empty() { + write_simple(f, chunk, '\'')?; + } + } + for chunk in iter { + f.write_str("\\'")?; + if !chunk.is_empty() { + write_simple(f, chunk, '\'')?; + } + } + Ok(()) + } + + /// Write using the syntax described here: + /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html + /// + /// Supported by these shells: + /// - bash + /// - zsh + /// - busybox sh + /// - mksh + /// + /// Not supported by these: + /// - fish + /// - dash + /// - tcsh + fn write_escaped(f: &mut Formatter<'_>, text: &[u8]) -> fmt::Result { + f.write_str("$'")?; + for chunk in from_utf8_iter(text) { + match chunk { + Ok(chunk) => { + for ch in chunk.chars() { + match ch { + '\n' => f.write_str("\\n")?, + '\t' => f.write_str("\\t")?, + '\r' => f.write_str("\\r")?, + // We could do \b, \f, \v, etc., but those are + // rare enough to be confusing. + // \0 doesn't work consistently because of the + // octal \nnn syntax, and null bytes can't appear + // in filenames anyway. + ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?, + '\\' | '\'' => { + // '?' and '"' can also be escaped this way + // but AFAICT there's no reason to do so + f.write_char('\\')?; + f.write_char(ch)?; + } + ch => { + f.write_char(ch)?; + } + } + } + } + Err(unit) => write!(f, "\\x{:02X}", unit)?, + } + } + f.write_char('\'')?; + Ok(()) + } + } + + #[cfg(windows)] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // Behavior is based on PowerShell. + // ` takes the role of \ since \ is already used as the path separator. + // Things are UTF-16-oriented, so we escape code units as "`u{1234}". + use std::char::decode_utf16; + use std::os::windows::ffi::OsStrExt; + + // Getting the "raw" representation of an OsStr is actually expensive, + // so avoid it if unnecessary. + let text = match self.0.to_str() { + None => return write_escaped(f, self.0), + Some(text) => text, + }; + + let mut is_single_safe = true; + let mut is_double_safe = true; + for ch in text.chars() { + match ch { + ch if ch.is_ascii_control() => return write_escaped(f, self.0), + '\'' => is_single_safe = false, + '"' | '`' | '$' => is_double_safe = false, + _ => (), + } + } + + if is_single_safe || !is_double_safe { + return write_simple(f, text, '\''); + } else { + return write_simple(f, text, '"'); + } + + fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { + // Quotes in Powershell can be escaped by doubling them + f.write_char(quote)?; + let mut iter = text.split(quote); + if let Some(chunk) = iter.next() { + f.write_str(chunk)?; + } + for chunk in iter { + f.write_char(quote)?; + f.write_char(quote)?; + f.write_str(chunk)?; + } + f.write_char(quote)?; + Ok(()) + } + + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result { + f.write_char('"')?; + for ch in decode_utf16(text.encode_wide()) { + match ch { + Ok(ch) => match ch { + '\0' => f.write_str("`0")?, + '\r' => f.write_str("`r")?, + '\n' => f.write_str("`n")?, + '\t' => f.write_str("`t")?, + ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?, + '`' => f.write_str("``")?, + '$' => f.write_str("`$")?, + '"' => f.write_str("\"\"")?, + ch => f.write_char(ch)?, + }, + Err(err) => write!(f, "`u{{{:04X}}}", err.unpaired_surrogate())?, + } + } + f.write_char('"')?; + Ok(()) + } + } + + #[cfg(not(any(unix, target_os = "wasi", windows)))] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // As a fallback, we use Rust's own escaping rules. + // This is reasonably sane and very easy to implement. + // We use single quotes because that's hardcoded in a lot of tests. + write!(f, "'{}'", self.0.to_string_lossy().escape_debug()) + } +} + +#[cfg(any(unix, target_os = "wasi"))] +fn from_utf8_iter(mut bytes: &[u8]) -> impl Iterator> { + std::iter::from_fn(move || { + if bytes.is_empty() { + return None; + } + match from_utf8(bytes) { + Ok(text) => { + bytes = &[]; + Some(Ok(text)) + } + Err(err) if err.valid_up_to() == 0 => { + let res = bytes[0]; + bytes = &bytes[1..]; + Some(Err(res)) + } + Err(err) => { + let (valid, rest) = bytes.split_at(err.valid_up_to()); + bytes = rest; + Some(Ok(from_utf8(valid).unwrap())) + } + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn verify_quote(cases: &[(impl AsRef, &str)]) { + for (case, expected) in cases { + assert_eq!(case.quote().to_string(), *expected); + } + } + + /// This should hold on any platform, or else a lot of other tests will fail. + #[test] + fn test_basic() { + verify_quote(&[ + ("foo", "'foo'"), + ("", "''"), + ("foo/bar.baz", "'foo/bar.baz'"), + ]); + } + + #[cfg(any(unix, target_os = "wasi"))] + #[test] + fn test_unix() { + verify_quote(&[ + ("can't", r#""can't""#), + (r#"can'"t"#, r#"'can'\''"t'"#), + (r#"can'$t"#, r#"'can'\''$t'"#), + ("foo\nb\ta\r\\\0`r", r#"$'foo\nb\ta\r\\\x00`r'"#), + ("foo\x02", r#"$'foo\x02'"#), + (r#"'$''"#, r#"\''$'\'\'"#), + ]); + verify_quote(&[(OsStr::from_bytes(b"foo\xFF"), r#"$'foo\xFF'"#)]); + } + + #[cfg(windows)] + #[test] + fn test_windows() { + use std::ffi::OsString; + use std::os::windows::ffi::OsStringExt; + verify_quote(&[ + (r#"foo\bar"#, r#"'foo\bar'"#), + ("can't", r#""can't""#), + (r#"can'"t"#, r#"'can''"t'"#), + (r#"can'$t"#, r#"'can''$t'"#), + ("foo\nb\ta\r\\\0`r", r#""foo`nb`ta`r\`0``r""#), + ("foo\x02", r#""foo`u{0002}""#), + (r#"'$''"#, r#"'''$'''''"#), + ]); + verify_quote(&[( + OsString::from_wide(&[b'x' as u16, 0xD800]), + r#""x`u{D800}""#, + )]) + } + + #[cfg(any(unix, target_os = "wasi"))] + #[test] + fn test_utf8_iter() { + const CASES: &[(&[u8], &[Result<&str, u8>])] = &[ + (b"", &[]), + (b"hello", &[Ok("hello")]), + // Immediately invalid + (b"\xFF", &[Err(b'\xFF')]), + // Incomplete UTF-8 + (b"\xC2", &[Err(b'\xC2')]), + (b"\xF4\x8F", &[Err(b'\xF4'), Err(b'\x8F')]), + (b"\xFF\xFF", &[Err(b'\xFF'), Err(b'\xFF')]), + (b"hello\xC2", &[Ok("hello"), Err(b'\xC2')]), + (b"\xFFhello", &[Err(b'\xFF'), Ok("hello")]), + (b"\xFF\xC2hello", &[Err(b'\xFF'), Err(b'\xC2'), Ok("hello")]), + (b"foo\xFFbar", &[Ok("foo"), Err(b'\xFF'), Ok("bar")]), + ( + b"foo\xF4\x8Fbar", + &[Ok("foo"), Err(b'\xF4'), Err(b'\x8F'), Ok("bar")], + ), + ( + b"foo\xFF\xC2bar", + &[Ok("foo"), Err(b'\xFF'), Err(b'\xC2'), Ok("bar")], + ), + ]; + for &(case, expected) in CASES { + assert_eq!( + from_utf8_iter(case).collect::>().as_slice(), + expected + ); + } + } +} From 0e1f8f7b34823ed48cf8bb4dadabbbdb77dfb1d6 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Sun, 29 Aug 2021 20:09:58 +0200 Subject: [PATCH 05/22] Move verbatim path printing to uucore::Display --- src/uu/pwd/src/pwd.rs | 31 +++++------------------------- src/uucore/src/lib/mods/display.rs | 30 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/uu/pwd/src/pwd.rs b/src/uu/pwd/src/pwd.rs index e8f0c0013..1138dba8e 100644 --- a/src/uu/pwd/src/pwd.rs +++ b/src/uu/pwd/src/pwd.rs @@ -10,9 +10,10 @@ extern crate uucore; use clap::{crate_version, App, Arg}; use std::env; -use std::io::{self, Write}; -use std::path::{Path, PathBuf}; +use std::io; +use std::path::PathBuf; +use uucore::display::println_verbatim; use uucore::error::{FromIo, UResult}; static ABOUT: &str = "Display the full filename of the current working directory."; @@ -57,6 +58,7 @@ fn logical_path() -> io::Result { // POSIX: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pwd.html #[cfg(not(windows))] { + use std::path::Path; fn looks_reasonable(path: &Path) -> bool { // First, check if it's an absolute path. if !path.has_root() { @@ -148,30 +150,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .map(Into::into) .unwrap_or(cwd); - print_path(&cwd).map_err_context(|| "failed to print current directory".to_owned())?; - - Ok(()) -} - -fn print_path(path: &Path) -> io::Result<()> { - let stdout = io::stdout(); - let mut stdout = stdout.lock(); - - // On Unix we print non-lossily. - #[cfg(unix)] - { - use std::os::unix::ffi::OsStrExt; - stdout.write_all(path.as_os_str().as_bytes())?; - stdout.write_all(b"\n")?; - } - - // On other platforms we potentially mangle it. - // There might be some clever way to do it correctly on Windows, but - // invalid unicode in filenames is rare there. - #[cfg(not(unix))] - { - writeln!(stdout, "{}", path.display())?; - } + println_verbatim(&cwd).map_err_context(|| "failed to print current directory".to_owned())?; Ok(()) } diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs index 42e9fceba..b941a4b15 100644 --- a/src/uucore/src/lib/mods/display.rs +++ b/src/uucore/src/lib/mods/display.rs @@ -5,6 +5,9 @@ /// will wrap quotes around the filename and add the necessary escapes to make /// it copy/paste-able into a shell. /// +/// For writing raw paths to stdout when the output should not be quoted or escaped, +/// use `println_verbatim`. This will preserve invalid unicode. +/// /// # Examples /// ``` /// use std::path::Path; @@ -13,6 +16,7 @@ /// let path = Path::new("foo/bar.baz"); /// /// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" +/// println_verbatim(path)?; // Prints "foo/bar.baz" /// # Ok::<(), std::io::Error>(()) /// ``` // spell-checker:ignore Fbar @@ -20,6 +24,7 @@ use std::ffi::OsStr; #[cfg(any(unix, target_os = "wasi", windows))] use std::fmt::Write as FmtWrite; use std::fmt::{self, Display, Formatter}; +use std::io::{self, Write as IoWrite}; #[cfg(unix)] use std::os::unix::ffi::OsStrExt; @@ -268,6 +273,31 @@ fn from_utf8_iter(mut bytes: &[u8]) -> impl Iterator> { }) } +/// Print a path (or `OsStr`-like object) directly to stdout, with a trailing newline, +/// without losing any information if its encoding is invalid. +/// +/// This function is appropriate for commands where printing paths is the point and the +/// output is likely to be captured, like `pwd` and `basename`. For informational output +/// use `Quotable::quote`. +/// +/// FIXME: This is lossy on Windows. It could probably be implemented using some low-level +/// API that takes UTF-16, without going through io::Write. This is not a big priority +/// because broken filenames are much rarer on Windows than on Unix. +pub fn println_verbatim>(text: S) -> io::Result<()> { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + #[cfg(any(unix, target_os = "wasi"))] + { + stdout.write_all(text.as_ref().as_bytes())?; + stdout.write_all(b"\n")?; + } + #[cfg(not(any(unix, target_os = "wasi")))] + { + writeln!(stdout, "{}", std::path::Path::new(text.as_ref()).display())?; + } + Ok(()) +} + #[cfg(test)] mod tests { use super::*; From 03b2d40154391b28114e0bd38e2d7fef31f02aec Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Sun, 29 Aug 2021 20:18:52 +0200 Subject: [PATCH 06/22] rmdir: use modern filename display --- src/uu/rmdir/src/rmdir.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/uu/rmdir/src/rmdir.rs b/src/uu/rmdir/src/rmdir.rs index d8cad0421..f982cf4c3 100644 --- a/src/uu/rmdir/src/rmdir.rs +++ b/src/uu/rmdir/src/rmdir.rs @@ -14,6 +14,7 @@ use clap::{crate_version, App, Arg}; use std::fs::{read_dir, remove_dir}; use std::io; use std::path::Path; +use uucore::display::Quotable; use uucore::error::{set_exit_code, strip_errno, UResult}; use uucore::util_name; @@ -77,27 +78,23 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { Ok(path.metadata()?.file_type().is_dir()) } - let path = path.as_os_str().as_bytes(); - if error.raw_os_error() == Some(libc::ENOTDIR) && path.ends_with(b"/") { + let bytes = path.as_os_str().as_bytes(); + if error.raw_os_error() == Some(libc::ENOTDIR) && bytes.ends_with(b"/") { // Strip the trailing slash or .symlink_metadata() will follow the symlink - let path: &Path = OsStr::from_bytes(&path[..path.len() - 1]).as_ref(); - if is_symlink(path).unwrap_or(false) - && points_to_directory(path).unwrap_or(true) + let no_slash: &Path = OsStr::from_bytes(&bytes[..bytes.len() - 1]).as_ref(); + if is_symlink(no_slash).unwrap_or(false) + && points_to_directory(no_slash).unwrap_or(true) { show_error!( - "failed to remove '{}/': Symbolic link not followed", - path.display() + "failed to remove {}: Symbolic link not followed", + path.quote() ); continue; } } } - show_error!( - "failed to remove '{}': {}", - path.display(), - strip_errno(&error) - ); + show_error!("failed to remove {}: {}", path.quote(), strip_errno(&error)); } } @@ -125,7 +122,7 @@ fn remove(mut path: &Path, opts: Opts) -> Result<(), Error<'_>> { fn remove_single(path: &Path, opts: Opts) -> Result<(), Error<'_>> { if opts.verbose { - println!("{}: removing directory, '{}'", util_name(), path.display()); + println!("{}: removing directory, {}", util_name(), path.quote()); } remove_dir(path).map_err(|error| Error { error, path }) } From 13bb263a502297b612352b12185d8446bd28a2cd Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Mon, 30 Aug 2021 13:25:44 +0200 Subject: [PATCH 07/22] uucore::display: Support unquoted text --- src/uucore/src/lib/mods/display.rs | 109 ++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 11 deletions(-) diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs index b941a4b15..7cc48e74f 100644 --- a/src/uucore/src/lib/mods/display.rs +++ b/src/uucore/src/lib/mods/display.rs @@ -59,21 +59,55 @@ where T: AsRef, { fn quote(&self) -> Quoted<'_> { - Quoted(self.as_ref()) + Quoted { + text: self.as_ref(), + force_quote: true, + } } } /// A wrapper around [`OsStr`] for printing paths with quoting and escaping applied. -#[derive(Debug)] -pub struct Quoted<'a>(&'a OsStr); +#[derive(Debug, Copy, Clone)] +pub struct Quoted<'a> { + text: &'a OsStr, + force_quote: bool, +} + +impl Quoted<'_> { + /// Add quotes even if not strictly necessary. `true` by default. + pub fn force_quote(mut self, force: bool) -> Self { + self.force_quote = force; + self + } +} impl Display for Quoted<'_> { #[cfg(any(unix, target_os = "wasi"))] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let text = self.0.as_bytes(); + /// Characters with special meaning outside quotes. + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 + // % seems obscure enough to ignore, it's for job control only. + // {} were used in a version elsewhere but seem unnecessary, GNU doesn't escape them. + // ! is a common extension. + const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=! "; + /// Characters with a special meaning at the beginning of a name. + const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; + + let text = self.text.as_bytes(); let mut is_single_safe = true; let mut is_double_safe = true; + let mut requires_quote = self.force_quote; + + if let Some(first) = text.get(0) { + if SPECIAL_SHELL_CHARS_START.contains(first) { + requires_quote = true; + } + } else { + // Empty string + requires_quote = true; + } + for &ch in text { match ch { ch if ch.is_ascii_control() => return write_escaped(f, text), @@ -83,12 +117,21 @@ impl Display for Quoted<'_> { b'"' | b'`' | b'$' | b'\\' => is_double_safe = false, _ => (), } + if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) { + requires_quote = true; + } } let text = match from_utf8(text) { Err(_) => return write_escaped(f, text), Ok(text) => text, }; - if is_single_safe { + if !requires_quote && text.find(char::is_whitespace).is_some() { + requires_quote = true; + } + + if !requires_quote { + return f.write_str(text); + } else if is_single_safe { return write_simple(f, text, '\''); } else if is_double_safe { return write_simple(f, text, '\"'); @@ -176,25 +219,57 @@ impl Display for Quoted<'_> { use std::char::decode_utf16; use std::os::windows::ffi::OsStrExt; + /// Characters with special meaning outside quotes. + // FIXME: I'm not a PowerShell wizard and don't know if this is correct. + // I just copied the Unix version, removed \, and added {}@ based on + // experimentation. + // I have noticed that ~?*[] only get expanded in some contexts, so watch + // out for that if doing your own tests. + // Get-ChildItem seems unwilling to quote anything so it doesn't help. + // There's the additional wrinkle that Windows has stricter requirements + // for filenames: I've been testing using a Linux build of PowerShell, but + // this code doesn't even compile on Linux. + const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\"'*?[]=!{}@ "; + /// Characters with a special meaning at the beginning of a name. + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; + // Getting the "raw" representation of an OsStr is actually expensive, // so avoid it if unnecessary. - let text = match self.0.to_str() { - None => return write_escaped(f, self.0), + let text = match self.text.to_str() { + None => return write_escaped(f, self.text), Some(text) => text, }; let mut is_single_safe = true; let mut is_double_safe = true; + let mut requires_quote = self.force_quote; + + if let Some(first) = text.chars().next() { + if SPECIAL_SHELL_CHARS_START.contains(&first) { + requires_quote = true; + } + } else { + // Empty string + requires_quote = true; + } + for ch in text.chars() { match ch { - ch if ch.is_ascii_control() => return write_escaped(f, self.0), + ch if ch.is_ascii_control() => return write_escaped(f, self.text), '\'' => is_single_safe = false, '"' | '`' | '$' => is_double_safe = false, _ => (), } + if !requires_quote + && ((ch.is_ascii() && SPECIAL_SHELL_CHARS.contains(ch)) || ch.is_whitespace()) + { + requires_quote = true; + } } - if is_single_safe || !is_double_safe { + if !requires_quote { + return f.write_str(text); + } else if is_single_safe || !is_double_safe { return write_simple(f, text, '\''); } else { return write_simple(f, text, '"'); @@ -244,7 +319,12 @@ impl Display for Quoted<'_> { // As a fallback, we use Rust's own escaping rules. // This is reasonably sane and very easy to implement. // We use single quotes because that's hardcoded in a lot of tests. - write!(f, "'{}'", self.0.to_string_lossy().escape_debug()) + let text = self.text.to_string_lossy(); + if self.force_quote || !text.chars().all(|ch| ch.is_alphanumeric() || ch == '.') { + write!(f, "'{}'", text.escape_debug()) + } else { + f.write_str(&text) + } } } @@ -308,7 +388,7 @@ mod tests { } } - /// This should hold on any platform, or else a lot of other tests will fail. + /// This should hold on any platform. #[test] fn test_basic() { verify_quote(&[ @@ -316,6 +396,13 @@ mod tests { ("", "''"), ("foo/bar.baz", "'foo/bar.baz'"), ]); + assert_eq!("foo".quote().force_quote(false).to_string(), "foo"); + assert_eq!("".quote().force_quote(false).to_string(), "''"); + assert_eq!( + "foo bar".quote().force_quote(false).to_string(), + "'foo bar'" + ); + assert_eq!("$foo".quote().force_quote(false).to_string(), "'$foo'"); } #[cfg(any(unix, target_os = "wasi"))] From b5550bc4ddad5aa926f7548a9a8ebdd6b9dcdff6 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Mon, 30 Aug 2021 13:30:27 +0200 Subject: [PATCH 08/22] ls: Accept badly-encoded filenames --- src/uu/ls/src/ls.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index bf449d96b..afb1289ee 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -248,7 +248,7 @@ struct LongFormat { impl Config { #[allow(clippy::cognitive_complexity)] - fn from(options: clap::ArgMatches) -> UResult { + fn from(options: &clap::ArgMatches) -> UResult { let (mut format, opt) = if let Some(format_) = options.value_of(options::FORMAT) { ( match format_ { @@ -599,22 +599,19 @@ impl Config { #[uucore_procs::gen_uumain] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let args = args - .collect_str(InvalidEncodingHandling::Ignore) - .accept_any(); - let usage = usage(); let app = uu_app().usage(&usage[..]); let matches = app.get_matches_from(args); + let config = Config::from(&matches)?; let locs = matches - .values_of(options::PATHS) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_else(|| vec![String::from(".")]); + .values_of_os(options::PATHS) + .map(|v| v.map(Path::new).collect()) + .unwrap_or_else(|| vec![Path::new(".")]); - list(locs, Config::from(matches)?) + list(locs, config) } pub fn uu_app() -> App<'static, 'static> { @@ -1249,14 +1246,14 @@ impl PathData { } } -fn list(locs: Vec, config: Config) -> UResult<()> { +fn list(locs: Vec<&Path>, config: Config) -> UResult<()> { let mut files = Vec::::new(); let mut dirs = Vec::::new(); let mut out = BufWriter::new(stdout()); for loc in &locs { - let p = PathBuf::from(&loc); + let p = PathBuf::from(loc); let path_data = PathData::new(p, None, None, &config, true); if path_data.md().is_none() { @@ -1286,6 +1283,7 @@ fn list(locs: Vec, config: Config) -> UResult<()> { sort_entries(&mut dirs, &config); for dir in dirs { if locs.len() > 1 || config.recursive { + // FIXME: This should use the quoting style and propagate errors let _ = writeln!(out, "\n{}:", dir.p_buf.display()); } enter_directory(&dir, &config, &mut out); @@ -1671,7 +1669,6 @@ fn get_inode(metadata: &Metadata) -> String { use std::sync::Mutex; #[cfg(unix)] use uucore::entries; -use uucore::InvalidEncodingHandling; #[cfg(unix)] fn cached_uid2usr(uid: u32) -> String { From a93959aa4405abc53b501e83820e252594107845 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Tue, 31 Aug 2021 12:49:22 +0200 Subject: [PATCH 09/22] uucore::display: impl Quotable for Cow, add escape_control --- src/uucore/src/lib/mods/display.rs | 85 ++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs index 7cc48e74f..924ed2245 100644 --- a/src/uucore/src/lib/mods/display.rs +++ b/src/uucore/src/lib/mods/display.rs @@ -20,6 +20,7 @@ /// # Ok::<(), std::io::Error>(()) /// ``` // spell-checker:ignore Fbar +use std::borrow::{Borrow, Cow}; use std::ffi::OsStr; #[cfg(any(unix, target_os = "wasi", windows))] use std::fmt::Write as FmtWrite; @@ -54,15 +55,32 @@ pub trait Quotable { fn quote(&self) -> Quoted<'_>; } -impl Quotable for T -where - T: AsRef, -{ - fn quote(&self) -> Quoted<'_> { - Quoted { - text: self.as_ref(), - force_quote: true, +macro_rules! impl_as_ref { + ($type: ty) => { + impl Quotable for $type { + fn quote(&self) -> Quoted<'_> { + Quoted::new(self.as_ref()) + } } + }; +} + +impl_as_ref!(&'_ str); +impl_as_ref!(String); +impl_as_ref!(&'_ std::path::Path); +impl_as_ref!(std::path::PathBuf); +impl_as_ref!(std::path::Component<'_>); +impl_as_ref!(std::path::Components<'_>); +impl_as_ref!(std::path::Iter<'_>); +impl_as_ref!(&'_ std::ffi::OsStr); +impl_as_ref!(std::ffi::OsString); + +// Cow<'_, str> does not implement AsRef and this is unlikely to be fixed +// for backward compatibility reasons. Otherwise we'd use a blanket impl. +impl Quotable for Cow<'_, str> { + fn quote(&self) -> Quoted<'_> { + // I suspect there's a better way to do this but I haven't found one + Quoted::new( as Borrow>::borrow(self).as_ref()) } } @@ -71,14 +89,29 @@ where pub struct Quoted<'a> { text: &'a OsStr, force_quote: bool, + escape_control: bool, } -impl Quoted<'_> { +impl<'a> Quoted<'a> { + fn new(text: &'a OsStr) -> Self { + Quoted { + text, + force_quote: true, + escape_control: true, + } + } + /// Add quotes even if not strictly necessary. `true` by default. pub fn force_quote(mut self, force: bool) -> Self { self.force_quote = force; self } + + /// Escape control characters. `true` by default. + pub fn escape_control(mut self, escape: bool) -> Self { + self.escape_control = escape; + self + } } impl Display for Quoted<'_> { @@ -89,7 +122,7 @@ impl Display for Quoted<'_> { // % seems obscure enough to ignore, it's for job control only. // {} were used in a version elsewhere but seem unnecessary, GNU doesn't escape them. // ! is a common extension. - const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=! "; + const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=! \t\n"; /// Characters with a special meaning at the beginning of a name. const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; @@ -110,7 +143,9 @@ impl Display for Quoted<'_> { for &ch in text { match ch { - ch if ch.is_ascii_control() => return write_escaped(f, text), + ch if self.escape_control && ch.is_ascii_control() => { + return write_escaped(f, text, self.escape_control) + } b'\'' => is_single_safe = false, // Unsafe characters according to: // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03 @@ -122,7 +157,7 @@ impl Display for Quoted<'_> { } } let text = match from_utf8(text) { - Err(_) => return write_escaped(f, text), + Err(_) => return write_escaped(f, text, self.escape_control), Ok(text) => text, }; if !requires_quote && text.find(char::is_whitespace).is_some() { @@ -175,7 +210,7 @@ impl Display for Quoted<'_> { /// - fish /// - dash /// - tcsh - fn write_escaped(f: &mut Formatter<'_>, text: &[u8]) -> fmt::Result { + fn write_escaped(f: &mut Formatter<'_>, text: &[u8], escape_control: bool) -> fmt::Result { f.write_str("$'")?; for chunk in from_utf8_iter(text) { match chunk { @@ -190,7 +225,9 @@ impl Display for Quoted<'_> { // \0 doesn't work consistently because of the // octal \nnn syntax, and null bytes can't appear // in filenames anyway. - ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?, + ch if escape_control && ch.is_ascii_control() => { + write!(f, "\\x{:02X}", ch as u8)? + } '\\' | '\'' => { // '?' and '"' can also be escaped this way // but AFAICT there's no reason to do so @@ -229,14 +266,14 @@ impl Display for Quoted<'_> { // There's the additional wrinkle that Windows has stricter requirements // for filenames: I've been testing using a Linux build of PowerShell, but // this code doesn't even compile on Linux. - const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\"'*?[]=!{}@ "; + const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\"'*?[]=!{}@ \t\r\n"; /// Characters with a special meaning at the beginning of a name. const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; // Getting the "raw" representation of an OsStr is actually expensive, // so avoid it if unnecessary. let text = match self.text.to_str() { - None => return write_escaped(f, self.text), + None => return write_escaped(f, self.text, self.escape_control), Some(text) => text, }; @@ -255,7 +292,9 @@ impl Display for Quoted<'_> { for ch in text.chars() { match ch { - ch if ch.is_ascii_control() => return write_escaped(f, self.text), + ch if self.escape_control && ch.is_ascii_control() => { + return write_escaped(f, self.text, self.escape_control) + } '\'' => is_single_safe = false, '"' | '`' | '$' => is_double_safe = false, _ => (), @@ -291,7 +330,7 @@ impl Display for Quoted<'_> { Ok(()) } - fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result { + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr, escape_control: bool) -> fmt::Result { f.write_char('"')?; for ch in decode_utf16(text.encode_wide()) { match ch { @@ -300,7 +339,9 @@ impl Display for Quoted<'_> { '\r' => f.write_str("`r")?, '\n' => f.write_str("`n")?, '\t' => f.write_str("`t")?, - ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?, + ch if escape_control && ch.is_ascii_control() => { + write!(f, "`u{{{:04X}}}", ch as u8)? + } '`' => f.write_str("``")?, '$' => f.write_str("`$")?, '"' => f.write_str("\"\"")?, @@ -382,7 +423,7 @@ pub fn println_verbatim>(text: S) -> io::Result<()> { mod tests { use super::*; - fn verify_quote(cases: &[(impl AsRef, &str)]) { + fn verify_quote(cases: &[(impl Quotable, &str)]) { for (case, expected) in cases { assert_eq!(case.quote().to_string(), *expected); } @@ -442,7 +483,9 @@ mod tests { #[cfg(any(unix, target_os = "wasi"))] #[test] fn test_utf8_iter() { - const CASES: &[(&[u8], &[Result<&str, u8>])] = &[ + type ByteStr = &'static [u8]; + type Chunk = Result<&'static str, u8>; + const CASES: &[(ByteStr, &[Chunk])] = &[ (b"", &[]), (b"hello", &[Ok("hello")]), // Immediately invalid From 4d6faae555711e45de8c04a1a84f68aebecc03ab Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Tue, 31 Aug 2021 12:55:17 +0200 Subject: [PATCH 10/22] ls: Fix --{show, hide}-control-chars logic --- src/uu/ls/src/ls.rs | 5 ++--- tests/by-util/test_ls.rs | 24 +++++++----------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index afb1289ee..3cfb0ad2c 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -428,11 +428,10 @@ impl Config { #[allow(clippy::needless_bool)] let show_control = if options.is_present(options::HIDE_CONTROL_CHARS) { false - } else if options.is_present(options::SHOW_CONTROL_CHARS) || atty::is(atty::Stream::Stdout) - { + } else if options.is_present(options::SHOW_CONTROL_CHARS) { true } else { - false + !atty::is(atty::Stream::Stdout) }; let quoting_style = if let Some(style) = options.value_of(options::QUOTING_STYLE) { diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 546fb86a8..eb239977d 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -1416,6 +1416,7 @@ fn test_ls_quoting_style() { // Default is shell-escape scene .ucmd() + .arg("--hide-control-chars") .arg("one\ntwo") .succeeds() .stdout_only("'one'$'\\n''two'\n"); @@ -1437,23 +1438,8 @@ fn test_ls_quoting_style() { ] { scene .ucmd() - .arg(arg) - .arg("one\ntwo") - .succeeds() - .stdout_only(format!("{}\n", correct)); - } - - for (arg, correct) in &[ - ("--quoting-style=literal", "one?two"), - ("-N", "one?two"), - ("--literal", "one?two"), - ("--quoting-style=shell", "one?two"), - ("--quoting-style=shell-always", "'one?two'"), - ] { - scene - .ucmd() - .arg(arg) .arg("--hide-control-chars") + .arg(arg) .arg("one\ntwo") .succeeds() .stdout_only(format!("{}\n", correct)); @@ -1463,7 +1449,7 @@ fn test_ls_quoting_style() { ("--quoting-style=literal", "one\ntwo"), ("-N", "one\ntwo"), ("--literal", "one\ntwo"), - ("--quoting-style=shell", "one\ntwo"), + ("--quoting-style=shell", "one\ntwo"), // FIXME: GNU ls quotes this case ("--quoting-style=shell-always", "'one\ntwo'"), ] { scene @@ -1490,6 +1476,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one\\two") .succeeds() @@ -1505,6 +1492,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one\n&two") .succeeds() @@ -1535,6 +1523,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one two") .succeeds() @@ -1558,6 +1547,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one") .succeeds() From 483a5fd1d4cd0508af81ef41a97cdb5500d300b6 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Tue, 31 Aug 2021 12:57:39 +0200 Subject: [PATCH 11/22] ls: Process OsStrings instead of Strings --- src/uu/ls/src/ls.rs | 20 +++++++++----------- src/uu/ls/src/quoting_style.rs | 16 ++++++++++------ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 3cfb0ad2c..fcb0b28ff 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -21,6 +21,7 @@ use lscolors::LsColors; use number_prefix::NumberPrefix; use once_cell::unsync::OnceCell; use quoting_style::{escape_name, QuotingStyle}; +use std::ffi::OsString; #[cfg(windows)] use std::os::windows::fs::MetadataExt; use std::{ @@ -1173,7 +1174,7 @@ struct PathData { md: OnceCell>, ft: OnceCell>, // Name of the file - will be empty for . or .. - display_name: String, + display_name: OsString, // PathBuf that all above data corresponds to p_buf: PathBuf, must_dereference: bool, @@ -1183,7 +1184,7 @@ impl PathData { fn new( p_buf: PathBuf, file_type: Option>, - file_name: Option, + file_name: Option, config: &Config, command_line: bool, ) -> Self { @@ -1191,16 +1192,13 @@ impl PathData { // For '..', the filename is None let display_name = if let Some(name) = file_name { name + } else if command_line { + p_buf.clone().into() } else { - let display_os_str = if command_line { - p_buf.as_os_str() - } else { - p_buf - .file_name() - .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) - }; - - display_os_str.to_string_lossy().into_owned() + p_buf + .file_name() + .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) + .to_owned() }; let must_dereference = match &config.dereference { Dereference::All => true, diff --git a/src/uu/ls/src/quoting_style.rs b/src/uu/ls/src/quoting_style.rs index 5f421b2ee..4839370ee 100644 --- a/src/uu/ls/src/quoting_style.rs +++ b/src/uu/ls/src/quoting_style.rs @@ -1,4 +1,5 @@ use std::char::from_digit; +use std::ffi::OsStr; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. @@ -255,19 +256,21 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { (escaped_str, must_quote) } -pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String { +pub(super) fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { match style { QuotingStyle::Literal { show_control } => { if !show_control { - name.chars() + name.to_string_lossy() + .chars() .flat_map(|c| EscapedChar::new_literal(c).hide_control()) .collect() } else { - name.into() + name.to_string_lossy().into_owned() } } QuotingStyle::C { quotes } => { let escaped_str: String = name + .to_string_lossy() .chars() .flat_map(|c| EscapedChar::new_c(c, *quotes)) .collect(); @@ -283,6 +286,7 @@ pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String { always_quote, show_control, } => { + let name = name.to_string_lossy(); let (quotes, must_quote) = if name.contains('"') { (Quotes::Single, true) } else if name.contains('\'') { @@ -294,9 +298,9 @@ pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String { }; let (escaped_str, contains_quote_chars) = if *escape { - shell_with_escape(name, quotes) + shell_with_escape(&name, quotes) } else { - shell_without_escape(name, quotes, *show_control) + shell_without_escape(&name, quotes, *show_control) }; match (must_quote | contains_quote_chars, quotes) { @@ -362,7 +366,7 @@ mod tests { fn check_names(name: &str, map: Vec<(&str, &str)>) { assert_eq!( map.iter() - .map(|(_, style)| escape_name(name, &get_style(style))) + .map(|(_, style)| escape_name(name.as_ref(), &get_style(style))) .collect::>(), map.iter() .map(|(correct, _)| correct.to_string()) From f0f13fe1f0f9d0b9c385b1d38fbac42f528fa80c Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Mon, 30 Aug 2021 21:53:44 +0200 Subject: [PATCH 12/22] uucore::display: Simplify The different quoting implementations are similar enough to merge parts of them. --- src/uucore/src/lib/mods/display.rs | 166 ++++++++++++----------------- 1 file changed, 69 insertions(+), 97 deletions(-) diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs index 924ed2245..52cd45404 100644 --- a/src/uucore/src/lib/mods/display.rs +++ b/src/uucore/src/lib/mods/display.rs @@ -20,7 +20,7 @@ /// # Ok::<(), std::io::Error>(()) /// ``` // spell-checker:ignore Fbar -use std::borrow::{Borrow, Cow}; +use std::borrow::Cow; use std::ffi::OsStr; #[cfg(any(unix, target_os = "wasi", windows))] use std::fmt::Write as FmtWrite; @@ -79,8 +79,8 @@ impl_as_ref!(std::ffi::OsString); // for backward compatibility reasons. Otherwise we'd use a blanket impl. impl Quotable for Cow<'_, str> { fn quote(&self) -> Quoted<'_> { - // I suspect there's a better way to do this but I haven't found one - Quoted::new( as Borrow>::borrow(self).as_ref()) + let text: &str = self.as_ref(); + Quoted::new(text.as_ref()) } } @@ -115,25 +115,49 @@ impl<'a> Quoted<'a> { } impl Display for Quoted<'_> { - #[cfg(any(unix, target_os = "wasi"))] + #[cfg(any(windows, unix, target_os = "wasi"))] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // On Unix we emulate sh syntax. On Windows Powershell. + /// Characters with special meaning outside quotes. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 // % seems obscure enough to ignore, it's for job control only. // {} were used in a version elsewhere but seem unnecessary, GNU doesn't escape them. - // ! is a common extension. - const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=! \t\n"; - /// Characters with a special meaning at the beginning of a name. - const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; + // ! is a common extension for expanding the shell history. + #[cfg(any(unix, target_os = "wasi"))] + const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\\\"'*?[]=! \t\n"; + // FIXME: I'm not a PowerShell wizard and don't know if this is correct. + // I just copied the Unix version, removed \, and added {}@ based on + // experimentation. + // I have noticed that ~?*[] only get expanded in some contexts, so watch + // out for that if doing your own tests. + // Get-ChildItem seems unwilling to quote anything so it doesn't help. + // There's the additional wrinkle that Windows has stricter requirements + // for filenames: I've been testing using a Linux build of PowerShell, but + // this code doesn't even compile on Linux. + #[cfg(windows)] + const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\"'*?[]=!{}@ \t\r\n"; - let text = self.text.as_bytes(); + /// Characters with a special meaning at the beginning of a name. + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; + + /// Characters that are dangerous in a double-quoted string. + #[cfg(any(unix, target_os = "wasi"))] + const DOUBLE_UNSAFE: &[char] = &['"', '`', '$', '\\']; + #[cfg(windows)] + const DOUBLE_UNSAFE: &[char] = &['"', '`', '$']; + + let text = match self.text.to_str() { + None => return write_escaped(f, self.text, self.escape_control), + Some(text) => text, + }; let mut is_single_safe = true; let mut is_double_safe = true; let mut requires_quote = self.force_quote; - if let Some(first) = text.get(0) { - if SPECIAL_SHELL_CHARS_START.contains(first) { + if let Some(first) = text.chars().next() { + if SPECIAL_SHELL_CHARS_START.contains(&first) { requires_quote = true; } } else { @@ -141,28 +165,28 @@ impl Display for Quoted<'_> { requires_quote = true; } - for &ch in text { - match ch { - ch if self.escape_control && ch.is_ascii_control() => { - return write_escaped(f, text, self.escape_control) + for ch in text.chars() { + if ch.is_ascii() { + if self.escape_control && ch.is_ascii_control() { + return write_escaped(f, self.text, self.escape_control); + } + if ch == '\'' { + is_single_safe = false; + } + if DOUBLE_UNSAFE.contains(&ch) { + is_double_safe = false; + } + if !requires_quote && SPECIAL_SHELL_CHARS.contains(ch) { + requires_quote = true; } - b'\'' => is_single_safe = false, - // Unsafe characters according to: - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03 - b'"' | b'`' | b'$' | b'\\' => is_double_safe = false, - _ => (), } - if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) { + if !requires_quote && ch.is_whitespace() { + // This includes unicode whitespace. + // We maybe don't have to escape it, we don't escape other lookalike + // characters either, but it's confusing if it goes unquoted. requires_quote = true; } } - let text = match from_utf8(text) { - Err(_) => return write_escaped(f, text, self.escape_control), - Ok(text) => text, - }; - if !requires_quote && text.find(char::is_whitespace).is_some() { - requires_quote = true; - } if !requires_quote { return f.write_str(text); @@ -174,6 +198,7 @@ impl Display for Quoted<'_> { return write_single_escaped(f, text); } + #[cfg(any(unix, target_os = "wasi"))] fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { f.write_char(quote)?; f.write_str(text)?; @@ -181,6 +206,7 @@ impl Display for Quoted<'_> { Ok(()) } + #[cfg(any(unix, target_os = "wasi"))] fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { let mut iter = text.split('\''); if let Some(chunk) = iter.next() { @@ -210,9 +236,10 @@ impl Display for Quoted<'_> { /// - fish /// - dash /// - tcsh - fn write_escaped(f: &mut Formatter<'_>, text: &[u8], escape_control: bool) -> fmt::Result { + #[cfg(any(unix, target_os = "wasi"))] + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr, escape_control: bool) -> fmt::Result { f.write_str("$'")?; - for chunk in from_utf8_iter(text) { + for chunk in from_utf8_iter(text.as_bytes()) { match chunk { Ok(chunk) => { for ch in chunk.chars() { @@ -246,74 +273,8 @@ impl Display for Quoted<'_> { f.write_char('\'')?; Ok(()) } - } - - #[cfg(windows)] - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // Behavior is based on PowerShell. - // ` takes the role of \ since \ is already used as the path separator. - // Things are UTF-16-oriented, so we escape code units as "`u{1234}". - use std::char::decode_utf16; - use std::os::windows::ffi::OsStrExt; - - /// Characters with special meaning outside quotes. - // FIXME: I'm not a PowerShell wizard and don't know if this is correct. - // I just copied the Unix version, removed \, and added {}@ based on - // experimentation. - // I have noticed that ~?*[] only get expanded in some contexts, so watch - // out for that if doing your own tests. - // Get-ChildItem seems unwilling to quote anything so it doesn't help. - // There's the additional wrinkle that Windows has stricter requirements - // for filenames: I've been testing using a Linux build of PowerShell, but - // this code doesn't even compile on Linux. - const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\"'*?[]=!{}@ \t\r\n"; - /// Characters with a special meaning at the beginning of a name. - const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; - - // Getting the "raw" representation of an OsStr is actually expensive, - // so avoid it if unnecessary. - let text = match self.text.to_str() { - None => return write_escaped(f, self.text, self.escape_control), - Some(text) => text, - }; - - let mut is_single_safe = true; - let mut is_double_safe = true; - let mut requires_quote = self.force_quote; - - if let Some(first) = text.chars().next() { - if SPECIAL_SHELL_CHARS_START.contains(&first) { - requires_quote = true; - } - } else { - // Empty string - requires_quote = true; - } - - for ch in text.chars() { - match ch { - ch if self.escape_control && ch.is_ascii_control() => { - return write_escaped(f, self.text, self.escape_control) - } - '\'' => is_single_safe = false, - '"' | '`' | '$' => is_double_safe = false, - _ => (), - } - if !requires_quote - && ((ch.is_ascii() && SPECIAL_SHELL_CHARS.contains(ch)) || ch.is_whitespace()) - { - requires_quote = true; - } - } - - if !requires_quote { - return f.write_str(text); - } else if is_single_safe || !is_double_safe { - return write_simple(f, text, '\''); - } else { - return write_simple(f, text, '"'); - } + #[cfg(windows)] fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { // Quotes in Powershell can be escaped by doubling them f.write_char(quote)?; @@ -330,7 +291,18 @@ impl Display for Quoted<'_> { Ok(()) } + #[cfg(windows)] + fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { + write_simple(f, text, '\'') + } + + #[cfg(windows)] fn write_escaped(f: &mut Formatter<'_>, text: &OsStr, escape_control: bool) -> fmt::Result { + // ` takes the role of \ since \ is already used as the path separator. + // Things are UTF-16-oriented, so we escape code units as "`u{1234}". + use std::char::decode_utf16; + use std::os::windows::ffi::OsStrExt; + f.write_char('"')?; for ch in decode_utf16(text.encode_wide()) { match ch { From 3dd6f7988044da8035567e2abc1bb270c8e2c052 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Tue, 31 Aug 2021 22:01:24 +0200 Subject: [PATCH 13/22] uucore::display: Remove escape_control, tweak special characters --- src/uucore/src/lib/mods/display.rs | 159 ++++++++++++++++++----------- 1 file changed, 99 insertions(+), 60 deletions(-) diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs index 52cd45404..0245a401a 100644 --- a/src/uucore/src/lib/mods/display.rs +++ b/src/uucore/src/lib/mods/display.rs @@ -53,6 +53,29 @@ pub trait Quotable { /// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" /// ``` fn quote(&self) -> Quoted<'_>; + + /// Like `quote()`, but don't actually add quotes unless necessary because of + /// whitespace or special characters. + /// + /// # Examples + /// + /// ``` + /// #[macro_use] + /// extern crate uucore; + /// use std::path::Path; + /// use uucore::display::Quotable; + /// + /// let foo = Path::new("foo/bar.baz"); + /// let bar = Path::new("foo bar"); + /// + /// show_error!("{}: Not found", foo); // Prints "util: foo/bar.baz: Not found" + /// show_error!("{}: Not found", bar); // Prints "util: 'foo bar': Not found" + /// ``` + fn maybe_quote(&self) -> Quoted<'_> { + let mut quoted = self.quote(); + quoted.force_quote = false; + quoted + } } macro_rules! impl_as_ref { @@ -89,7 +112,6 @@ impl Quotable for Cow<'_, str> { pub struct Quoted<'a> { text: &'a OsStr, force_quote: bool, - escape_control: bool, } impl<'a> Quoted<'a> { @@ -97,37 +119,26 @@ impl<'a> Quoted<'a> { Quoted { text, force_quote: true, - escape_control: true, } } - - /// Add quotes even if not strictly necessary. `true` by default. - pub fn force_quote(mut self, force: bool) -> Self { - self.force_quote = force; - self - } - - /// Escape control characters. `true` by default. - pub fn escape_control(mut self, escape: bool) -> Self { - self.escape_control = escape; - self - } } impl Display for Quoted<'_> { #[cfg(any(windows, unix, target_os = "wasi"))] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { // On Unix we emulate sh syntax. On Windows Powershell. + // They're just similar enough to share some code. /// Characters with special meaning outside quotes. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 - // % seems obscure enough to ignore, it's for job control only. - // {} were used in a version elsewhere but seem unnecessary, GNU doesn't escape them. - // ! is a common extension for expanding the shell history. + // I don't know why % is in there, and GNU doesn't quote it either. + // {} were used in a version elsewhere but seem unnecessary, GNU doesn't + // quote them. They're used in function definitions but not in a way we + // have to worry about. #[cfg(any(unix, target_os = "wasi"))] - const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\\\"'*?[]=! \t\n"; + const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]="; // FIXME: I'm not a PowerShell wizard and don't know if this is correct. - // I just copied the Unix version, removed \, and added {}@ based on + // I just copied the Unix version, removed \, and added ,{} based on // experimentation. // I have noticed that ~?*[] only get expanded in some contexts, so watch // out for that if doing your own tests. @@ -136,19 +147,29 @@ impl Display for Quoted<'_> { // for filenames: I've been testing using a Linux build of PowerShell, but // this code doesn't even compile on Linux. #[cfg(windows)] - const SPECIAL_SHELL_CHARS: &str = "|&;<>()$`\"'*?[]=!{}@ \t\r\n"; + const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\"'*?[]=,{}"; /// Characters with a special meaning at the beginning of a name. - const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; - - /// Characters that are dangerous in a double-quoted string. + // ~ expands a home directory. + // # starts a comment. + // ! is a common extension for expanding the shell history. #[cfg(any(unix, target_os = "wasi"))] - const DOUBLE_UNSAFE: &[char] = &['"', '`', '$', '\\']; + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '!']; + // Same deal as before, this is possibly incomplete. + // '-' is included because unlike in Unix, quoting an argument may stop it + // from being recognized as an option. I like that very much. + // A single stand-alone exclamation mark seems to have some special meaning. #[cfg(windows)] - const DOUBLE_UNSAFE: &[char] = &['"', '`', '$']; + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '-', '!']; + + /// Characters that are interpreted specially in a double-quoted string. + #[cfg(any(unix, target_os = "wasi"))] + const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$', b'\\']; + #[cfg(windows)] + const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$']; let text = match self.text.to_str() { - None => return write_escaped(f, self.text, self.escape_control), + None => return write_escaped(f, self.text), Some(text) => text, }; @@ -167,18 +188,19 @@ impl Display for Quoted<'_> { for ch in text.chars() { if ch.is_ascii() { - if self.escape_control && ch.is_ascii_control() { - return write_escaped(f, self.text, self.escape_control); - } - if ch == '\'' { + let ch = ch as u8; + if ch == b'\'' { is_single_safe = false; } if DOUBLE_UNSAFE.contains(&ch) { is_double_safe = false; } - if !requires_quote && SPECIAL_SHELL_CHARS.contains(ch) { + if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) { requires_quote = true; } + if ch.is_ascii_control() { + return write_escaped(f, self.text); + } } if !requires_quote && ch.is_whitespace() { // This includes unicode whitespace. @@ -198,7 +220,6 @@ impl Display for Quoted<'_> { return write_single_escaped(f, text); } - #[cfg(any(unix, target_os = "wasi"))] fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { f.write_char(quote)?; f.write_str(text)?; @@ -237,7 +258,7 @@ impl Display for Quoted<'_> { /// - dash /// - tcsh #[cfg(any(unix, target_os = "wasi"))] - fn write_escaped(f: &mut Formatter<'_>, text: &OsStr, escape_control: bool) -> fmt::Result { + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result { f.write_str("$'")?; for chunk in from_utf8_iter(text.as_bytes()) { match chunk { @@ -252,9 +273,7 @@ impl Display for Quoted<'_> { // \0 doesn't work consistently because of the // octal \nnn syntax, and null bytes can't appear // in filenames anyway. - ch if escape_control && ch.is_ascii_control() => { - write!(f, "\\x{:02X}", ch as u8)? - } + ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?, '\\' | '\'' => { // '?' and '"' can also be escaped this way // but AFAICT there's no reason to do so @@ -275,29 +294,23 @@ impl Display for Quoted<'_> { } #[cfg(windows)] - fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { + fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { // Quotes in Powershell can be escaped by doubling them - f.write_char(quote)?; - let mut iter = text.split(quote); + f.write_char('\'')?; + let mut iter = text.split('\''); if let Some(chunk) = iter.next() { f.write_str(chunk)?; } for chunk in iter { - f.write_char(quote)?; - f.write_char(quote)?; + f.write_str("''")?; f.write_str(chunk)?; } - f.write_char(quote)?; + f.write_char('\'')?; Ok(()) } #[cfg(windows)] - fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { - write_simple(f, text, '\'') - } - - #[cfg(windows)] - fn write_escaped(f: &mut Formatter<'_>, text: &OsStr, escape_control: bool) -> fmt::Result { + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result { // ` takes the role of \ since \ is already used as the path separator. // Things are UTF-16-oriented, so we escape code units as "`u{1234}". use std::char::decode_utf16; @@ -311,9 +324,7 @@ impl Display for Quoted<'_> { '\r' => f.write_str("`r")?, '\n' => f.write_str("`n")?, '\t' => f.write_str("`t")?, - ch if escape_control && ch.is_ascii_control() => { - write!(f, "`u{{{:04X}}}", ch as u8)? - } + ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?, '`' => f.write_str("``")?, '$' => f.write_str("`$")?, '"' => f.write_str("\"\"")?, @@ -401,7 +412,13 @@ mod tests { } } - /// This should hold on any platform. + fn verify_maybe(cases: &[(impl Quotable, &str)]) { + for (case, expected) in cases { + assert_eq!(case.maybe_quote().to_string(), *expected); + } + } + + /// This should hold on any platform, or else other tests fail. #[test] fn test_basic() { verify_quote(&[ @@ -409,13 +426,23 @@ mod tests { ("", "''"), ("foo/bar.baz", "'foo/bar.baz'"), ]); - assert_eq!("foo".quote().force_quote(false).to_string(), "foo"); - assert_eq!("".quote().force_quote(false).to_string(), "''"); - assert_eq!( - "foo bar".quote().force_quote(false).to_string(), - "'foo bar'" - ); - assert_eq!("$foo".quote().force_quote(false).to_string(), "'$foo'"); + verify_maybe(&[ + ("foo", "foo"), + ("", "''"), + ("foo bar", "'foo bar'"), + ("$foo", "'$foo'"), + ]); + } + + #[cfg(any(unix, target_os = "wasi", windows))] + #[test] + fn test_common() { + verify_maybe(&[ + ("a#b", "a#b"), + ("#ab", "'#ab'"), + ("a~b", "a~b"), + ("!", "'!'"), + ]); } #[cfg(any(unix, target_os = "wasi"))] @@ -430,6 +457,12 @@ mod tests { (r#"'$''"#, r#"\''$'\'\'"#), ]); verify_quote(&[(OsStr::from_bytes(b"foo\xFF"), r#"$'foo\xFF'"#)]); + verify_maybe(&[ + ("-x", "-x"), + ("a,b", "a,b"), + ("a\\b", "'a\\b'"), + ("}", ("}")), + ]); } #[cfg(windows)] @@ -449,7 +482,13 @@ mod tests { verify_quote(&[( OsString::from_wide(&[b'x' as u16, 0xD800]), r#""x`u{D800}""#, - )]) + )]); + verify_maybe(&[ + ("-x", "'-x'"), + ("a,b", "'a,b'"), + ("a\\b", "a\\b"), + ("}", "'}'"), + ]); } #[cfg(any(unix, target_os = "wasi"))] From ffe63945b7e75379c0be05c804548a016e117565 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Tue, 31 Aug 2021 22:02:20 +0200 Subject: [PATCH 14/22] wc: Update path display --- src/uu/wc/src/wc.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 01c3d8fdc..6917eb137 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -24,6 +24,8 @@ use std::fs::{self, File}; use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use uucore::display::{Quotable, Quoted}; + /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; @@ -122,10 +124,10 @@ impl Input { } } - fn path_display(&self) -> std::path::Display<'_> { + fn path_display(&self) -> Quoted<'_> { match self { - Input::Path(path) => path.display(), - Input::Stdin(_) => Path::display("'standard input'".as_ref()), + Input::Path(path) => path.maybe_quote(), + Input::Stdin(_) => "standard input".maybe_quote(), } } } @@ -448,7 +450,10 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { if let Err(err) = print_stats(settings, &result, max_width) { show_warning!( "failed to print result for {}: {}", - result.title.unwrap_or_else(|| "".as_ref()).display(), + result + .title + .unwrap_or_else(|| "".as_ref()) + .maybe_quote(), err ); failure = true; @@ -526,7 +531,7 @@ fn print_stats( } if let Some(title) = result.title { - writeln!(stdout_lock, " {}", title.display())?; + writeln!(stdout_lock, " {}", title.maybe_quote())?; } else { writeln!(stdout_lock)?; } From 7ca2f4989fcca103b2663c9af97106c85227a846 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Tue, 31 Aug 2021 22:06:00 +0200 Subject: [PATCH 15/22] ls: Hide unused variable warnings --- tests/by-util/test_ls.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index eb239977d..e6c23acc1 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -354,6 +354,7 @@ fn test_ls_long_format() { at.mkdir(&at.plus_as_string("test-long-dir/test-long-dir")); for arg in &["-l", "--long", "--format=long", "--format=verbose"] { + #[allow(unused_variables)] let result = scene.ucmd().arg(arg).arg("test-long-dir").succeeds(); // Assuming sane username do not have spaces within them. // A line of the output should be: @@ -373,6 +374,7 @@ fn test_ls_long_format() { ).unwrap()); } + #[allow(unused_variables)] let result = scene.ucmd().arg("-lan").arg("test-long-dir").succeeds(); // This checks for the line with the .. entry. The uname and group should be digits. #[cfg(not(windows))] From 575fbd4cb774b691819b925451a9a7df3da82655 Mon Sep 17 00:00:00 2001 From: Justin Tracey Date: Tue, 31 Aug 2021 13:25:03 -0400 Subject: [PATCH 16/22] join: make autoformat actually construct a format Makes the -o auto option construct a format at initialization, rather than try to handle it as a special case when printing lines. Fixes bugs when combined with -e, especially when combined with -a. --- src/uu/join/src/join.rs | 58 ++++++++++++++++++++++---------------- tests/by-util/test_join.rs | 13 +++++++++ 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index d108a08ef..ae991489f 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -11,7 +11,7 @@ extern crate uucore; use clap::{crate_version, App, Arg}; -use std::cmp::{min, Ordering}; +use std::cmp::Ordering; use std::fs::File; use std::io::{stdin, BufRead, BufReader, Lines, Stdin}; @@ -102,17 +102,12 @@ impl<'a> Repr<'a> { } /// Print each field except the one at the index. - fn print_fields(&self, line: &Line, index: usize, max_fields: Option) { - for i in 0..min(max_fields.unwrap_or(usize::max_value()), line.fields.len()) { + fn print_fields(&self, line: &Line, index: usize) { + for i in 0..line.fields.len() { if i != index { print!("{}{}", self.separator, line.fields[i]); } } - if let Some(n) = max_fields { - for _ in line.fields.len()..n { - print!("{}", self.separator) - } - } } /// Print each field or the empty filler if the field is not set. @@ -233,7 +228,6 @@ struct State<'a> { print_unpaired: bool, lines: Lines>, seq: Vec, - max_fields: Option, line_num: usize, has_failed: bool, } @@ -262,7 +256,6 @@ impl<'a> State<'a> { print_unpaired, lines: f.lines(), seq: Vec::new(), - max_fields: None, line_num: 0, has_failed: false, } @@ -329,8 +322,8 @@ impl<'a> State<'a> { }); } else { repr.print_field(key); - repr.print_fields(line1, self.key, self.max_fields); - repr.print_fields(line2, other.key, other.max_fields); + repr.print_fields(line1, self.key); + repr.print_fields(line2, other.key); } println!(); @@ -361,14 +354,15 @@ impl<'a> State<'a> { !self.seq.is_empty() } - fn initialize(&mut self, read_sep: Sep, autoformat: bool) { + fn initialize(&mut self, read_sep: Sep, autoformat: bool) -> usize { if let Some(line) = self.read_line(read_sep) { - if autoformat { - self.max_fields = Some(line.fields.len()); - } - self.seq.push(line); + + if autoformat { + return self.seq[0].fields.len(); + } } + 0 } fn finalize(&mut self, input: &Input, repr: &Repr) { @@ -431,7 +425,7 @@ impl<'a> State<'a> { }); } else { repr.print_field(line.get_field(self.key)); - repr.print_fields(line, self.key, self.max_fields); + repr.print_fields(line, self.key); } println!(); @@ -512,7 +506,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { crash!(1, "both files cannot be standard input"); } - exec(file1, file2, &settings) + exec(file1, file2, settings) } pub fn uu_app() -> App<'static, 'static> { @@ -622,7 +616,7 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2", ) } -fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { +fn exec(file1: &str, file2: &str, settings: Settings) -> i32 { let stdin = stdin(); let mut state1 = State::new( @@ -647,18 +641,34 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { settings.check_order, ); + let format = if settings.autoformat { + let mut format = vec![Spec::Key]; + let mut initialize = |state: &mut State| { + let max_fields = state.initialize(settings.separator, settings.autoformat); + for i in 0..max_fields { + if i != state.key { + format.push(Spec::Field(state.file_num, i)); + } + } + }; + initialize(&mut state1); + initialize(&mut state2); + format + } else { + state1.initialize(settings.separator, settings.autoformat); + state2.initialize(settings.separator, settings.autoformat); + settings.format + }; + let repr = Repr::new( match settings.separator { Sep::Char(sep) => sep, _ => ' ', }, - &settings.format, + &format, &settings.empty, ); - state1.initialize(settings.separator, settings.autoformat); - state2.initialize(settings.separator, settings.autoformat); - if settings.headers { state1.print_headers(&state2, &repr); state1.reset_read_line(&input); diff --git a/tests/by-util/test_join.rs b/tests/by-util/test_join.rs index 1cab8361a..1d92bf8e7 100644 --- a/tests/by-util/test_join.rs +++ b/tests/by-util/test_join.rs @@ -227,6 +227,19 @@ fn autoformat() { .pipe_in("1 x y z\n2 p") .succeeds() .stdout_only("1 x y z a\n2 p b\n"); + + new_ucmd!() + .arg("-") + .arg("fields_2.txt") + .arg("-a") + .arg("1") + .arg("-o") + .arg("auto") + .arg("-e") + .arg(".") + .pipe_in("1 x y z\n2 p\n99 a b\n") + .succeeds() + .stdout_only("1 x y z a\n2 p . . b\n99 a b . .\n"); } #[test] From 7bf85751b0827cd0cf78aa67a1fd9a872768bc2b Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Wed, 1 Sep 2021 00:37:21 +0200 Subject: [PATCH 17/22] uucore::display: Fix tests --- src/uucore/src/lib/mods/display.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs index 0245a401a..e647a8c71 100644 --- a/src/uucore/src/lib/mods/display.rs +++ b/src/uucore/src/lib/mods/display.rs @@ -60,16 +60,15 @@ pub trait Quotable { /// # Examples /// /// ``` - /// #[macro_use] - /// extern crate uucore; /// use std::path::Path; /// use uucore::display::Quotable; + /// use uucore::show_error; /// /// let foo = Path::new("foo/bar.baz"); /// let bar = Path::new("foo bar"); /// - /// show_error!("{}: Not found", foo); // Prints "util: foo/bar.baz: Not found" - /// show_error!("{}: Not found", bar); // Prints "util: 'foo bar': Not found" + /// show_error!("{}: Not found", foo.maybe_quote()); // Prints "util: foo/bar.baz: Not found" + /// show_error!("{}: Not found", bar.maybe_quote()); // Prints "util: 'foo bar': Not found" /// ``` fn maybe_quote(&self) -> Quoted<'_> { let mut quoted = self.quote(); @@ -156,11 +155,9 @@ impl Display for Quoted<'_> { #[cfg(any(unix, target_os = "wasi"))] const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '!']; // Same deal as before, this is possibly incomplete. - // '-' is included because unlike in Unix, quoting an argument may stop it - // from being recognized as an option. I like that very much. // A single stand-alone exclamation mark seems to have some special meaning. #[cfg(windows)] - const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '-', '!']; + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '!']; /// Characters that are interpreted specially in a double-quoted string. #[cfg(any(unix, target_os = "wasi"))] @@ -181,6 +178,14 @@ impl Display for Quoted<'_> { if SPECIAL_SHELL_CHARS_START.contains(&first) { requires_quote = true; } + // Unlike in Unix, quoting an argument may stop it + // from being recognized as an option. I like that very much. + // But we don't want to quote "-" because that's a common + // special argument and PowerShell doesn't mind it. + #[cfg(windows)] + if first == '-' && text.len() > 1 { + requires_quote = true; + } } else { // Empty string requires_quote = true; @@ -431,6 +436,7 @@ mod tests { ("", "''"), ("foo bar", "'foo bar'"), ("$foo", "'$foo'"), + ("-", "-"), ]); } From 18fc4076cf6ca90d42d707c2f7787655ea490725 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Wed, 1 Sep 2021 17:34:40 +0200 Subject: [PATCH 18/22] uucore/perms: correct some error messages - prevent duplicate errors from both us and `walkdir` by instructing `walkdir' to skip directories we failed to read metadata for. - don't directly display `walkdir`'s errors, but format them ourselves to match gnu's format --- src/uucore/src/lib/features/perms.rs | 70 +++++++++++++++++----------- tests/by-util/test_chgrp.rs | 39 +++++++++++++++- 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index a5b2522b9..fe1c97a82 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -5,6 +5,7 @@ //! Common functions to manage permissions +use crate::error::strip_errno; use crate::error::UResult; pub use crate::features::entries; use crate::fs::resolve_relative_path; @@ -172,15 +173,6 @@ pub struct ChownExecutor { pub dereference: bool, } -macro_rules! unwrap { - ($m:expr, $e:ident, $err:block) => { - match $m { - Ok(meta) => meta, - Err($e) => $err, - } - }; -} - pub const FTS_COMFOLLOW: u8 = 1; pub const FTS_PHYSICAL: u8 = 1 << 1; pub const FTS_LOGICAL: u8 = 1 << 2; @@ -269,17 +261,42 @@ impl ChownExecutor { let mut ret = 0; let root = root.as_ref(); let follow = self.dereference || self.bit_flag & FTS_LOGICAL != 0; - for entry in WalkDir::new(root).follow_links(follow).min_depth(1) { - let entry = unwrap!(entry, e, { - ret = 1; - show_error!("{}", e); - continue; - }); + let mut iterator = WalkDir::new(root) + .follow_links(follow) + .min_depth(1) + .into_iter(); + // We can't use a for loop because we need to manipulate the iterator inside the loop. + while let Some(entry) = iterator.next() { + let entry = match entry { + Err(e) => { + ret = 1; + if let Some(path) = e.path() { + show_error!( + "cannot access '{}': {}", + path.display(), + if let Some(error) = e.io_error() { + strip_errno(error) + } else { + "Too many levels of symbolic links".into() + } + ) + } else { + show_error!("{}", e) + } + continue; + } + Ok(entry) => entry, + }; let path = entry.path(); let meta = match self.obtain_meta(path, follow) { Some(m) => m, _ => { ret = 1; + if entry.file_type().is_dir() { + // Instruct walkdir to skip this directory to avoid getting another error + // when walkdir tries to query the children of this directory. + iterator.skip_current_dir(); + } continue; } }; @@ -316,23 +333,20 @@ impl ChownExecutor { fn obtain_meta>(&self, path: P, follow: bool) -> Option { let path = path.as_ref(); let meta = if follow { - unwrap!(path.metadata(), e, { - match self.verbosity.level { - VerbosityLevel::Silent => (), - _ => show_error!("cannot access '{}': {}", path.display(), e), - } - return None; - }) + path.metadata() } else { - unwrap!(path.symlink_metadata(), e, { + path.symlink_metadata() + }; + match meta { + Err(e) => { match self.verbosity.level { VerbosityLevel::Silent => (), - _ => show_error!("cannot dereference '{}': {}", path.display(), e), + _ => show_error!("cannot access '{}': {}", path.display(), strip_errno(&e)), } - return None; - }) - }; - Some(meta) + None + } + Ok(meta) => Some(meta), + } } #[inline] diff --git a/tests/by-util/test_chgrp.rs b/tests/by-util/test_chgrp.rs index 0741838a4..0fc73520e 100644 --- a/tests/by-util/test_chgrp.rs +++ b/tests/by-util/test_chgrp.rs @@ -230,7 +230,7 @@ fn test_big_h() { } #[test] -#[cfg(target_os = "linux")] +#[cfg(not(target_vendor = "apple"))] fn basic_succeeds() { let (at, mut ucmd) = at_and_ucmd!(); let one_group = nix::unistd::getgroups().unwrap(); @@ -251,3 +251,40 @@ fn test_no_change() { at.touch("file"); ucmd.arg("").arg(at.plus("file")).succeeds(); } + +#[test] +#[cfg(not(target_vendor = "apple"))] +fn test_permission_denied() { + use std::os::unix::prelude::PermissionsExt; + + if let Some(group) = nix::unistd::getgroups().unwrap().first() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("dir"); + at.touch("dir/file"); + std::fs::set_permissions(at.plus("dir"), PermissionsExt::from_mode(0o0000)).unwrap(); + ucmd.arg("-R") + .arg(group.as_raw().to_string()) + .arg("dir") + .fails() + .stderr_only("chgrp: cannot access 'dir': Permission denied"); + } +} + +#[test] +#[cfg(not(target_vendor = "apple"))] +fn test_subdir_permission_denied() { + use std::os::unix::prelude::PermissionsExt; + + if let Some(group) = nix::unistd::getgroups().unwrap().first() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("dir"); + at.mkdir("dir/subdir"); + at.touch("dir/subdir/file"); + std::fs::set_permissions(at.plus("dir/subdir"), PermissionsExt::from_mode(0o0000)).unwrap(); + ucmd.arg("-R") + .arg(group.as_raw().to_string()) + .arg("dir") + .fails() + .stderr_only("chgrp: cannot access 'dir/subdir': Permission denied"); + } +} From 195f827cd46929f4dedba341442151e1291bdbf7 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Thu, 2 Sep 2021 13:12:42 +0200 Subject: [PATCH 19/22] chown/chgrp: share more code Also share argument parsing code between `chgrp` and `chown` --- src/uu/chgrp/src/chgrp.rs | 158 +++---------------------- src/uu/chown/src/chown.rs | 165 ++++++--------------------- src/uucore/src/lib/features/perms.rs | 153 ++++++++++++++++++++++++- 3 files changed, 204 insertions(+), 272 deletions(-) diff --git a/src/uu/chgrp/src/chgrp.rs b/src/uu/chgrp/src/chgrp.rs index d37da578e..a077ab7a4 100644 --- a/src/uu/chgrp/src/chgrp.rs +++ b/src/uu/chgrp/src/chgrp.rs @@ -11,46 +11,16 @@ extern crate uucore; pub use uucore::entries; use uucore::error::{FromIo, UResult, USimpleError}; -use uucore::perms::{ - ChownExecutor, IfFrom, Verbosity, VerbosityLevel, FTS_COMFOLLOW, FTS_LOGICAL, FTS_PHYSICAL, -}; +use uucore::perms::{chown_base, options, IfFrom}; -use clap::{App, Arg}; +use clap::{App, Arg, ArgMatches}; use std::fs; use std::os::unix::fs::MetadataExt; -use uucore::InvalidEncodingHandling; - static ABOUT: &str = "Change the group of each FILE to GROUP."; static VERSION: &str = env!("CARGO_PKG_VERSION"); -pub mod options { - pub mod verbosity { - pub static CHANGES: &str = "changes"; - pub static QUIET: &str = "quiet"; - pub static SILENT: &str = "silent"; - pub static VERBOSE: &str = "verbose"; - } - pub mod preserve_root { - pub static PRESERVE: &str = "preserve-root"; - pub static NO_PRESERVE: &str = "no-preserve-root"; - } - pub mod dereference { - pub static DEREFERENCE: &str = "dereference"; - pub static NO_DEREFERENCE: &str = "no-dereference"; - } - pub static RECURSIVE: &str = "recursive"; - pub mod traverse { - pub static TRAVERSE: &str = "H"; - pub static NO_TRAVERSE: &str = "P"; - pub static EVERY: &str = "L"; - } - pub static REFERENCE: &str = "reference"; - pub static ARG_GROUP: &str = "GROUP"; - pub static ARG_FILES: &str = "FILE"; -} - fn get_usage() -> String { format!( "{0} [OPTION]... GROUP FILE...\n {0} [OPTION]... --reference=RFILE FILE...", @@ -58,101 +28,7 @@ fn get_usage() -> String { ) } -#[uucore_procs::gen_uumain] -pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let args = args - .collect_str(InvalidEncodingHandling::ConvertLossy) - .accept_any(); - - let usage = get_usage(); - - let mut app = uu_app().usage(&usage[..]); - - // we change the positional args based on whether - // --reference was used. - let mut reference = false; - let mut help = false; - // stop processing options on -- - for arg in args.iter().take_while(|s| *s != "--") { - if arg.starts_with("--reference=") || arg == "--reference" { - reference = true; - } else if arg == "--help" { - // we stop processing once we see --help, - // as it doesn't matter if we've seen reference or not - help = true; - break; - } - } - - if help || !reference { - // add both positional arguments - app = app.arg( - Arg::with_name(options::ARG_GROUP) - .value_name(options::ARG_GROUP) - .required(true) - .takes_value(true) - .multiple(false), - ) - } - app = app.arg( - Arg::with_name(options::ARG_FILES) - .value_name(options::ARG_FILES) - .multiple(true) - .takes_value(true) - .required(true) - .min_values(1), - ); - - let matches = app.get_matches_from(args); - - /* Get the list of files */ - let files: Vec = matches - .values_of(options::ARG_FILES) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); - - let preserve_root = matches.is_present(options::preserve_root::PRESERVE); - - let mut derefer = if matches.is_present(options::dereference::DEREFERENCE) { - 1 - } else if matches.is_present(options::dereference::NO_DEREFERENCE) { - 0 - } else { - -1 - }; - - let mut bit_flag = if matches.is_present(options::traverse::TRAVERSE) { - FTS_COMFOLLOW | FTS_PHYSICAL - } else if matches.is_present(options::traverse::EVERY) { - FTS_LOGICAL - } else { - FTS_PHYSICAL - }; - - let recursive = matches.is_present(options::RECURSIVE); - if recursive { - if bit_flag == FTS_PHYSICAL { - if derefer == 1 { - return Err(USimpleError::new(1, "-R --dereference requires -H or -L")); - } - derefer = 0; - } - } else { - bit_flag = FTS_PHYSICAL; - } - - let verbosity_level = if matches.is_present(options::verbosity::CHANGES) { - VerbosityLevel::Changes - } else if matches.is_present(options::verbosity::SILENT) - || matches.is_present(options::verbosity::QUIET) - { - VerbosityLevel::Silent - } else if matches.is_present(options::verbosity::VERBOSE) { - VerbosityLevel::Verbose - } else { - VerbosityLevel::Normal - }; - +fn parse_gid_and_uid(matches: &ArgMatches) -> UResult<(Option, Option, IfFrom)> { let dest_gid = if let Some(file) = matches.value_of(options::REFERENCE) { fs::metadata(&file) .map(|meta| Some(meta.gid())) @@ -168,22 +44,20 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } } }; + Ok((dest_gid, None, IfFrom::All)) +} - let executor = ChownExecutor { - bit_flag, - dest_gid, - verbosity: Verbosity { - groups_only: true, - level: verbosity_level, - }, - recursive, - dereference: derefer != 0, - preserve_root, - files, - filter: IfFrom::All, - dest_uid: None, - }; - executor.exec() +#[uucore_procs::gen_uumain] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let usage = get_usage(); + + chown_base( + uu_app().usage(&usage[..]), + args, + options::ARG_GROUP, + parse_gid_and_uid, + true, + ) } pub fn uu_app() -> App<'static, 'static> { diff --git a/src/uu/chown/src/chown.rs b/src/uu/chown/src/chown.rs index 06f0c6a32..20f3f8174 100644 --- a/src/uu/chown/src/chown.rs +++ b/src/uu/chown/src/chown.rs @@ -10,49 +10,17 @@ #[macro_use] extern crate uucore; pub use uucore::entries::{self, Group, Locate, Passwd}; -use uucore::perms::{ - ChownExecutor, IfFrom, Verbosity, VerbosityLevel, FTS_COMFOLLOW, FTS_LOGICAL, FTS_PHYSICAL, -}; +use uucore::perms::{chown_base, options, IfFrom}; use uucore::error::{FromIo, UResult, USimpleError}; -use clap::{crate_version, App, Arg}; +use clap::{crate_version, App, Arg, ArgMatches}; use std::fs; use std::os::unix::fs::MetadataExt; -use uucore::InvalidEncodingHandling; - static ABOUT: &str = "change file owner and group"; -pub mod options { - pub mod verbosity { - pub static CHANGES: &str = "changes"; - pub static QUIET: &str = "quiet"; - pub static SILENT: &str = "silent"; - pub static VERBOSE: &str = "verbose"; - } - pub mod preserve_root { - pub static PRESERVE: &str = "preserve-root"; - pub static NO_PRESERVE: &str = "no-preserve-root"; - } - pub mod dereference { - pub static DEREFERENCE: &str = "dereference"; - pub static NO_DEREFERENCE: &str = "no-dereference"; - } - pub static FROM: &str = "from"; - pub static RECURSIVE: &str = "recursive"; - pub mod traverse { - pub static TRAVERSE: &str = "H"; - pub static NO_TRAVERSE: &str = "P"; - pub static EVERY: &str = "L"; - } - pub static REFERENCE: &str = "reference"; -} - -static ARG_OWNER: &str = "owner"; -static ARG_FILES: &str = "files"; - fn get_usage() -> String { format!( "{0} [OPTION]... [OWNER][:[GROUP]] FILE...\n{0} [OPTION]... --reference=RFILE FILE...", @@ -60,65 +28,7 @@ fn get_usage() -> String { ) } -#[uucore_procs::gen_uumain] -pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let args = args - .collect_str(InvalidEncodingHandling::Ignore) - .accept_any(); - - let usage = get_usage(); - - let matches = uu_app().usage(&usage[..]).get_matches_from(args); - - /* First arg is the owner/group */ - let owner = matches.value_of(ARG_OWNER).unwrap(); - - /* Then the list of files */ - let files: Vec = matches - .values_of(ARG_FILES) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); - - let preserve_root = matches.is_present(options::preserve_root::PRESERVE); - - let mut derefer = if matches.is_present(options::dereference::NO_DEREFERENCE) { - 1 - } else { - 0 - }; - - let mut bit_flag = if matches.is_present(options::traverse::TRAVERSE) { - FTS_COMFOLLOW | FTS_PHYSICAL - } else if matches.is_present(options::traverse::EVERY) { - FTS_LOGICAL - } else { - FTS_PHYSICAL - }; - - let recursive = matches.is_present(options::RECURSIVE); - if recursive { - if bit_flag == FTS_PHYSICAL { - if derefer == 1 { - return Err(USimpleError::new(1, "-R --dereference requires -H or -L")); - } - derefer = 0; - } - } else { - bit_flag = FTS_PHYSICAL; - } - - let verbosity = if matches.is_present(options::verbosity::CHANGES) { - VerbosityLevel::Changes - } else if matches.is_present(options::verbosity::SILENT) - || matches.is_present(options::verbosity::QUIET) - { - VerbosityLevel::Silent - } else if matches.is_present(options::verbosity::VERBOSE) { - VerbosityLevel::Verbose - } else { - VerbosityLevel::Normal - }; - +fn parse_gid_uid_and_filter(matches: &ArgMatches) -> UResult<(Option, Option, IfFrom)> { let filter = if let Some(spec) = matches.value_of(options::FROM) { match parse_spec(spec)? { (Some(uid), None) => IfFrom::User(uid), @@ -138,25 +48,24 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { dest_gid = Some(meta.gid()); dest_uid = Some(meta.uid()); } else { - let (u, g) = parse_spec(owner)?; + let (u, g) = parse_spec(matches.value_of(options::ARG_OWNER).unwrap())?; dest_uid = u; dest_gid = g; } - let executor = ChownExecutor { - bit_flag, - dest_uid, - dest_gid, - verbosity: Verbosity { - groups_only: false, - level: verbosity, - }, - recursive, - dereference: derefer != 0, - filter, - preserve_root, - files, - }; - executor.exec() + Ok((dest_gid, dest_uid, filter)) +} + +#[uucore_procs::gen_uumain] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let usage = get_usage(); + + chown_base( + uu_app().usage(&usage[..]), + args, + options::ARG_OWNER, + parse_gid_uid_and_filter, + false, + ) } pub fn uu_app() -> App<'static, 'static> { @@ -169,22 +78,31 @@ pub fn uu_app() -> App<'static, 'static> { .long(options::verbosity::CHANGES) .help("like verbose but report only when a change is made"), ) - .arg(Arg::with_name(options::dereference::DEREFERENCE).long(options::dereference::DEREFERENCE).help( - "affect the referent of each symbolic link (this is the default), rather than the symbolic link itself", - )) + .arg( + Arg::with_name(options::dereference::DEREFERENCE) + .long(options::dereference::DEREFERENCE) + .help( + "affect the referent of each symbolic link (this is the default), \ + rather than the symbolic link itself", + ), + ) .arg( Arg::with_name(options::dereference::NO_DEREFERENCE) .short("h") .long(options::dereference::NO_DEREFERENCE) .help( - "affect symbolic links instead of any referenced file (useful only on systems that can change the ownership of a symlink)", + "affect symbolic links instead of any referenced file \ + (useful only on systems that can change the ownership of a symlink)", ), ) .arg( Arg::with_name(options::FROM) .long(options::FROM) .help( - "change the owner and/or group of each file only if its current owner and/or group match those specified here. Either may be omitted, in which case a match is not required for the omitted attribute", + "change the owner and/or group of each file only if its \ + current owner and/or group match those specified here. \ + Either may be omitted, in which case a match is not required \ + for the omitted attribute", ) .value_name("CURRENT_OWNER:CURRENT_GROUP"), ) @@ -216,7 +134,11 @@ pub fn uu_app() -> App<'static, 'static> { .value_name("RFILE") .min_values(1), ) - .arg(Arg::with_name(options::verbosity::SILENT).short("f").long(options::verbosity::SILENT)) + .arg( + Arg::with_name(options::verbosity::SILENT) + .short("f") + .long(options::verbosity::SILENT), + ) .arg( Arg::with_name(options::traverse::TRAVERSE) .short(options::traverse::TRAVERSE) @@ -240,19 +162,6 @@ pub fn uu_app() -> App<'static, 'static> { .long(options::verbosity::VERBOSE) .help("output a diagnostic for every file processed"), ) - .arg( - Arg::with_name(ARG_OWNER) - .multiple(false) - .takes_value(true) - .required(true), - ) - .arg( - Arg::with_name(ARG_FILES) - .multiple(true) - .takes_value(true) - .required(true) - .min_values(1), - ) } fn parse_spec(spec: &str) -> UResult<(Option, Option)> { diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index fe1c97a82..5f470d5a8 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -7,10 +7,14 @@ use crate::error::strip_errno; use crate::error::UResult; +use crate::error::USimpleError; pub use crate::features::entries; use crate::fs::resolve_relative_path; use crate::show_error; -use libc::{self, gid_t, lchown, uid_t}; +use clap::App; +use clap::Arg; +use clap::ArgMatches; +use libc::{self, gid_t, uid_t}; use walkdir::WalkDir; use std::io::Error as IOError; @@ -45,7 +49,7 @@ fn chown>(path: P, uid: uid_t, gid: gid_t, follow: bool) -> IORes if follow { libc::chown(s.as_ptr(), uid, gid) } else { - lchown(s.as_ptr(), uid, gid) + libc::lchown(s.as_ptr(), uid, gid) } }; if ret == 0 { @@ -359,3 +363,148 @@ impl ChownExecutor { } } } + +pub mod options { + pub mod verbosity { + pub const CHANGES: &str = "changes"; + pub const QUIET: &str = "quiet"; + pub const SILENT: &str = "silent"; + pub const VERBOSE: &str = "verbose"; + } + pub mod preserve_root { + pub const PRESERVE: &str = "preserve-root"; + pub const NO_PRESERVE: &str = "no-preserve-root"; + } + pub mod dereference { + pub const DEREFERENCE: &str = "dereference"; + pub const NO_DEREFERENCE: &str = "no-dereference"; + } + pub const FROM: &str = "from"; + pub const RECURSIVE: &str = "recursive"; + pub mod traverse { + pub const TRAVERSE: &str = "H"; + pub const NO_TRAVERSE: &str = "P"; + pub const EVERY: &str = "L"; + } + pub const REFERENCE: &str = "reference"; + pub const ARG_OWNER: &str = "OWNER"; + pub const ARG_GROUP: &str = "GROUP"; + pub const ARG_FILES: &str = "FILE"; +} + +type GidUidFilterParser<'a> = fn(&ArgMatches<'a>) -> UResult<(Option, Option, IfFrom)>; + +/// Base implementation for `chgrp` and `chown`. +/// +/// An argument called `add_arg_if_not_reference` will be added to `app` if +/// `args` does not contain the `--reference` option. +/// `parse_gid_uid_and_filter` will be called to obtain the target gid and uid, and the filter, +/// from `ArgMatches`. +/// `groups_only` determines whether verbose output will only mention the group. +pub fn chown_base<'a>( + mut app: App<'a, 'a>, + args: impl crate::Args, + add_arg_if_not_reference: &'a str, + parse_gid_uid_and_filter: GidUidFilterParser<'a>, + groups_only: bool, +) -> UResult<()> { + let args: Vec<_> = args.collect(); + let mut reference = false; + let mut help = false; + // stop processing options on -- + for arg in args.iter().take_while(|s| *s != "--") { + if arg.to_string_lossy().starts_with("--reference=") || arg == "--reference" { + reference = true; + } else if arg == "--help" { + // we stop processing once we see --help, + // as it doesn't matter if we've seen reference or not + help = true; + break; + } + } + + if help || !reference { + // add both positional arguments + // arg_group is only required if + app = app.arg( + Arg::with_name(add_arg_if_not_reference) + .value_name(add_arg_if_not_reference) + .required(true) + .takes_value(true) + .multiple(false), + ) + } + app = app.arg( + Arg::with_name(options::ARG_FILES) + .value_name(options::ARG_FILES) + .multiple(true) + .takes_value(true) + .required(true) + .min_values(1), + ); + let matches = app.get_matches_from(args); + + let files: Vec = matches + .values_of(options::ARG_FILES) + .map(|v| v.map(ToString::to_string).collect()) + .unwrap_or_default(); + + let preserve_root = matches.is_present(options::preserve_root::PRESERVE); + + let mut dereference = if matches.is_present(options::dereference::DEREFERENCE) { + 1 + } else if matches.is_present(options::dereference::NO_DEREFERENCE) { + 0 + } else { + -1 + }; + + let mut bit_flag = if matches.is_present(options::traverse::TRAVERSE) { + FTS_COMFOLLOW | FTS_PHYSICAL + } else if matches.is_present(options::traverse::EVERY) { + FTS_LOGICAL + } else { + FTS_PHYSICAL + }; + + let recursive = matches.is_present(options::RECURSIVE); + if recursive { + if bit_flag == FTS_PHYSICAL { + if dereference == 1 { + return Err(USimpleError::new(1, "-R --dereference requires -H or -L")); + } + dereference = 0; + } + } else { + bit_flag = FTS_PHYSICAL; + } + + let verbosity_level = if matches.is_present(options::verbosity::CHANGES) { + VerbosityLevel::Changes + } else if matches.is_present(options::verbosity::SILENT) + || matches.is_present(options::verbosity::QUIET) + { + VerbosityLevel::Silent + } else if matches.is_present(options::verbosity::VERBOSE) { + VerbosityLevel::Verbose + } else { + VerbosityLevel::Normal + }; + let (dest_gid, dest_uid, filter) = parse_gid_uid_and_filter(&matches)?; + + let executor = ChownExecutor { + bit_flag, + dest_gid, + dest_uid, + verbosity: Verbosity { + groups_only: true, + level: verbosity_level, + }, + recursive, + dereference: dereference != 0, + preserve_root, + files, + filter, + }; + executor.exec() +} From a4fca2d4fc4718909658f1b0ab83734a54c8d6bc Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Thu, 2 Sep 2021 18:27:01 +0200 Subject: [PATCH 20/22] uucore/perms: remove flags in favor of enums Part of the code was transliterated from GNU's implementation in C, and used flags to store settings. Instead, we can use enums to avoid magic values or binary operations to extract flags. --- src/uu/chown/src/chown.rs | 1 + src/uucore/src/lib/features/perms.rs | 56 ++++++++++++++-------------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/uu/chown/src/chown.rs b/src/uu/chown/src/chown.rs index 20f3f8174..4abb9ac61 100644 --- a/src/uu/chown/src/chown.rs +++ b/src/uu/chown/src/chown.rs @@ -160,6 +160,7 @@ pub fn uu_app() -> App<'static, 'static> { .arg( Arg::with_name(options::verbosity::VERBOSE) .long(options::verbosity::VERBOSE) + .short("v") .help("output a diagnostic for every file processed"), ) } diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index 5f470d5a8..b605c0cdf 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -165,22 +165,26 @@ pub enum IfFrom { UserGroup(u32, u32), } +#[derive(PartialEq, Eq)] +pub enum TraverseSymlinks { + None, + First, + All, +} + pub struct ChownExecutor { pub dest_uid: Option, pub dest_gid: Option, - pub bit_flag: u8, + pub traverse_symlinks: TraverseSymlinks, pub verbosity: Verbosity, pub filter: IfFrom, pub files: Vec, pub recursive: bool, pub preserve_root: bool, + // Must be true if traverse_symlinks is not None pub dereference: bool, } -pub const FTS_COMFOLLOW: u8 = 1; -pub const FTS_PHYSICAL: u8 = 1 << 1; -pub const FTS_LOGICAL: u8 = 1 << 2; - impl ChownExecutor { pub fn exec(&self) -> UResult<()> { let mut ret = 0; @@ -194,9 +198,8 @@ impl ChownExecutor { } fn traverse>(&self, root: P) -> i32 { - let follow_arg = self.dereference || self.bit_flag != FTS_PHYSICAL; let path = root.as_ref(); - let meta = match self.obtain_meta(path, follow_arg) { + let meta = match self.obtain_meta(path, self.dereference) { Some(m) => m, _ => return 1, }; @@ -208,7 +211,7 @@ impl ChownExecutor { // (argument is symlink && should follow argument && resolved to be '/') // ) if self.recursive && self.preserve_root { - let may_exist = if follow_arg { + let may_exist = if self.dereference { path.canonicalize().ok() } else { let real = resolve_relative_path(path); @@ -234,7 +237,7 @@ impl ChownExecutor { &meta, self.dest_uid, self.dest_gid, - follow_arg, + self.dereference, self.verbosity.clone(), ) { Ok(n) => { @@ -264,9 +267,8 @@ impl ChownExecutor { fn dive_into>(&self, root: P) -> i32 { let mut ret = 0; let root = root.as_ref(); - let follow = self.dereference || self.bit_flag & FTS_LOGICAL != 0; let mut iterator = WalkDir::new(root) - .follow_links(follow) + .follow_links(self.dereference) .min_depth(1) .into_iter(); // We can't use a for loop because we need to manipulate the iterator inside the loop. @@ -292,7 +294,7 @@ impl ChownExecutor { Ok(entry) => entry, }; let path = entry.path(); - let meta = match self.obtain_meta(path, follow) { + let meta = match self.obtain_meta(path, self.dereference) { Some(m) => m, _ => { ret = 1; @@ -314,7 +316,7 @@ impl ChownExecutor { &meta, self.dest_uid, self.dest_gid, - follow, + self.dereference, self.verbosity.clone(), ) { Ok(n) => { @@ -452,31 +454,31 @@ pub fn chown_base<'a>( let preserve_root = matches.is_present(options::preserve_root::PRESERVE); let mut dereference = if matches.is_present(options::dereference::DEREFERENCE) { - 1 + Some(true) } else if matches.is_present(options::dereference::NO_DEREFERENCE) { - 0 + Some(false) } else { - -1 + None }; - let mut bit_flag = if matches.is_present(options::traverse::TRAVERSE) { - FTS_COMFOLLOW | FTS_PHYSICAL + let mut traverse_symlinks = if matches.is_present(options::traverse::TRAVERSE) { + TraverseSymlinks::First } else if matches.is_present(options::traverse::EVERY) { - FTS_LOGICAL + TraverseSymlinks::All } else { - FTS_PHYSICAL + TraverseSymlinks::None }; let recursive = matches.is_present(options::RECURSIVE); if recursive { - if bit_flag == FTS_PHYSICAL { - if dereference == 1 { + if traverse_symlinks == TraverseSymlinks::None { + if dereference == Some(true) { return Err(USimpleError::new(1, "-R --dereference requires -H or -L")); } - dereference = 0; + dereference = Some(false); } } else { - bit_flag = FTS_PHYSICAL; + traverse_symlinks = TraverseSymlinks::None; } let verbosity_level = if matches.is_present(options::verbosity::CHANGES) { @@ -493,15 +495,15 @@ pub fn chown_base<'a>( let (dest_gid, dest_uid, filter) = parse_gid_uid_and_filter(&matches)?; let executor = ChownExecutor { - bit_flag, + traverse_symlinks, dest_gid, dest_uid, verbosity: Verbosity { - groups_only: true, + groups_only, level: verbosity_level, }, recursive, - dereference: dereference != 0, + dereference: dereference.unwrap_or(true), preserve_root, files, filter, From a7f6b4420a4a28148657c437aed13dda798176bd Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Thu, 2 Sep 2021 22:31:49 +0200 Subject: [PATCH 21/22] uucore/perms: take traverse_symlinks into account --- src/uucore/src/lib/features/perms.rs | 15 ++++++- tests/by-util/test_chgrp.rs | 65 ++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index b605c0cdf..a4a01c499 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -265,10 +265,21 @@ impl ChownExecutor { } fn dive_into>(&self, root: P) -> i32 { - let mut ret = 0; let root = root.as_ref(); + + // walkdir always dereferences the root directory, so we have to check it ourselves + // TODO: replace with `root.is_symlink()` once it is stable + if self.traverse_symlinks == TraverseSymlinks::None + && std::fs::symlink_metadata(root) + .map(|m| m.file_type().is_symlink()) + .unwrap_or(false) + { + return 0; + } + + let mut ret = 0; let mut iterator = WalkDir::new(root) - .follow_links(self.dereference) + .follow_links(self.traverse_symlinks == TraverseSymlinks::All) .min_depth(1) .into_iter(); // We can't use a for loop because we need to manipulate the iterator inside the loop. diff --git a/tests/by-util/test_chgrp.rs b/tests/by-util/test_chgrp.rs index 0fc73520e..1d047cfe2 100644 --- a/tests/by-util/test_chgrp.rs +++ b/tests/by-util/test_chgrp.rs @@ -288,3 +288,68 @@ fn test_subdir_permission_denied() { .stderr_only("chgrp: cannot access 'dir/subdir': Permission denied"); } } + +#[test] +#[cfg(not(target_vendor = "apple"))] +fn test_traverse_symlinks() { + use std::os::unix::prelude::MetadataExt; + let groups = nix::unistd::getgroups().unwrap(); + if groups.len() < 2 { + return; + } + let (first_group, second_group) = (groups[0], groups[1]); + + for &(args, traverse_first, traverse_second) in &[ + (&[][..] as &[&str], false, false), + (&["-H"][..], true, false), + (&["-P"][..], false, false), + (&["-L"][..], true, true), + ] { + let scenario = TestScenario::new("chgrp"); + + let (at, mut ucmd) = (scenario.fixtures.clone(), scenario.ucmd()); + + at.mkdir("dir"); + at.mkdir("dir2"); + at.touch("dir2/file"); + at.mkdir("dir3"); + at.touch("dir3/file"); + at.symlink_dir("dir2", "dir/dir2_ln"); + at.symlink_dir("dir3", "dir3_ln"); + + scenario + .ccmd("chgrp") + .arg(first_group.to_string()) + .arg("dir2/file") + .arg("dir3/file") + .succeeds(); + + assert!(at.plus("dir2/file").metadata().unwrap().gid() == first_group.as_raw()); + assert!(at.plus("dir3/file").metadata().unwrap().gid() == first_group.as_raw()); + + ucmd.arg("-R") + .args(args) + .arg(second_group.to_string()) + .arg("dir") + .arg("dir3_ln") + .succeeds() + .no_stderr(); + + assert_eq!( + at.plus("dir2/file").metadata().unwrap().gid(), + if traverse_second { + second_group.as_raw() + } else { + first_group.as_raw() + } + ); + assert_eq!( + at.plus("dir3/file").metadata().unwrap().gid(), + if traverse_first { + second_group.as_raw() + } else { + first_group.as_raw() + } + ); + } +} From 435b7a22fb8d64d1c2c0d4ddca0b387f25984c1d Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Thu, 2 Sep 2021 22:42:09 +0200 Subject: [PATCH 22/22] uucore/perms: add more information to an error message This reverts part of https://github.com/uutils/coreutils/pull/2628, because (even though it got the test passing) it was the wrong bug fix. --- src/uucore/src/lib/features/perms.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/perms.rs b/src/uucore/src/lib/features/perms.rs index a4a01c499..b071cedaa 100644 --- a/src/uucore/src/lib/features/perms.rs +++ b/src/uucore/src/lib/features/perms.rs @@ -358,7 +358,12 @@ impl ChownExecutor { Err(e) => { match self.verbosity.level { VerbosityLevel::Silent => (), - _ => show_error!("cannot access '{}': {}", path.display(), strip_errno(&e)), + _ => show_error!( + "cannot {} '{}': {}", + if follow { "dereference" } else { "access" }, + path.display(), + strip_errno(&e) + ), } None }