diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 34abfc511..089adffa3 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -1,3 +1,4 @@ +AFAICT arity autogenerate autogenerated diff --git a/.vscode/cspell.dictionaries/shell.wordlist.txt b/.vscode/cspell.dictionaries/shell.wordlist.txt index 07c2364ac..4ed281efb 100644 --- a/.vscode/cspell.dictionaries/shell.wordlist.txt +++ b/.vscode/cspell.dictionaries/shell.wordlist.txt @@ -8,6 +8,7 @@ csh globstar inotify localtime +mksh mountinfo mountpoint mtab diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index bf449d96b..fcb0b28ff 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -21,6 +21,7 @@ use lscolors::LsColors; use number_prefix::NumberPrefix; use once_cell::unsync::OnceCell; use quoting_style::{escape_name, QuotingStyle}; +use std::ffi::OsString; #[cfg(windows)] use std::os::windows::fs::MetadataExt; use std::{ @@ -248,7 +249,7 @@ struct LongFormat { impl Config { #[allow(clippy::cognitive_complexity)] - fn from(options: clap::ArgMatches) -> UResult { + fn from(options: &clap::ArgMatches) -> UResult { let (mut format, opt) = if let Some(format_) = options.value_of(options::FORMAT) { ( match format_ { @@ -428,11 +429,10 @@ impl Config { #[allow(clippy::needless_bool)] let show_control = if options.is_present(options::HIDE_CONTROL_CHARS) { false - } else if options.is_present(options::SHOW_CONTROL_CHARS) || atty::is(atty::Stream::Stdout) - { + } else if options.is_present(options::SHOW_CONTROL_CHARS) { true } else { - false + !atty::is(atty::Stream::Stdout) }; let quoting_style = if let Some(style) = options.value_of(options::QUOTING_STYLE) { @@ -599,22 +599,19 @@ impl Config { #[uucore_procs::gen_uumain] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let args = args - .collect_str(InvalidEncodingHandling::Ignore) - .accept_any(); - let usage = usage(); let app = uu_app().usage(&usage[..]); let matches = app.get_matches_from(args); + let config = Config::from(&matches)?; let locs = matches - .values_of(options::PATHS) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_else(|| vec![String::from(".")]); + .values_of_os(options::PATHS) + .map(|v| v.map(Path::new).collect()) + .unwrap_or_else(|| vec![Path::new(".")]); - list(locs, Config::from(matches)?) + list(locs, config) } pub fn uu_app() -> App<'static, 'static> { @@ -1177,7 +1174,7 @@ struct PathData { md: OnceCell>, ft: OnceCell>, // Name of the file - will be empty for . or .. - display_name: String, + display_name: OsString, // PathBuf that all above data corresponds to p_buf: PathBuf, must_dereference: bool, @@ -1187,7 +1184,7 @@ impl PathData { fn new( p_buf: PathBuf, file_type: Option>, - file_name: Option, + file_name: Option, config: &Config, command_line: bool, ) -> Self { @@ -1195,16 +1192,13 @@ impl PathData { // For '..', the filename is None let display_name = if let Some(name) = file_name { name + } else if command_line { + p_buf.clone().into() } else { - let display_os_str = if command_line { - p_buf.as_os_str() - } else { - p_buf - .file_name() - .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) - }; - - display_os_str.to_string_lossy().into_owned() + p_buf + .file_name() + .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) + .to_owned() }; let must_dereference = match &config.dereference { Dereference::All => true, @@ -1249,14 +1243,14 @@ impl PathData { } } -fn list(locs: Vec, config: Config) -> UResult<()> { +fn list(locs: Vec<&Path>, config: Config) -> UResult<()> { let mut files = Vec::::new(); let mut dirs = Vec::::new(); let mut out = BufWriter::new(stdout()); for loc in &locs { - let p = PathBuf::from(&loc); + let p = PathBuf::from(loc); let path_data = PathData::new(p, None, None, &config, true); if path_data.md().is_none() { @@ -1286,6 +1280,7 @@ fn list(locs: Vec, config: Config) -> UResult<()> { sort_entries(&mut dirs, &config); for dir in dirs { if locs.len() > 1 || config.recursive { + // FIXME: This should use the quoting style and propagate errors let _ = writeln!(out, "\n{}:", dir.p_buf.display()); } enter_directory(&dir, &config, &mut out); @@ -1671,7 +1666,6 @@ fn get_inode(metadata: &Metadata) -> String { use std::sync::Mutex; #[cfg(unix)] use uucore::entries; -use uucore::InvalidEncodingHandling; #[cfg(unix)] fn cached_uid2usr(uid: u32) -> String { diff --git a/src/uu/ls/src/quoting_style.rs b/src/uu/ls/src/quoting_style.rs index 5f421b2ee..4839370ee 100644 --- a/src/uu/ls/src/quoting_style.rs +++ b/src/uu/ls/src/quoting_style.rs @@ -1,4 +1,5 @@ use std::char::from_digit; +use std::ffi::OsStr; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. @@ -255,19 +256,21 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { (escaped_str, must_quote) } -pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String { +pub(super) fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { match style { QuotingStyle::Literal { show_control } => { if !show_control { - name.chars() + name.to_string_lossy() + .chars() .flat_map(|c| EscapedChar::new_literal(c).hide_control()) .collect() } else { - name.into() + name.to_string_lossy().into_owned() } } QuotingStyle::C { quotes } => { let escaped_str: String = name + .to_string_lossy() .chars() .flat_map(|c| EscapedChar::new_c(c, *quotes)) .collect(); @@ -283,6 +286,7 @@ pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String { always_quote, show_control, } => { + let name = name.to_string_lossy(); let (quotes, must_quote) = if name.contains('"') { (Quotes::Single, true) } else if name.contains('\'') { @@ -294,9 +298,9 @@ pub(super) fn escape_name(name: &str, style: &QuotingStyle) -> String { }; let (escaped_str, contains_quote_chars) = if *escape { - shell_with_escape(name, quotes) + shell_with_escape(&name, quotes) } else { - shell_without_escape(name, quotes, *show_control) + shell_without_escape(&name, quotes, *show_control) }; match (must_quote | contains_quote_chars, quotes) { @@ -362,7 +366,7 @@ mod tests { fn check_names(name: &str, map: Vec<(&str, &str)>) { assert_eq!( map.iter() - .map(|(_, style)| escape_name(name, &get_style(style))) + .map(|(_, style)| escape_name(name.as_ref(), &get_style(style))) .collect::>(), map.iter() .map(|(correct, _)| correct.to_string()) diff --git a/src/uu/pwd/src/pwd.rs b/src/uu/pwd/src/pwd.rs index e8f0c0013..1138dba8e 100644 --- a/src/uu/pwd/src/pwd.rs +++ b/src/uu/pwd/src/pwd.rs @@ -10,9 +10,10 @@ extern crate uucore; use clap::{crate_version, App, Arg}; use std::env; -use std::io::{self, Write}; -use std::path::{Path, PathBuf}; +use std::io; +use std::path::PathBuf; +use uucore::display::println_verbatim; use uucore::error::{FromIo, UResult}; static ABOUT: &str = "Display the full filename of the current working directory."; @@ -57,6 +58,7 @@ fn logical_path() -> io::Result { // POSIX: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pwd.html #[cfg(not(windows))] { + use std::path::Path; fn looks_reasonable(path: &Path) -> bool { // First, check if it's an absolute path. if !path.has_root() { @@ -148,30 +150,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .map(Into::into) .unwrap_or(cwd); - print_path(&cwd).map_err_context(|| "failed to print current directory".to_owned())?; - - Ok(()) -} - -fn print_path(path: &Path) -> io::Result<()> { - let stdout = io::stdout(); - let mut stdout = stdout.lock(); - - // On Unix we print non-lossily. - #[cfg(unix)] - { - use std::os::unix::ffi::OsStrExt; - stdout.write_all(path.as_os_str().as_bytes())?; - stdout.write_all(b"\n")?; - } - - // On other platforms we potentially mangle it. - // There might be some clever way to do it correctly on Windows, but - // invalid unicode in filenames is rare there. - #[cfg(not(unix))] - { - writeln!(stdout, "{}", path.display())?; - } + println_verbatim(&cwd).map_err_context(|| "failed to print current directory".to_owned())?; Ok(()) } diff --git a/src/uu/rmdir/src/rmdir.rs b/src/uu/rmdir/src/rmdir.rs index d8cad0421..f982cf4c3 100644 --- a/src/uu/rmdir/src/rmdir.rs +++ b/src/uu/rmdir/src/rmdir.rs @@ -14,6 +14,7 @@ use clap::{crate_version, App, Arg}; use std::fs::{read_dir, remove_dir}; use std::io; use std::path::Path; +use uucore::display::Quotable; use uucore::error::{set_exit_code, strip_errno, UResult}; use uucore::util_name; @@ -77,27 +78,23 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { Ok(path.metadata()?.file_type().is_dir()) } - let path = path.as_os_str().as_bytes(); - if error.raw_os_error() == Some(libc::ENOTDIR) && path.ends_with(b"/") { + let bytes = path.as_os_str().as_bytes(); + if error.raw_os_error() == Some(libc::ENOTDIR) && bytes.ends_with(b"/") { // Strip the trailing slash or .symlink_metadata() will follow the symlink - let path: &Path = OsStr::from_bytes(&path[..path.len() - 1]).as_ref(); - if is_symlink(path).unwrap_or(false) - && points_to_directory(path).unwrap_or(true) + let no_slash: &Path = OsStr::from_bytes(&bytes[..bytes.len() - 1]).as_ref(); + if is_symlink(no_slash).unwrap_or(false) + && points_to_directory(no_slash).unwrap_or(true) { show_error!( - "failed to remove '{}/': Symbolic link not followed", - path.display() + "failed to remove {}: Symbolic link not followed", + path.quote() ); continue; } } } - show_error!( - "failed to remove '{}': {}", - path.display(), - strip_errno(&error) - ); + show_error!("failed to remove {}: {}", path.quote(), strip_errno(&error)); } } @@ -125,7 +122,7 @@ fn remove(mut path: &Path, opts: Opts) -> Result<(), Error<'_>> { fn remove_single(path: &Path, opts: Opts) -> Result<(), Error<'_>> { if opts.verbose { - println!("{}: removing directory, '{}'", util_name(), path.display()); + println!("{}: removing directory, {}", util_name(), path.quote()); } remove_dir(path).map_err(|error| Error { error, path }) } diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 01c3d8fdc..6917eb137 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -24,6 +24,8 @@ use std::fs::{self, File}; use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use uucore::display::{Quotable, Quoted}; + /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; @@ -122,10 +124,10 @@ impl Input { } } - fn path_display(&self) -> std::path::Display<'_> { + fn path_display(&self) -> Quoted<'_> { match self { - Input::Path(path) => path.display(), - Input::Stdin(_) => Path::display("'standard input'".as_ref()), + Input::Path(path) => path.maybe_quote(), + Input::Stdin(_) => "standard input".maybe_quote(), } } } @@ -448,7 +450,10 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { if let Err(err) = print_stats(settings, &result, max_width) { show_warning!( "failed to print result for {}: {}", - result.title.unwrap_or_else(|| "".as_ref()).display(), + result + .title + .unwrap_or_else(|| "".as_ref()) + .maybe_quote(), err ); failure = true; @@ -526,7 +531,7 @@ fn print_stats( } if let Some(title) = result.title { - writeln!(stdout_lock, " {}", title.display())?; + writeln!(stdout_lock, " {}", title.maybe_quote())?; } else { writeln!(stdout_lock)?; } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index a39834ec1..5352a6356 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -19,6 +19,7 @@ mod parser; // string parsing modules // * cross-platform modules pub use crate::mods::backup_control; pub use crate::mods::coreopts; +pub use crate::mods::display; pub use crate::mods::error; pub use crate::mods::os; pub use crate::mods::panic; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index b0235832b..8f6d14976 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -2,6 +2,7 @@ pub mod backup_control; pub mod coreopts; +pub mod display; pub mod error; pub mod os; pub mod panic; diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs new file mode 100644 index 000000000..e647a8c71 --- /dev/null +++ b/src/uucore/src/lib/mods/display.rs @@ -0,0 +1,534 @@ +/// Utilities for printing paths, with special attention paid to special +/// characters and invalid unicode. +/// +/// For displaying paths in informational messages use `Quotable::quote`. This +/// will wrap quotes around the filename and add the necessary escapes to make +/// it copy/paste-able into a shell. +/// +/// For writing raw paths to stdout when the output should not be quoted or escaped, +/// use `println_verbatim`. This will preserve invalid unicode. +/// +/// # Examples +/// ``` +/// use std::path::Path; +/// use uucore::display::{Quotable, println_verbatim}; +/// +/// let path = Path::new("foo/bar.baz"); +/// +/// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" +/// println_verbatim(path)?; // Prints "foo/bar.baz" +/// # Ok::<(), std::io::Error>(()) +/// ``` +// spell-checker:ignore Fbar +use std::borrow::Cow; +use std::ffi::OsStr; +#[cfg(any(unix, target_os = "wasi", windows))] +use std::fmt::Write as FmtWrite; +use std::fmt::{self, Display, Formatter}; +use std::io::{self, Write as IoWrite}; + +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; +#[cfg(target_os = "wasi")] +use std::os::wasi::ffi::OsStrExt; +#[cfg(any(unix, target_os = "wasi"))] +use std::str::from_utf8; + +/// An extension trait for displaying filenames to users. +pub trait Quotable { + /// Returns an object that implements [`Display`] for printing filenames with + /// proper quoting and escaping for the platform. + /// + /// On Unix this corresponds to sh/bash syntax, on Windows Powershell syntax + /// is used. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// use uucore::display::Quotable; + /// + /// let path = Path::new("foo/bar.baz"); + /// + /// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" + /// ``` + fn quote(&self) -> Quoted<'_>; + + /// Like `quote()`, but don't actually add quotes unless necessary because of + /// whitespace or special characters. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// use uucore::display::Quotable; + /// use uucore::show_error; + /// + /// let foo = Path::new("foo/bar.baz"); + /// let bar = Path::new("foo bar"); + /// + /// show_error!("{}: Not found", foo.maybe_quote()); // Prints "util: foo/bar.baz: Not found" + /// show_error!("{}: Not found", bar.maybe_quote()); // Prints "util: 'foo bar': Not found" + /// ``` + fn maybe_quote(&self) -> Quoted<'_> { + let mut quoted = self.quote(); + quoted.force_quote = false; + quoted + } +} + +macro_rules! impl_as_ref { + ($type: ty) => { + impl Quotable for $type { + fn quote(&self) -> Quoted<'_> { + Quoted::new(self.as_ref()) + } + } + }; +} + +impl_as_ref!(&'_ str); +impl_as_ref!(String); +impl_as_ref!(&'_ std::path::Path); +impl_as_ref!(std::path::PathBuf); +impl_as_ref!(std::path::Component<'_>); +impl_as_ref!(std::path::Components<'_>); +impl_as_ref!(std::path::Iter<'_>); +impl_as_ref!(&'_ std::ffi::OsStr); +impl_as_ref!(std::ffi::OsString); + +// Cow<'_, str> does not implement AsRef and this is unlikely to be fixed +// for backward compatibility reasons. Otherwise we'd use a blanket impl. +impl Quotable for Cow<'_, str> { + fn quote(&self) -> Quoted<'_> { + let text: &str = self.as_ref(); + Quoted::new(text.as_ref()) + } +} + +/// A wrapper around [`OsStr`] for printing paths with quoting and escaping applied. +#[derive(Debug, Copy, Clone)] +pub struct Quoted<'a> { + text: &'a OsStr, + force_quote: bool, +} + +impl<'a> Quoted<'a> { + fn new(text: &'a OsStr) -> Self { + Quoted { + text, + force_quote: true, + } + } +} + +impl Display for Quoted<'_> { + #[cfg(any(windows, unix, target_os = "wasi"))] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // On Unix we emulate sh syntax. On Windows Powershell. + // They're just similar enough to share some code. + + /// Characters with special meaning outside quotes. + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 + // I don't know why % is in there, and GNU doesn't quote it either. + // {} were used in a version elsewhere but seem unnecessary, GNU doesn't + // quote them. They're used in function definitions but not in a way we + // have to worry about. + #[cfg(any(unix, target_os = "wasi"))] + const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]="; + // FIXME: I'm not a PowerShell wizard and don't know if this is correct. + // I just copied the Unix version, removed \, and added ,{} based on + // experimentation. + // I have noticed that ~?*[] only get expanded in some contexts, so watch + // out for that if doing your own tests. + // Get-ChildItem seems unwilling to quote anything so it doesn't help. + // There's the additional wrinkle that Windows has stricter requirements + // for filenames: I've been testing using a Linux build of PowerShell, but + // this code doesn't even compile on Linux. + #[cfg(windows)] + const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\"'*?[]=,{}"; + + /// Characters with a special meaning at the beginning of a name. + // ~ expands a home directory. + // # starts a comment. + // ! is a common extension for expanding the shell history. + #[cfg(any(unix, target_os = "wasi"))] + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '!']; + // Same deal as before, this is possibly incomplete. + // A single stand-alone exclamation mark seems to have some special meaning. + #[cfg(windows)] + const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '!']; + + /// Characters that are interpreted specially in a double-quoted string. + #[cfg(any(unix, target_os = "wasi"))] + const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$', b'\\']; + #[cfg(windows)] + const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$']; + + let text = match self.text.to_str() { + None => return write_escaped(f, self.text), + Some(text) => text, + }; + + let mut is_single_safe = true; + let mut is_double_safe = true; + let mut requires_quote = self.force_quote; + + if let Some(first) = text.chars().next() { + if SPECIAL_SHELL_CHARS_START.contains(&first) { + requires_quote = true; + } + // Unlike in Unix, quoting an argument may stop it + // from being recognized as an option. I like that very much. + // But we don't want to quote "-" because that's a common + // special argument and PowerShell doesn't mind it. + #[cfg(windows)] + if first == '-' && text.len() > 1 { + requires_quote = true; + } + } else { + // Empty string + requires_quote = true; + } + + for ch in text.chars() { + if ch.is_ascii() { + let ch = ch as u8; + if ch == b'\'' { + is_single_safe = false; + } + if DOUBLE_UNSAFE.contains(&ch) { + is_double_safe = false; + } + if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) { + requires_quote = true; + } + if ch.is_ascii_control() { + return write_escaped(f, self.text); + } + } + if !requires_quote && ch.is_whitespace() { + // This includes unicode whitespace. + // We maybe don't have to escape it, we don't escape other lookalike + // characters either, but it's confusing if it goes unquoted. + requires_quote = true; + } + } + + if !requires_quote { + return f.write_str(text); + } else if is_single_safe { + return write_simple(f, text, '\''); + } else if is_double_safe { + return write_simple(f, text, '\"'); + } else { + return write_single_escaped(f, text); + } + + fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { + f.write_char(quote)?; + f.write_str(text)?; + f.write_char(quote)?; + Ok(()) + } + + #[cfg(any(unix, target_os = "wasi"))] + fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { + let mut iter = text.split('\''); + if let Some(chunk) = iter.next() { + if !chunk.is_empty() { + write_simple(f, chunk, '\'')?; + } + } + for chunk in iter { + f.write_str("\\'")?; + if !chunk.is_empty() { + write_simple(f, chunk, '\'')?; + } + } + Ok(()) + } + + /// Write using the syntax described here: + /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html + /// + /// Supported by these shells: + /// - bash + /// - zsh + /// - busybox sh + /// - mksh + /// + /// Not supported by these: + /// - fish + /// - dash + /// - tcsh + #[cfg(any(unix, target_os = "wasi"))] + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result { + f.write_str("$'")?; + for chunk in from_utf8_iter(text.as_bytes()) { + match chunk { + Ok(chunk) => { + for ch in chunk.chars() { + match ch { + '\n' => f.write_str("\\n")?, + '\t' => f.write_str("\\t")?, + '\r' => f.write_str("\\r")?, + // We could do \b, \f, \v, etc., but those are + // rare enough to be confusing. + // \0 doesn't work consistently because of the + // octal \nnn syntax, and null bytes can't appear + // in filenames anyway. + ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?, + '\\' | '\'' => { + // '?' and '"' can also be escaped this way + // but AFAICT there's no reason to do so + f.write_char('\\')?; + f.write_char(ch)?; + } + ch => { + f.write_char(ch)?; + } + } + } + } + Err(unit) => write!(f, "\\x{:02X}", unit)?, + } + } + f.write_char('\'')?; + Ok(()) + } + + #[cfg(windows)] + fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { + // Quotes in Powershell can be escaped by doubling them + f.write_char('\'')?; + let mut iter = text.split('\''); + if let Some(chunk) = iter.next() { + f.write_str(chunk)?; + } + for chunk in iter { + f.write_str("''")?; + f.write_str(chunk)?; + } + f.write_char('\'')?; + Ok(()) + } + + #[cfg(windows)] + fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result { + // ` takes the role of \ since \ is already used as the path separator. + // Things are UTF-16-oriented, so we escape code units as "`u{1234}". + use std::char::decode_utf16; + use std::os::windows::ffi::OsStrExt; + + f.write_char('"')?; + for ch in decode_utf16(text.encode_wide()) { + match ch { + Ok(ch) => match ch { + '\0' => f.write_str("`0")?, + '\r' => f.write_str("`r")?, + '\n' => f.write_str("`n")?, + '\t' => f.write_str("`t")?, + ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?, + '`' => f.write_str("``")?, + '$' => f.write_str("`$")?, + '"' => f.write_str("\"\"")?, + ch => f.write_char(ch)?, + }, + Err(err) => write!(f, "`u{{{:04X}}}", err.unpaired_surrogate())?, + } + } + f.write_char('"')?; + Ok(()) + } + } + + #[cfg(not(any(unix, target_os = "wasi", windows)))] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // As a fallback, we use Rust's own escaping rules. + // This is reasonably sane and very easy to implement. + // We use single quotes because that's hardcoded in a lot of tests. + let text = self.text.to_string_lossy(); + if self.force_quote || !text.chars().all(|ch| ch.is_alphanumeric() || ch == '.') { + write!(f, "'{}'", text.escape_debug()) + } else { + f.write_str(&text) + } + } +} + +#[cfg(any(unix, target_os = "wasi"))] +fn from_utf8_iter(mut bytes: &[u8]) -> impl Iterator> { + std::iter::from_fn(move || { + if bytes.is_empty() { + return None; + } + match from_utf8(bytes) { + Ok(text) => { + bytes = &[]; + Some(Ok(text)) + } + Err(err) if err.valid_up_to() == 0 => { + let res = bytes[0]; + bytes = &bytes[1..]; + Some(Err(res)) + } + Err(err) => { + let (valid, rest) = bytes.split_at(err.valid_up_to()); + bytes = rest; + Some(Ok(from_utf8(valid).unwrap())) + } + } + }) +} + +/// Print a path (or `OsStr`-like object) directly to stdout, with a trailing newline, +/// without losing any information if its encoding is invalid. +/// +/// This function is appropriate for commands where printing paths is the point and the +/// output is likely to be captured, like `pwd` and `basename`. For informational output +/// use `Quotable::quote`. +/// +/// FIXME: This is lossy on Windows. It could probably be implemented using some low-level +/// API that takes UTF-16, without going through io::Write. This is not a big priority +/// because broken filenames are much rarer on Windows than on Unix. +pub fn println_verbatim>(text: S) -> io::Result<()> { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + #[cfg(any(unix, target_os = "wasi"))] + { + stdout.write_all(text.as_ref().as_bytes())?; + stdout.write_all(b"\n")?; + } + #[cfg(not(any(unix, target_os = "wasi")))] + { + writeln!(stdout, "{}", std::path::Path::new(text.as_ref()).display())?; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn verify_quote(cases: &[(impl Quotable, &str)]) { + for (case, expected) in cases { + assert_eq!(case.quote().to_string(), *expected); + } + } + + fn verify_maybe(cases: &[(impl Quotable, &str)]) { + for (case, expected) in cases { + assert_eq!(case.maybe_quote().to_string(), *expected); + } + } + + /// This should hold on any platform, or else other tests fail. + #[test] + fn test_basic() { + verify_quote(&[ + ("foo", "'foo'"), + ("", "''"), + ("foo/bar.baz", "'foo/bar.baz'"), + ]); + verify_maybe(&[ + ("foo", "foo"), + ("", "''"), + ("foo bar", "'foo bar'"), + ("$foo", "'$foo'"), + ("-", "-"), + ]); + } + + #[cfg(any(unix, target_os = "wasi", windows))] + #[test] + fn test_common() { + verify_maybe(&[ + ("a#b", "a#b"), + ("#ab", "'#ab'"), + ("a~b", "a~b"), + ("!", "'!'"), + ]); + } + + #[cfg(any(unix, target_os = "wasi"))] + #[test] + fn test_unix() { + verify_quote(&[ + ("can't", r#""can't""#), + (r#"can'"t"#, r#"'can'\''"t'"#), + (r#"can'$t"#, r#"'can'\''$t'"#), + ("foo\nb\ta\r\\\0`r", r#"$'foo\nb\ta\r\\\x00`r'"#), + ("foo\x02", r#"$'foo\x02'"#), + (r#"'$''"#, r#"\''$'\'\'"#), + ]); + verify_quote(&[(OsStr::from_bytes(b"foo\xFF"), r#"$'foo\xFF'"#)]); + verify_maybe(&[ + ("-x", "-x"), + ("a,b", "a,b"), + ("a\\b", "'a\\b'"), + ("}", ("}")), + ]); + } + + #[cfg(windows)] + #[test] + fn test_windows() { + use std::ffi::OsString; + use std::os::windows::ffi::OsStringExt; + verify_quote(&[ + (r#"foo\bar"#, r#"'foo\bar'"#), + ("can't", r#""can't""#), + (r#"can'"t"#, r#"'can''"t'"#), + (r#"can'$t"#, r#"'can''$t'"#), + ("foo\nb\ta\r\\\0`r", r#""foo`nb`ta`r\`0``r""#), + ("foo\x02", r#""foo`u{0002}""#), + (r#"'$''"#, r#"'''$'''''"#), + ]); + verify_quote(&[( + OsString::from_wide(&[b'x' as u16, 0xD800]), + r#""x`u{D800}""#, + )]); + verify_maybe(&[ + ("-x", "'-x'"), + ("a,b", "'a,b'"), + ("a\\b", "a\\b"), + ("}", "'}'"), + ]); + } + + #[cfg(any(unix, target_os = "wasi"))] + #[test] + fn test_utf8_iter() { + type ByteStr = &'static [u8]; + type Chunk = Result<&'static str, u8>; + const CASES: &[(ByteStr, &[Chunk])] = &[ + (b"", &[]), + (b"hello", &[Ok("hello")]), + // Immediately invalid + (b"\xFF", &[Err(b'\xFF')]), + // Incomplete UTF-8 + (b"\xC2", &[Err(b'\xC2')]), + (b"\xF4\x8F", &[Err(b'\xF4'), Err(b'\x8F')]), + (b"\xFF\xFF", &[Err(b'\xFF'), Err(b'\xFF')]), + (b"hello\xC2", &[Ok("hello"), Err(b'\xC2')]), + (b"\xFFhello", &[Err(b'\xFF'), Ok("hello")]), + (b"\xFF\xC2hello", &[Err(b'\xFF'), Err(b'\xC2'), Ok("hello")]), + (b"foo\xFFbar", &[Ok("foo"), Err(b'\xFF'), Ok("bar")]), + ( + b"foo\xF4\x8Fbar", + &[Ok("foo"), Err(b'\xF4'), Err(b'\x8F'), Ok("bar")], + ), + ( + b"foo\xFF\xC2bar", + &[Ok("foo"), Err(b'\xFF'), Err(b'\xC2'), Ok("bar")], + ), + ]; + for &(case, expected) in CASES { + assert_eq!( + from_utf8_iter(case).collect::>().as_slice(), + expected + ); + } + } +} diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 546fb86a8..e6c23acc1 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -354,6 +354,7 @@ fn test_ls_long_format() { at.mkdir(&at.plus_as_string("test-long-dir/test-long-dir")); for arg in &["-l", "--long", "--format=long", "--format=verbose"] { + #[allow(unused_variables)] let result = scene.ucmd().arg(arg).arg("test-long-dir").succeeds(); // Assuming sane username do not have spaces within them. // A line of the output should be: @@ -373,6 +374,7 @@ fn test_ls_long_format() { ).unwrap()); } + #[allow(unused_variables)] let result = scene.ucmd().arg("-lan").arg("test-long-dir").succeeds(); // This checks for the line with the .. entry. The uname and group should be digits. #[cfg(not(windows))] @@ -1416,6 +1418,7 @@ fn test_ls_quoting_style() { // Default is shell-escape scene .ucmd() + .arg("--hide-control-chars") .arg("one\ntwo") .succeeds() .stdout_only("'one'$'\\n''two'\n"); @@ -1437,23 +1440,8 @@ fn test_ls_quoting_style() { ] { scene .ucmd() - .arg(arg) - .arg("one\ntwo") - .succeeds() - .stdout_only(format!("{}\n", correct)); - } - - for (arg, correct) in &[ - ("--quoting-style=literal", "one?two"), - ("-N", "one?two"), - ("--literal", "one?two"), - ("--quoting-style=shell", "one?two"), - ("--quoting-style=shell-always", "'one?two'"), - ] { - scene - .ucmd() - .arg(arg) .arg("--hide-control-chars") + .arg(arg) .arg("one\ntwo") .succeeds() .stdout_only(format!("{}\n", correct)); @@ -1463,7 +1451,7 @@ fn test_ls_quoting_style() { ("--quoting-style=literal", "one\ntwo"), ("-N", "one\ntwo"), ("--literal", "one\ntwo"), - ("--quoting-style=shell", "one\ntwo"), + ("--quoting-style=shell", "one\ntwo"), // FIXME: GNU ls quotes this case ("--quoting-style=shell-always", "'one\ntwo'"), ] { scene @@ -1490,6 +1478,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one\\two") .succeeds() @@ -1505,6 +1494,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one\n&two") .succeeds() @@ -1535,6 +1525,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one two") .succeeds() @@ -1558,6 +1549,7 @@ fn test_ls_quoting_style() { ] { scene .ucmd() + .arg("--hide-control-chars") .arg(arg) .arg("one") .succeeds()