From 4f891add5a77c7a8d15cfbec13c9577b243ed88a Mon Sep 17 00:00:00 2001
From: Jan Verbeek <jan.verbeek@posteo.nl>
Date: Sun, 29 Aug 2021 20:08:43 +0200
Subject: [PATCH] uucore: Add a Quotable extension trait for displaying
 filenames

---
 .../cspell.dictionaries/jargon.wordlist.txt   |   1 +
 .../cspell.dictionaries/shell.wordlist.txt    |   1 +
 src/uucore/src/lib/lib.rs                     |   1 +
 src/uucore/src/lib/mods.rs                    |   1 +
 src/uucore/src/lib/mods/display.rs            | 357 ++++++++++++++++++
 5 files changed, 361 insertions(+)
 create mode 100644 src/uucore/src/lib/mods/display.rs
diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt
index 34abfc511..089adffa3 100644
--- a/.vscode/cspell.dictionaries/jargon.wordlist.txt
+++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt
@@ -1,3 +1,4 @@
+AFAICT
 arity
 autogenerate
 autogenerated
diff --git a/.vscode/cspell.dictionaries/shell.wordlist.txt b/.vscode/cspell.dictionaries/shell.wordlist.txt
index 07c2364ac..4ed281efb 100644
--- a/.vscode/cspell.dictionaries/shell.wordlist.txt
+++ b/.vscode/cspell.dictionaries/shell.wordlist.txt
@@ -8,6 +8,7 @@ csh
 globstar
 inotify
 localtime
+mksh
 mountinfo
 mountpoint
 mtab
diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs
index a39834ec1..5352a6356 100644
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@@ -19,6 +19,7 @@ mod parser; // string parsing modules
 // * cross-platform modules
 pub use crate::mods::backup_control;
 pub use crate::mods::coreopts;
+pub use crate::mods::display;
 pub use crate::mods::error;
 pub use crate::mods::os;
 pub use crate::mods::panic;
diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs
index b0235832b..8f6d14976 100644
--- a/src/uucore/src/lib/mods.rs
+++ b/src/uucore/src/lib/mods.rs
@@ -2,6 +2,7 @@
 
 pub mod backup_control;
 pub mod coreopts;
+pub mod display;
 pub mod error;
 pub mod os;
 pub mod panic;
diff --git a/src/uucore/src/lib/mods/display.rs b/src/uucore/src/lib/mods/display.rs
new file mode 100644
index 000000000..42e9fceba
--- /dev/null
+++ b/src/uucore/src/lib/mods/display.rs
@@ -0,0 +1,357 @@
+/// Utilities for printing paths, with special attention paid to special
+/// characters and invalid unicode.
+///
+/// For displaying paths in informational messages use `Quotable::quote`. This
+/// will wrap quotes around the filename and add the necessary escapes to make
+/// it copy/paste-able into a shell.
+///
+/// # Examples
+/// ```
+/// use std::path::Path;
+/// use uucore::display::{Quotable, println_verbatim};
+///
+/// let path = Path::new("foo/bar.baz");
+///
+/// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
+/// # Ok::<(), std::io::Error>(())
+/// ```
+// spell-checker:ignore Fbar
+use std::ffi::OsStr;
+#[cfg(any(unix, target_os = "wasi", windows))]
+use std::fmt::Write as FmtWrite;
+use std::fmt::{self, Display, Formatter};
+
+#[cfg(unix)]
+use std::os::unix::ffi::OsStrExt;
+#[cfg(target_os = "wasi")]
+use std::os::wasi::ffi::OsStrExt;
+#[cfg(any(unix, target_os = "wasi"))]
+use std::str::from_utf8;
+
+/// An extension trait for displaying filenames to users.
+pub trait Quotable {
+    /// Returns an object that implements [`Display`] for printing filenames with
+    /// proper quoting and escaping for the platform.
+    ///
+    /// On Unix this corresponds to sh/bash syntax, on Windows Powershell syntax
+    /// is used.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::path::Path;
+    /// use uucore::display::Quotable;
+    ///
+    /// let path = Path::new("foo/bar.baz");
+    ///
+    /// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
+    /// ```
+    fn quote(&self) -> Quoted<'_>;
+}
+
+impl<T> Quotable for T
+where
+    T: AsRef<OsStr>,
+{
+    fn quote(&self) -> Quoted<'_> {
+        Quoted(self.as_ref())
+    }
+}
+
+/// A wrapper around [`OsStr`] for printing paths with quoting and escaping applied.
+#[derive(Debug)]
+pub struct Quoted<'a>(&'a OsStr);
+
+impl Display for Quoted<'_> {
+    #[cfg(any(unix, target_os = "wasi"))]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let text = self.0.as_bytes();
+
+        let mut is_single_safe = true;
+        let mut is_double_safe = true;
+        for &ch in text {
+            match ch {
+                ch if ch.is_ascii_control() => return write_escaped(f, text),
+                b'\'' => is_single_safe = false,
+                // Unsafe characters according to:
+                // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03
+                b'"' | b'`' | b'$' | b'\\' => is_double_safe = false,
+                _ => (),
+            }
+        }
+        let text = match from_utf8(text) {
+            Err(_) => return write_escaped(f, text),
+            Ok(text) => text,
+        };
+        if is_single_safe {
+            return write_simple(f, text, '\'');
+        } else if is_double_safe {
+            return write_simple(f, text, '\"');
+        } else {
+            return write_single_escaped(f, text);
+        }
+
+        fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result {
+            f.write_char(quote)?;
+            f.write_str(text)?;
+            f.write_char(quote)?;
+            Ok(())
+        }
+
+        fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result {
+            let mut iter = text.split('\'');
+            if let Some(chunk) = iter.next() {
+                if !chunk.is_empty() {
+                    write_simple(f, chunk, '\'')?;
+                }
+            }
+            for chunk in iter {
+                f.write_str("\\'")?;
+                if !chunk.is_empty() {
+                    write_simple(f, chunk, '\'')?;
+                }
+            }
+            Ok(())
+        }
+
+        /// Write using the syntax described here:
+        /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
+        ///
+        /// Supported by these shells:
+        /// - bash
+        /// - zsh
+        /// - busybox sh
+        /// - mksh
+        ///
+        /// Not supported by these:
+        /// - fish
+        /// - dash
+        /// - tcsh
+        fn write_escaped(f: &mut Formatter<'_>, text: &[u8]) -> fmt::Result {
+            f.write_str("$'")?;
+            for chunk in from_utf8_iter(text) {
+                match chunk {
+                    Ok(chunk) => {
+                        for ch in chunk.chars() {
+                            match ch {
+                                '\n' => f.write_str("\\n")?,
+                                '\t' => f.write_str("\\t")?,
+                                '\r' => f.write_str("\\r")?,
+                                // We could do \b, \f, \v, etc., but those are
+                                // rare enough to be confusing.
+                                // \0 doesn't work consistently because of the
+                                // octal \nnn syntax, and null bytes can't appear
+                                // in filenames anyway.
+                                ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?,
+                                '\\' | '\'' => {
+                                    // '?' and '"' can also be escaped this way
+                                    // but AFAICT there's no reason to do so
+                                    f.write_char('\\')?;
+                                    f.write_char(ch)?;
+                                }
+                                ch => {
+                                    f.write_char(ch)?;
+                                }
+                            }
+                        }
+                    }
+                    Err(unit) => write!(f, "\\x{:02X}", unit)?,
+                }
+            }
+            f.write_char('\'')?;
+            Ok(())
+        }
+    }
+
+    #[cfg(windows)]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // Behavior is based on PowerShell.
+        // ` takes the role of \ since \ is already used as the path separator.
+        // Things are UTF-16-oriented, so we escape code units as "`u{1234}".
+        use std::char::decode_utf16;
+        use std::os::windows::ffi::OsStrExt;
+
+        // Getting the "raw" representation of an OsStr is actually expensive,
+        // so avoid it if unnecessary.
+        let text = match self.0.to_str() {
+            None => return write_escaped(f, self.0),
+            Some(text) => text,
+        };
+
+        let mut is_single_safe = true;
+        let mut is_double_safe = true;
+        for ch in text.chars() {
+            match ch {
+                ch if ch.is_ascii_control() => return write_escaped(f, self.0),
+                '\'' => is_single_safe = false,
+                '"' | '`' | '$' => is_double_safe = false,
+                _ => (),
+            }
+        }
+
+        if is_single_safe || !is_double_safe {
+            return write_simple(f, text, '\'');
+        } else {
+            return write_simple(f, text, '"');
+        }
+
+        fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result {
+            // Quotes in Powershell can be escaped by doubling them
+            f.write_char(quote)?;
+            let mut iter = text.split(quote);
+            if let Some(chunk) = iter.next() {
+                f.write_str(chunk)?;
+            }
+            for chunk in iter {
+                f.write_char(quote)?;
+                f.write_char(quote)?;
+                f.write_str(chunk)?;
+            }
+            f.write_char(quote)?;
+            Ok(())
+        }
+
+        fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result {
+            f.write_char('"')?;
+            for ch in decode_utf16(text.encode_wide()) {
+                match ch {
+                    Ok(ch) => match ch {
+                        '\0' => f.write_str("`0")?,
+                        '\r' => f.write_str("`r")?,
+                        '\n' => f.write_str("`n")?,
+                        '\t' => f.write_str("`t")?,
+                        ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?,
+                        '`' => f.write_str("``")?,
+                        '$' => f.write_str("`$")?,
+                        '"' => f.write_str("\"\"")?,
+                        ch => f.write_char(ch)?,
+                    },
+                    Err(err) => write!(f, "`u{{{:04X}}}", err.unpaired_surrogate())?,
+                }
+            }
+            f.write_char('"')?;
+            Ok(())
+        }
+    }
+
+    #[cfg(not(any(unix, target_os = "wasi", windows)))]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // As a fallback, we use Rust's own escaping rules.
+        // This is reasonably sane and very easy to implement.
+        // We use single quotes because that's hardcoded in a lot of tests.
+        write!(f, "'{}'", self.0.to_string_lossy().escape_debug())
+    }
+}
+
+#[cfg(any(unix, target_os = "wasi"))]
+fn from_utf8_iter(mut bytes: &[u8]) -> impl Iterator<Item = Result<&str, u8>> {
+    std::iter::from_fn(move || {
+        if bytes.is_empty() {
+            return None;
+        }
+        match from_utf8(bytes) {
+            Ok(text) => {
+                bytes = &[];
+                Some(Ok(text))
+            }
+            Err(err) if err.valid_up_to() == 0 => {
+                let res = bytes[0];
+                bytes = &bytes[1..];
+                Some(Err(res))
+            }
+            Err(err) => {
+                let (valid, rest) = bytes.split_at(err.valid_up_to());
+                bytes = rest;
+                Some(Ok(from_utf8(valid).unwrap()))
+            }
+        }
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn verify_quote(cases: &[(impl AsRef<OsStr>, &str)]) {
+        for (case, expected) in cases {
+            assert_eq!(case.quote().to_string(), *expected);
+        }
+    }
+
+    /// This should hold on any platform, or else a lot of other tests will fail.
+    #[test]
+    fn test_basic() {
+        verify_quote(&[
+            ("foo", "'foo'"),
+            ("", "''"),
+            ("foo/bar.baz", "'foo/bar.baz'"),
+        ]);
+    }
+
+    #[cfg(any(unix, target_os = "wasi"))]
+    #[test]
+    fn test_unix() {
+        verify_quote(&[
+            ("can't", r#""can't""#),
+            (r#"can'"t"#, r#"'can'\''"t'"#),
+            (r#"can'$t"#, r#"'can'\''$t'"#),
+            ("foo\nb\ta\r\\\0`r", r#"$'foo\nb\ta\r\\\x00`r'"#),
+            ("foo\x02", r#"$'foo\x02'"#),
+            (r#"'$''"#, r#"\''$'\'\'"#),
+        ]);
+        verify_quote(&[(OsStr::from_bytes(b"foo\xFF"), r#"$'foo\xFF'"#)]);
+    }
+
+    #[cfg(windows)]
+    #[test]
+    fn test_windows() {
+        use std::ffi::OsString;
+        use std::os::windows::ffi::OsStringExt;
+        verify_quote(&[
+            (r#"foo\bar"#, r#"'foo\bar'"#),
+            ("can't", r#""can't""#),
+            (r#"can'"t"#, r#"'can''"t'"#),
+            (r#"can'$t"#, r#"'can''$t'"#),
+            ("foo\nb\ta\r\\\0`r", r#""foo`nb`ta`r\`0``r""#),
+            ("foo\x02", r#""foo`u{0002}""#),
+            (r#"'$''"#, r#"'''$'''''"#),
+        ]);
+        verify_quote(&[(
+            OsString::from_wide(&[b'x' as u16, 0xD800]),
+            r#""x`u{D800}""#,
+        )])
+    }
+
+    #[cfg(any(unix, target_os = "wasi"))]
+    #[test]
+    fn test_utf8_iter() {
+        const CASES: &[(&[u8], &[Result<&str, u8>])] = &[
+            (b"", &[]),
+            (b"hello", &[Ok("hello")]),
+            // Immediately invalid
+            (b"\xFF", &[Err(b'\xFF')]),
+            // Incomplete UTF-8
+            (b"\xC2", &[Err(b'\xC2')]),
+            (b"\xF4\x8F", &[Err(b'\xF4'), Err(b'\x8F')]),
+            (b"\xFF\xFF", &[Err(b'\xFF'), Err(b'\xFF')]),
+            (b"hello\xC2", &[Ok("hello"), Err(b'\xC2')]),
+            (b"\xFFhello", &[Err(b'\xFF'), Ok("hello")]),
+            (b"\xFF\xC2hello", &[Err(b'\xFF'), Err(b'\xC2'), Ok("hello")]),
+            (b"foo\xFFbar", &[Ok("foo"), Err(b'\xFF'), Ok("bar")]),
+            (
+                b"foo\xF4\x8Fbar",
+                &[Ok("foo"), Err(b'\xF4'), Err(b'\x8F'), Ok("bar")],
+            ),
+            (
+                b"foo\xFF\xC2bar",
+                &[Ok("foo"), Err(b'\xFF'), Err(b'\xC2'), Ok("bar")],
+            ),
+        ];
+        for &(case, expected) in CASES {
+            assert_eq!(
+                from_utf8_iter(case).collect::<Vec<_>>().as_slice(),
+                expected
+            );
+        }
+    }
+}