Merge pull request #6882 from jtracey/quoting_style_bytes

quoting_style: Add support for non-UTF-8 bytes
2025-07-28 11:37:44 +00:00 · 2024-12-21 23:17:43 +01:00 · 2024-12-21 23:17:43 +01:00 · bb2fb66073
commit bb2fb66073
parent 21e7149a92 db1ed4c094
10 changed files with 584 additions and 175 deletions
--- a/.clippy.toml
+++ b/.clippy.toml
@ -1,4 +1,4 @@
-msrv = "1.77.0"
+msrv = "1.79.0"
 cognitive-complexity-threshold = 24
 missing-docs-in-crate-items = true
 check-private-items = true
--- a/.github/workflows/CICD.yml
+++ b/.github/workflows/CICD.yml
@ -11,7 +11,7 @@ env:
  PROJECT_NAME: coreutils
  PROJECT_DESC: "Core universal (cross-platform) utilities"
  PROJECT_AUTH: "uutils"
-  RUST_MIN_SRV: "1.77.0"
+  RUST_MIN_SRV: "1.79.0"
  # * style job configuration
  STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis

--- a/Cargo.toml
+++ b/Cargo.toml
@ -16,7 +16,7 @@ repository = "https://github.com/uutils/coreutils"
 readme = "README.md"
 keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
 categories = ["command-line-utilities"]
-rust-version = "1.77.0"
+rust-version = "1.79.0"
 edition = "2021"

 build = "build.rs"
--- a/README.md
+++ b/README.md
@ -14,7 +14,7 @@
 [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils)

 [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils)
-![MSRV](https://img.shields.io/badge/MSRV-1.77.0-brightgreen)
+![MSRV](https://img.shields.io/badge/MSRV-1.79.0-brightgreen)

 </div>

@ -70,7 +70,7 @@ the [coreutils docs](https://github.com/uutils/uutils.github.io) repository.
 ### Rust Version

 uutils follows Rust's release channels and is tested against stable, beta and
-nightly. The current Minimum Supported Rust Version (MSRV) is `1.77.0`.
+nightly. The current Minimum Supported Rust Version (MSRV) is `1.79.0`.

 ## Building

--- a/src/uu/ls/src/ls.rs
+++ b/src/uu/ls/src/ls.rs
@ -21,7 +21,7 @@ use std::os::windows::fs::MetadataExt;
 use std::{
    cmp::Reverse,
    error::Error,
-    ffi::OsString,
+    ffi::{OsStr, OsString},
    fmt::{Display, Write as FmtWrite},
    fs::{self, DirEntry, FileType, Metadata, ReadDir},
    io::{stdout, BufWriter, ErrorKind, Stdout, Write},
@ -55,7 +55,7 @@ use uucore::libc::{dev_t, major, minor};
 #[cfg(unix)]
 use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
 use uucore::line_ending::LineEnding;
-use uucore::quoting_style::{escape_dir_name, escape_name, QuotingStyle};
+use uucore::quoting_style::{self, QuotingStyle};
 use uucore::{
    display::Quotable,
    error::{set_exit_code, UError, UResult},
@ -2048,7 +2048,11 @@ impl PathData {
 /// file11
 /// ```
 fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) {
-    let escaped_name = escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
+    // FIXME: replace this with appropriate behavior for literal unprintable bytes
+    let escaped_name =
+        quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style)
+            .to_string_lossy()
+            .to_string();

    let name = if config.hyperlink && !config.dired {
        create_hyperlink(&escaped_name, path_data)
@ -3002,7 +3006,6 @@ use std::sync::Mutex;
 #[cfg(unix)]
 use uucore::entries;
 use uucore::fs::FileInformation;
-use uucore::quoting_style;

 #[cfg(unix)]
 fn cached_uid2usr(uid: u32) -> String {
@ -3542,3 +3545,10 @@ fn calculate_padding_collection(

    padding_collections
 }
+
+// FIXME: replace this with appropriate behavior for literal unprintable bytes
+fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
+    quoting_style::escape_name(name, style)
+        .to_string_lossy()
+        .to_string()
+}
--- a/src/uu/wc/src/wc.rs
+++ b/src/uu/wc/src/wc.rs
@ -13,7 +13,7 @@ mod word_count;
 use std::{
    borrow::{Borrow, Cow},
    cmp::max,
-    ffi::OsString,
+    ffi::{OsStr, OsString},
    fs::{self, File},
    io::{self, Write},
    iter,
@ -28,7 +28,7 @@ use utf8::{BufReadDecoder, BufReadDecoderError};
 use uucore::{
    error::{FromIo, UError, UResult},
    format_usage, help_about, help_usage,
-    quoting_style::{escape_name, QuotingStyle},
+    quoting_style::{self, QuotingStyle},
    shortcut_value_parser::ShortcutValueParser,
    show,
 };
@ -259,7 +259,7 @@ impl<'a> Input<'a> {
        match self {
            Self::Path(path) => Some(match path.to_str() {
                Some(s) if !s.contains('\n') => Cow::Borrowed(s),
-                _ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)),
+                _ => Cow::Owned(escape_name_wrapper(path.as_os_str())),
            }),
            Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)),
            Self::Stdin(StdinKind::Implicit) => None,
@ -269,7 +269,7 @@ impl<'a> Input<'a> {
    /// Converts input into the form that appears in errors.
    fn path_display(&self) -> String {
        match self {
-            Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE),
+            Self::Path(path) => escape_name_wrapper(path.as_os_str()),
            Self::Stdin(_) => String::from("standard input"),
        }
    }
@ -361,7 +361,7 @@ impl WcError {
            Some((input, idx)) => {
                let path = match input {
                    Input::Stdin(_) => STDIN_REPR.into(),
-                    Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(),
+                    Input::Path(path) => escape_name_wrapper(path.as_os_str()).into(),
                };
                Self::ZeroLengthFileNameCtx { path, idx }
            }
@ -761,7 +761,9 @@ fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterIt
        Err(e) => Err(e.map_err_context(|| {
            format!(
                "cannot open {} for reading",
-                escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
+                quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
+                    .into_string()
+                    .expect("All escaped names with the escaping option return valid strings.")
            )
        })),
    }
@ -793,9 +795,9 @@ fn files0_iter<'a>(
                        Ok(Input::Path(PathBuf::from(s).into()))
                    }
                }
-                Err(e) => Err(e.map_err_context(|| {
-                    format!("{}: read error", escape_name(&err_path, QS_ESCAPE))
-                }) as Box<dyn UError>),
+                Err(e) => Err(e
+                    .map_err_context(|| format!("{}: read error", escape_name_wrapper(&err_path)))
+                    as Box<dyn UError>),
            }),
    );
    // Loop until there is an error; yield that error and then nothing else.
@ -808,6 +810,12 @@ fn files0_iter<'a>(
    })
 }

+fn escape_name_wrapper(name: &OsStr) -> String {
+    quoting_style::escape_name(name, QS_ESCAPE)
+        .into_string()
+        .expect("All escaped names with the escaping option return valid strings.")
+}
+
 fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> {
    let mut total_word_count = WordCount::default();
    let mut num_inputs: usize = 0;
--- a/src/uucore/src/lib/features/format/argument.rs
+++ b/src/uucore/src/lib/features/format/argument.rs
@ -112,7 +112,8 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
                    Default::default()
                }
                ParseError::PartialMatch(v, rest) => {
-                    if input.starts_with('\'') {
+                    let bytes = input.as_encoded_bytes();
+                    if !bytes.is_empty() && bytes[0] == b'\'' {
                        show_warning!(
                            "{}: character(s) following character constant have been ignored",
                            &rest,
--- a/src/uucore/src/lib/features/format/spec.rs
+++ b/src/uucore/src/lib/features/format/spec.rs
@ -353,20 +353,20 @@ impl Spec {
                writer.write_all(&parsed).map_err(FormatError::IoError)
            }
            Self::QuotedString => {
-                let s = args.get_str();
-                writer
-                    .write_all(
-                        escape_name(
-                            s.as_ref(),
-                            &QuotingStyle::Shell {
-                                escape: true,
-                                always_quote: false,
-                                show_control: false,
-                            },
-                        )
-                        .as_bytes(),
-                    )
-                    .map_err(FormatError::IoError)
+                let s = escape_name(
+                    args.get_str().as_ref(),
+                    &QuotingStyle::Shell {
+                        escape: true,
+                        always_quote: false,
+                        show_control: false,
+                    },
+                );
+                #[cfg(unix)]
+                let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
+                #[cfg(not(unix))]
+                let bytes = s.to_string_lossy().as_bytes().to_owned();
+
+                writer.write_all(&bytes).map_err(FormatError::IoError)
            }
            Self::SignedInt {
                width,
--- a/src/uucore/src/lib/features/quoting_style.rs
+++ b/src/uucore/src/lib/features/quoting_style.rs
@ -6,39 +6,43 @@
 //! Set of functions for escaping names according to different quoting styles.

 use std::char::from_digit;
-use std::ffi::OsStr;
+use std::ffi::{OsStr, OsString};
 use std::fmt;

 // These are characters with special meaning in the shell (e.g. bash).
 // The first const contains characters that only have a special meaning when they appear at the beginning of a name.
-const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#'];
+const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
 // PR#6559 : Remove `]{}` from special shell chars.
 const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";

 /// The quoting style to use when escaping a name.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum QuotingStyle {
-    /// Escape the name as a literal string.
+    /// Escape the name as a shell string.
+    /// Used in, e.g., `ls --quoting-style=shell`.
    Shell {
        /// Whether to escape characters in the name.
+        /// True in, e.g., `ls --quoting-style=shell-escape`.
        escape: bool,

        /// Whether to always quote the name.
        always_quote: bool,

-        /// Whether to show control characters.
+        /// Whether to show control and non-unicode characters, or replace them with `?`.
        show_control: bool,
    },

    /// Escape the name as a C string.
+    /// Used in, e.g., `ls --quote-name`.
    C {
        /// The type of quotes to use.
        quotes: Quotes,
    },

-    /// Escape the name as a literal string.
+    /// Do not escape the string.
+    /// Used in, e.g., `ls --literal`.
    Literal {
-        /// Whether to show control characters.
+        /// Whether to show control and non-unicode characters, or replace them with `?`.
        show_control: bool,
    },
 }
@ -72,16 +76,24 @@ enum EscapeState {
    Octal(EscapeOctal),
 }

+/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
+/// Only supports characters up to 2 bytes long in UTF-8.
 struct EscapeOctal {
-    c: char,
+    c: [u8; 2],
    state: EscapeOctalState,
-    idx: usize,
+    idx: u8,
 }

 enum EscapeOctalState {
    Done,
-    Backslash,
-    Value,
+    FirstBackslash,
+    FirstValue,
+    LastBackslash,
+    LastValue,
+}
+
+fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
+    (byte >> (idx * 3)) & 0o7
 }

 impl Iterator for EscapeOctal {
@ -90,29 +102,57 @@ impl Iterator for EscapeOctal {
    fn next(&mut self) -> Option<char> {
        match self.state {
            EscapeOctalState::Done => None,
-            EscapeOctalState::Backslash => {
-                self.state = EscapeOctalState::Value;
+            EscapeOctalState::FirstBackslash => {
+                self.state = EscapeOctalState::FirstValue;
                Some('\\')
            }
-            EscapeOctalState::Value => {
-                let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7;
+            EscapeOctalState::LastBackslash => {
+                self.state = EscapeOctalState::LastValue;
+                Some('\\')
+            }
+            EscapeOctalState::FirstValue => {
+                let octal_digit = byte_to_octal_digit(self.c[0], self.idx);
+                if self.idx == 0 {
+                    self.state = EscapeOctalState::LastBackslash;
+                    self.idx = 2;
+                } else {
+                    self.idx -= 1;
+                }
+                Some(from_digit(octal_digit.into(), 8).unwrap())
+            }
+            EscapeOctalState::LastValue => {
+                let octal_digit = byte_to_octal_digit(self.c[1], self.idx);
                if self.idx == 0 {
                    self.state = EscapeOctalState::Done;
                } else {
                    self.idx -= 1;
                }
-                Some(from_digit(octal_digit, 8).unwrap())
+                Some(from_digit(octal_digit.into(), 8).unwrap())
            }
        }
    }
 }

 impl EscapeOctal {
-    fn from(c: char) -> Self {
+    fn from_char(c: char) -> Self {
+        if c.len_utf8() == 1 {
+            return Self::from_byte(c as u8);
+        }
+
+        let mut buf = [0; 2];
+        let _s = c.encode_utf8(&mut buf);
        Self {
-            c,
+            c: buf,
            idx: 2,
-            state: EscapeOctalState::Backslash,
+            state: EscapeOctalState::FirstBackslash,
+        }
+    }
+
+    fn from_byte(b: u8) -> Self {
+        Self {
+            c: [0, b],
+            idx: 2,
+            state: EscapeOctalState::LastBackslash,
        }
    }
 }
@ -124,6 +164,12 @@ impl EscapedChar {
        }
    }

+    fn new_octal(b: u8) -> Self {
+        Self {
+            state: EscapeState::Octal(EscapeOctal::from_byte(b)),
+        }
+    }
+
    fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
        use EscapeState::*;
        let init_state = match c {
@ -148,7 +194,7 @@ impl EscapedChar {
                _ => Char(' '),
            },
            ':' if dirname => Backslash(':'),
-            _ if c.is_ascii_control() => Octal(EscapeOctal::from(c)),
+            _ if c.is_control() => Octal(EscapeOctal::from_char(c)),
            _ => Char(c),
        };
        Self { state: init_state }
@ -165,11 +211,11 @@ impl EscapedChar {
            '\x0B' => Backslash('v'),
            '\x0C' => Backslash('f'),
            '\r' => Backslash('r'),
-            '\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)),
            '\'' => match quotes {
                Quotes::Single => Backslash('\''),
                _ => Char('\''),
            },
+            _ if c.is_control() => Octal(EscapeOctal::from_char(c)),
            _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
            _ => Char(c),
        };
@ -205,102 +251,124 @@ impl Iterator for EscapedChar {
    }
 }

-fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) {
+/// Check whether `bytes` starts with any byte in `pattern`.
+fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
+    !bytes.is_empty() && pattern.contains(&bytes[0])
+}
+
+fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
    let mut must_quote = false;
-    let mut escaped_str = String::with_capacity(name.len());
+    let mut escaped_str = Vec::with_capacity(name.len());
+    let mut utf8_buf = vec![0; 4];

-    for c in name.chars() {
-        let escaped = {
-            let ec = EscapedChar::new_shell(c, false, quotes);
-            if show_control_chars {
-                ec
-            } else {
-                ec.hide_control()
-            }
-        };
+    for s in name.utf8_chunks() {
+        for c in s.valid().chars() {
+            let escaped = {
+                let ec = EscapedChar::new_shell(c, false, quotes);
+                if show_control_chars {
+                    ec
+                } else {
+                    ec.hide_control()
+                }
+            };

-        match escaped.state {
-            EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"),
-            EscapeState::ForceQuote(x) => {
-                must_quote = true;
-                escaped_str.push(x);
-            }
-            _ => {
-                for char in escaped {
-                    escaped_str.push(char);
+            match escaped.state {
+                EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"),
+                EscapeState::ForceQuote(x) => {
+                    must_quote = true;
+                    escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes());
+                }
+                _ => {
+                    for c in escaped {
+                        escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes());
+                    }
                }
            }
        }
+
+        if show_control_chars {
+            escaped_str.extend_from_slice(s.invalid());
+        } else {
+            escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?');
+        }
    }

-    must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
+    must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
    (escaped_str, must_quote)
 }

-fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
+fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
    // We need to keep track of whether we are in a dollar expression
    // because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
    let mut in_dollar = false;
    let mut must_quote = false;
    let mut escaped_str = String::with_capacity(name.len());

-    for c in name.chars() {
-        let escaped = EscapedChar::new_shell(c, true, quotes);
-        match escaped.state {
-            EscapeState::Char(x) => {
-                if in_dollar {
-                    escaped_str.push_str("''");
+    for s in name.utf8_chunks() {
+        for c in s.valid().chars() {
+            let escaped = EscapedChar::new_shell(c, true, quotes);
+            match escaped.state {
+                EscapeState::Char(x) => {
+                    if in_dollar {
+                        escaped_str.push_str("''");
+                        in_dollar = false;
+                    }
+                    escaped_str.push(x);
+                }
+                EscapeState::ForceQuote(x) => {
+                    if in_dollar {
+                        escaped_str.push_str("''");
+                        in_dollar = false;
+                    }
+                    must_quote = true;
+                    escaped_str.push(x);
+                }
+                // Single quotes are not put in dollar expressions, but are escaped
+                // if the string also contains double quotes. In that case, they must
+                // be handled separately.
+                EscapeState::Backslash('\'') => {
+                    must_quote = true;
                    in_dollar = false;
+                    escaped_str.push_str("'\\''");
                }
-                escaped_str.push(x);
-            }
-            EscapeState::ForceQuote(x) => {
-                if in_dollar {
-                    escaped_str.push_str("''");
-                    in_dollar = false;
-                }
-                must_quote = true;
-                escaped_str.push(x);
-            }
-            // Single quotes are not put in dollar expressions, but are escaped
-            // if the string also contains double quotes. In that case, they must
-            // be handled separately.
-            EscapeState::Backslash('\'') => {
-                must_quote = true;
-                in_dollar = false;
-                escaped_str.push_str("'\\''");
-            }
-            _ => {
-                if !in_dollar {
-                    escaped_str.push_str("'$'");
-                    in_dollar = true;
-                }
-                must_quote = true;
-                for char in escaped {
-                    escaped_str.push(char);
+                _ => {
+                    if !in_dollar {
+                        escaped_str.push_str("'$'");
+                        in_dollar = true;
+                    }
+                    must_quote = true;
+                    for char in escaped {
+                        escaped_str.push(char);
+                    }
                }
            }
        }
+        if !s.invalid().is_empty() {
+            if !in_dollar {
+                escaped_str.push_str("'$'");
+                in_dollar = true;
+            }
+            must_quote = true;
+            let escaped_bytes: String = s
+                .invalid()
+                .iter()
+                .flat_map(|b| EscapedChar::new_octal(*b))
+                .collect();
+            escaped_str.push_str(&escaped_bytes);
+        }
    }
-    must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
-    (escaped_str, must_quote)
+    must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
+    (escaped_str.into(), must_quote)
 }

 /// Return a set of characters that implies quoting of the word in
 /// shell-quoting mode.
-fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
-    const ESCAPED_CHARS: &[char] = &[
-        // the ':' colon character only induce quoting in the
-        // context of ls displaying a directory name before listing its content.
-        // (e.g. with the recursive flag -R)
-        ':',
-        // Under this line are the control characters that should be
-        // quoted in shell mode in all cases.
-        '"', '`', '$', '\\', '^', '\n', '\t', '\r', '=',
-    ];
-
+fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
+    const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
+    // The ':' colon character only induces quoting in the
+    // context of ls displaying a directory name before listing its content
+    // (e.g. with the recursive flag -R).
    let start_index = if is_dirname { 0 } else { 1 };
-
    &ESCAPED_CHARS[start_index..]
 }

@ -308,41 +376,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
 ///
 /// This inner function provides an additional flag `dirname` which
 /// is meant for ls' directory name display.
-fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String {
+fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
    match style {
        QuotingStyle::Literal { show_control } => {
            if *show_control {
-                name.to_string_lossy().into_owned()
+                name.to_owned()
            } else {
-                name.to_string_lossy()
-                    .chars()
-                    .flat_map(|c| EscapedChar::new_literal(c).hide_control())
-                    .collect()
+                name.utf8_chunks()
+                    .map(|s| {
+                        let valid: String = s
+                            .valid()
+                            .chars()
+                            .flat_map(|c| EscapedChar::new_literal(c).hide_control())
+                            .collect();
+                        let invalid = "?".repeat(s.invalid().len());
+                        valid + &invalid
+                    })
+                    .collect::<String>()
+                    .into()
            }
        }
        QuotingStyle::C { quotes } => {
            let escaped_str: String = name
-                .to_string_lossy()
-                .chars()
-                .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname))
-                .collect();
+                .utf8_chunks()
+                .flat_map(|s| {
+                    let valid = s
+                        .valid()
+                        .chars()
+                        .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
+                    let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
+                    valid.chain(invalid)
+                })
+                .collect::<String>();

            match quotes {
                Quotes::Single => format!("'{escaped_str}'"),
                Quotes::Double => format!("\"{escaped_str}\""),
                Quotes::None => escaped_str,
            }
+            .into()
        }
        QuotingStyle::Shell {
            escape,
            always_quote,
            show_control,
        } => {
-            let name = name.to_string_lossy();
-
-            let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) {
+            let (quotes, must_quote) = if name
+                .iter()
+                .any(|c| shell_escaped_char_set(dirname).contains(c))
+            {
                (Quotes::Single, true)
-            } else if name.contains('\'') {
+            } else if name.contains(&b'\'') {
                (Quotes::Double, true)
            } else if *always_quote {
                (Quotes::Single, true)
@ -351,30 +435,43 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
            };

            let (escaped_str, contains_quote_chars) = if *escape {
-                shell_with_escape(&name, quotes)
+                shell_with_escape(name, quotes)
            } else {
-                shell_without_escape(&name, quotes, *show_control)
+                shell_without_escape(name, quotes, *show_control)
            };

-            match (must_quote | contains_quote_chars, quotes) {
-                (true, Quotes::Single) => format!("'{escaped_str}'"),
-                (true, Quotes::Double) => format!("\"{escaped_str}\""),
-                _ => escaped_str,
+            if must_quote | contains_quote_chars && quotes != Quotes::None {
+                let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
+                let quote = if quotes == Quotes::Single {
+                    b'\''
+                } else {
+                    b'"'
+                };
+                quoted_str.push(quote);
+                quoted_str.extend(escaped_str);
+                quoted_str.push(quote);
+                quoted_str
+            } else {
+                escaped_str
            }
        }
    }
 }

 /// Escape a filename with respect to the given style.
-pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
-    escape_name_inner(name, style, false)
+pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
+    let name = crate::os_str_as_bytes_lossy(name);
+    crate::os_string_from_vec(escape_name_inner(&name, style, false))
+        .expect("all byte sequences should be valid for platform, or already replaced in name")
 }

 /// Escape a directory name with respect to the given style.
 /// This is mainly meant to be used for ls' directory name printing and is not
 /// likely to be used elsewhere.
-pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String {
-    escape_name_inner(dir_name, style, true)
+pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
+    let name = crate::os_str_as_bytes_lossy(dir_name);
+    crate::os_string_from_vec(escape_name_inner(&name, style, true))
+        .expect("all byte sequences should be valid for platform, or already replaced in name")
 }

 impl fmt::Display for QuotingStyle {
@ -415,7 +512,7 @@ impl fmt::Display for Quotes {

 #[cfg(test)]
 mod tests {
-    use crate::quoting_style::{escape_name, Quotes, QuotingStyle};
+    use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle};

    // spell-checker:ignore (tests/words) one\'two one'two

@ -465,14 +562,31 @@ mod tests {
        }
    }

+    fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
+        map.iter()
+            .map(|(_, style)| escape_name_inner(name, &get_style(style), false))
+            .collect()
+    }
+
    fn check_names(name: &str, map: &[(&str, &str)]) {
        assert_eq!(
            map.iter()
-                .map(|(_, style)| escape_name(name.as_ref(), &get_style(style)))
-                .collect::<Vec<String>>(),
+                .map(|(correct, _)| *correct)
+                .collect::<Vec<&str>>(),
+            check_names_inner(name.as_bytes(), map)
+                .iter()
+                .map(|bytes| std::str::from_utf8(bytes)
+                    .expect("valid str goes in, valid str comes out"))
+                .collect::<Vec<&str>>()
+        );
+    }
+
+    fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) {
+        assert_eq!(
            map.iter()
-                .map(|(correct, _)| correct.to_string())
-                .collect::<Vec<String>>()
+                .map(|(correct, _)| *correct)
+                .collect::<Vec<&[u8]>>(),
+            check_names_inner(name, map)
        );
    }

@ -487,10 +601,10 @@ mod tests {
                ("\"one_two\"", "c"),
                ("one_two", "shell"),
                ("one_two", "shell-show"),
-                ("\'one_two\'", "shell-always"),
-                ("\'one_two\'", "shell-always-show"),
+                ("'one_two'", "shell-always"),
+                ("'one_two'", "shell-always-show"),
                ("one_two", "shell-escape"),
-                ("\'one_two\'", "shell-escape-always"),
+                ("'one_two'", "shell-escape-always"),
            ],
        );
    }
@ -504,12 +618,12 @@ mod tests {
                ("one two", "literal-show"),
                ("one\\ two", "escape"),
                ("\"one two\"", "c"),
-                ("\'one two\'", "shell"),
-                ("\'one two\'", "shell-show"),
-                ("\'one two\'", "shell-always"),
-                ("\'one two\'", "shell-always-show"),
-                ("\'one two\'", "shell-escape"),
-                ("\'one two\'", "shell-escape-always"),
+                ("'one two'", "shell"),
+                ("'one two'", "shell-show"),
+                ("'one two'", "shell-always"),
+                ("'one two'", "shell-always-show"),
+                ("'one two'", "shell-escape"),
+                ("'one two'", "shell-escape-always"),
            ],
        );

@ -551,7 +665,7 @@ mod tests {

        // One single quote
        check_names(
-            "one\'two",
+            "one'two",
            &[
                ("one'two", "literal"),
                ("one'two", "literal-show"),
@ -637,7 +751,7 @@ mod tests {
            ],
        );

-        // The first 16 control characters. NUL is also included, even though it is of
+        // The first 16 ASCII control characters. NUL is also included, even though it is of
        // no importance for file names.
        check_names(
            "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
@ -676,7 +790,7 @@ mod tests {
            ],
        );

-        // The last 16 control characters.
+        // The last 16 ASCII control characters.
        check_names(
            "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
            &[
@ -730,6 +844,265 @@ mod tests {
                ("''$'\\177'", "shell-escape-always"),
            ],
        );
+
+        // The first 16 Unicode control characters.
+        let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap();
+        check_names(
+            test_str,
+            &[
+                ("????????????????", "literal"),
+                (test_str, "literal-show"),
+                ("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"),
+                ("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"),
+                ("????????????????", "shell"),
+                (test_str, "shell-show"),
+                ("'????????????????'", "shell-always"),
+                (&format!("'{}'", test_str), "shell-always-show"),
+                ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"),
+                ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"),
+            ],
+        );
+
+        // The last 16 Unicode control characters.
+        let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap();
+        check_names(
+            test_str,
+            &[
+                ("????????????????", "literal"),
+                (test_str, "literal-show"),
+                ("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"),
+                ("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"),
+                ("????????????????", "shell"),
+                (test_str, "shell-show"),
+                ("'????????????????'", "shell-always"),
+                (&format!("'{}'", test_str), "shell-always-show"),
+                ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"),
+                ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_non_unicode_bytes() {
+        let ascii = b'_'; // plain ASCII byte, valid on its own
+        let continuation = b'\xA7'; // UTF-8 continuation byte; only valid after a lead byte
+        let first2byte = b'\xC2'; // lead byte of a 2-byte UTF-8 sequence
+        let first3byte = b'\xE0'; // lead byte of a 3-byte UTF-8 sequence
+        let first4byte = b'\xF0'; // lead byte of a 4-byte UTF-8 sequence
+        let invalid = b'\xC0'; // byte that never occurs in well-formed UTF-8
+
+        // a lone continuation byte, which is not valid UTF-8 without a preceding lead byte
+        check_names_raw(
+            &[continuation],
+            &[
+                (b"?", "literal"),
+                (b"\xA7", "literal-show"),
+                (b"\\247", "escape"),
+                (b"\"\\247\"", "c"),
+                (b"?", "shell"),
+                (b"\xA7", "shell-show"),
+                (b"'?'", "shell-always"),
+                (b"'\xA7'", "shell-always-show"),
+                (b"''$'\\247'", "shell-escape"),
+                (b"''$'\\247'", "shell-escape-always"),
+            ],
+        );
+
+        // ...but the same byte is valid when it directly follows a 2-byte lead byte
+        // (0xC2 0xA7 is the UTF-8 encoding of the § character, written as bytes)
+        check_names_raw(
+            &[first2byte, continuation],
+            &[
+                (b"\xC2\xA7", "literal"),
+                (b"\xC2\xA7", "literal-show"),
+                (b"\xC2\xA7", "escape"),
+                (b"\"\xC2\xA7\"", "c"),
+                (b"\xC2\xA7", "shell"),
+                (b"\xC2\xA7", "shell-show"),
+                (b"'\xC2\xA7'", "shell-always"),
+                (b"'\xC2\xA7'", "shell-always-show"),
+                (b"\xC2\xA7", "shell-escape"),
+                (b"'\xC2\xA7'", "shell-escape-always"),
+            ],
+        );
+
+        // a lone continuation byte mixed with valid ASCII characters, in every position
+        check_names_raw(
+            &[continuation, ascii],
+            &[
+                (b"?_", "literal"),
+                (b"\xA7_", "literal-show"),
+                (b"\\247_", "escape"),
+                (b"\"\\247_\"", "c"),
+                (b"?_", "shell"),
+                (b"\xA7_", "shell-show"),
+                (b"'?_'", "shell-always"),
+                (b"'\xA7_'", "shell-always-show"),
+                (b"''$'\\247''_'", "shell-escape"),
+                (b"''$'\\247''_'", "shell-escape-always"),
+            ],
+        );
+        check_names_raw(
+            &[ascii, continuation],
+            &[
+                (b"_?", "literal"),
+                (b"_\xA7", "literal-show"),
+                (b"_\\247", "escape"),
+                (b"\"_\\247\"", "c"),
+                (b"_?", "shell"),
+                (b"_\xA7", "shell-show"),
+                (b"'_?'", "shell-always"),
+                (b"'_\xA7'", "shell-always-show"),
+                (b"'_'$'\\247'", "shell-escape"),
+                (b"'_'$'\\247'", "shell-escape-always"),
+            ],
+        );
+        check_names_raw(
+            &[ascii, continuation, ascii],
+            &[
+                (b"_?_", "literal"),
+                (b"_\xA7_", "literal-show"),
+                (b"_\\247_", "escape"),
+                (b"\"_\\247_\"", "c"),
+                (b"_?_", "shell"),
+                (b"_\xA7_", "shell-show"),
+                (b"'_?_'", "shell-always"),
+                (b"'_\xA7_'", "shell-always-show"),
+                (b"'_'$'\\247''_'", "shell-escape"),
+                (b"'_'$'\\247''_'", "shell-escape-always"),
+            ],
+        );
+        check_names_raw(
+            &[continuation, ascii, continuation],
+            &[
+                (b"?_?", "literal"),
+                (b"\xA7_\xA7", "literal-show"),
+                (b"\\247_\\247", "escape"),
+                (b"\"\\247_\\247\"", "c"),
+                (b"?_?", "shell"),
+                (b"\xA7_\xA7", "shell-show"),
+                (b"'?_?'", "shell-always"),
+                (b"'\xA7_\xA7'", "shell-always-show"),
+                (b"''$'\\247''_'$'\\247'", "shell-escape"),
+                (b"''$'\\247''_'$'\\247'", "shell-escape-always"),
+            ],
+        );
+
+        // runs of one to four contiguous invalid bytes, separated by valid ASCII characters
+        check_names_raw(
+            &[
+                ascii,
+                invalid,
+                ascii,
+                continuation,
+                continuation,
+                ascii,
+                continuation,
+                continuation,
+                continuation,
+                ascii,
+                continuation,
+                continuation,
+                continuation,
+                continuation,
+                ascii,
+            ],
+            &[
+                (b"_?_??_???_????_", "literal"),
+                (
+                    b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
+                    "literal-show",
+                ),
+                (
+                    b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_",
+                    "escape",
+                ),
+                (
+                    b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"",
+                    "c",
+                ),
+                (b"_?_??_???_????_", "shell"),
+                (
+                    b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
+                    "shell-show",
+                ),
+                (b"'_?_??_???_????_'", "shell-always"),
+                (
+                    b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'",
+                    "shell-always-show",
+                ),
+                (
+                    b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
+                    "shell-escape",
+                ),
+                (
+                    b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
+                    "shell-escape-always",
+                ),
+            ],
+        );
+
+        // invalid multi-byte sequences that begin with a valid lead byte but are cut short
+        check_names_raw(
+            &[first2byte, ascii],
+            &[
+                (b"?_", "literal"),
+                (b"\xC2_", "literal-show"),
+                (b"\\302_", "escape"),
+                (b"\"\\302_\"", "c"),
+                (b"?_", "shell"),
+                (b"\xC2_", "shell-show"),
+                (b"'?_'", "shell-always"),
+                (b"'\xC2_'", "shell-always-show"),
+                (b"''$'\\302''_'", "shell-escape"),
+                (b"''$'\\302''_'", "shell-escape-always"),
+            ],
+        );
+        check_names_raw(
+            &[first2byte, first2byte, continuation],
+            &[
+                (b"?\xC2\xA7", "literal"),
+                (b"\xC2\xC2\xA7", "literal-show"),
+                (b"\\302\xC2\xA7", "escape"),
+                (b"\"\\302\xC2\xA7\"", "c"),
+                (b"?\xC2\xA7", "shell"),
+                (b"\xC2\xC2\xA7", "shell-show"),
+                (b"'?\xC2\xA7'", "shell-always"),
+                (b"'\xC2\xC2\xA7'", "shell-always-show"),
+                (b"''$'\\302''\xC2\xA7'", "shell-escape"),
+                (b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
+            ],
+        );
+        check_names_raw(
+            &[first3byte, continuation, ascii],
+            &[
+                (b"??_", "literal"),
+                (b"\xE0\xA7_", "literal-show"),
+                (b"\\340\\247_", "escape"),
+                (b"\"\\340\\247_\"", "c"),
+                (b"??_", "shell"),
+                (b"\xE0\xA7_", "shell-show"),
+                (b"'??_'", "shell-always"),
+                (b"'\xE0\xA7_'", "shell-always-show"),
+                (b"''$'\\340\\247''_'", "shell-escape"),
+                (b"''$'\\340\\247''_'", "shell-escape-always"),
+            ],
+        );
+        check_names_raw(
+            &[first4byte, continuation, continuation, ascii],
+            &[
+                (b"???_", "literal"),
+                (b"\xF0\xA7\xA7_", "literal-show"),
+                (b"\\360\\247\\247_", "escape"),
+                (b"\"\\360\\247\\247_\"", "c"),
+                (b"???_", "shell"),
+                (b"\xF0\xA7\xA7_", "shell-show"),
+                (b"'???_'", "shell-always"),
+                (b"'\xF0\xA7\xA7_'", "shell-always-show"),
+                (b"''$'\\360\\247\\247''_'", "shell-escape"),
+                (b"''$'\\360\\247\\247''_'", "shell-escape-always"),
+            ],
+        );
    }

    #[test]
@ -765,7 +1138,7 @@ mod tests {
                ("one\\\\two", "escape"),
                ("\"one\\\\two\"", "c"),
                ("'one\\two'", "shell"),
-                ("\'one\\two\'", "shell-always"),
+                ("'one\\two'", "shell-always"),
                ("'one\\two'", "shell-escape"),
                ("'one\\two'", "shell-escape-always"),
            ],
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@ -255,9 +255,10 @@ pub fn read_yes() -> bool {
    }
 }

-/// Helper function for processing delimiter values (which could be non UTF-8)
-/// It converts OsString to &[u8] for unix targets only
-/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
+/// Converts an `OsStr` to a `&[u8]`.
+///
+/// This always succeeds on unix platforms, where the raw bytes are returned and need not be UTF-8,
+/// and fails on other platforms if the string can't be coerced to UTF-8.
 pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
    #[cfg(unix)]
    let bytes = os_string.as_bytes();
@ -273,13 +274,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
    Ok(bytes)
 }

-/// Helper function for converting a slice of bytes into an &OsStr
-/// or OsString in non-unix targets.
+/// Performs a potentially lossy conversion from `OsStr` to bytes.
 ///
-/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only.
-/// On non-unix (i.e. Windows), the conversion goes through the String type
-/// and thus undergo UTF-8 validation, making it fail if the stream contains
-/// non-UTF-8 characters.
+/// This is always lossless on unix platforms (the raw bytes are borrowed and need not be UTF-8),
+/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms, where the result is valid UTF-8.
+pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
+    #[cfg(unix)]
+    let bytes = Cow::from(os_string.as_bytes()); // borrows the raw bytes; no validation
+
+    #[cfg(not(unix))]
+    let bytes = match os_string.to_string_lossy() {
+        Cow::Borrowed(slice) => Cow::from(slice.as_bytes()), // still borrowed: keep borrowing
+        Cow::Owned(owned) => Cow::from(owned.into_bytes()), // new String was built: reuse its buffer
+    };
+
+    bytes
+}
+
+/// Converts a `&[u8]` to an `&OsStr`,
+/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
+///
+/// This always succeeds on unix platforms,
+/// and fails on other platforms if the bytes can't be parsed as UTF-8.
 pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
    #[cfg(unix)]
    let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
@ -291,9 +307,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
    Ok(os_str)
 }

-/// Helper function for making an `OsString` from a byte field
-/// It converts `Vec<u8>` to `OsString` for unix targets only.
-/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
+/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
+///
+/// This always succeeds on unix platforms,
+/// and fails on other platforms if the bytes can't be parsed as UTF-8.
 pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
    #[cfg(unix)]
    let s = OsString::from_vec(vec);