Merge pull request #6882 from jtracey/quoting_style_bytes

quoting_style: Add support for non-UTF-8 bytes
2025-07-29 03:57:44 +00:00 · 2024-12-21 23:17:43 +01:00 · 2024-12-21 23:17:43 +01:00 · bb2fb66073
commit bb2fb66073
parent 21e7149a92 db1ed4c094
10 changed files with 584 additions and 175 deletions
--- a/.clippy.toml
+++ b/.clippy.toml
@ -1,4 +1,4 @@
-msrv = "1.77.0"
+msrv = "1.79.0"
 cognitive-complexity-threshold = 24
 missing-docs-in-crate-items = true
 check-private-items = true
--- a/.github/workflows/CICD.yml
+++ b/.github/workflows/CICD.yml
@ -11,7 +11,7 @@ env:
  PROJECT_NAME: coreutils
  PROJECT_DESC: "Core universal (cross-platform) utilities"
  PROJECT_AUTH: "uutils"
-  RUST_MIN_SRV: "1.77.0"
+  RUST_MIN_SRV: "1.79.0"
  # * style job configuration
  STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis
--- a/Cargo.toml
+++ b/Cargo.toml
@ -16,7 +16,7 @@ repository = "https://github.com/uutils/coreutils"
 readme = "README.md"
 keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
 categories = ["command-line-utilities"]
-rust-version = "1.77.0"
+rust-version = "1.79.0"
 edition = "2021"
 build = "build.rs"
--- a/README.md
+++ b/README.md
@ -14,7 +14,7 @@
 [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils)
 [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils)
-![MSRV](https://img.shields.io/badge/MSRV-1.77.0-brightgreen)
+![MSRV](https://img.shields.io/badge/MSRV-1.79.0-brightgreen)
 </div>
@ -70,7 +70,7 @@ the [coreutils docs](https://github.com/uutils/uutils.github.io) repository.
 ### Rust Version
 uutils follows Rust's release channels and is tested against stable, beta and
-nightly. The current Minimum Supported Rust Version (MSRV) is `1.77.0`.
+nightly. The current Minimum Supported Rust Version (MSRV) is `1.79.0`.
 ## Building
--- a/src/uu/ls/src/ls.rs
+++ b/src/uu/ls/src/ls.rs
@ -21,7 +21,7 @@ use std::os::windows::fs::MetadataExt;
 use std::{
    cmp::Reverse,
    error::Error,
-    ffi::OsString,
+    ffi::{OsStr, OsString},
    fmt::{Display, Write as FmtWrite},
    fs::{self, DirEntry, FileType, Metadata, ReadDir},
    io::{stdout, BufWriter, ErrorKind, Stdout, Write},
@ -55,7 +55,7 @@ use uucore::libc::{dev_t, major, minor};
 #[cfg(unix)]
 use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
 use uucore::line_ending::LineEnding;
-use uucore::quoting_style::{escape_dir_name, escape_name, QuotingStyle};
+use uucore::quoting_style::{self, QuotingStyle};
 use uucore::{
    display::Quotable,
    error::{set_exit_code, UError, UResult},
@ -2048,7 +2048,11 @@ impl PathData {
 /// file11
 /// ```
 fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) {
-    let escaped_name = escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
+    // FIXME: replace this with appropriate behavior for literal unprintable bytes
    let escaped_name =
        quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style)
            .to_string_lossy()
            .to_string();
    let name = if config.hyperlink && !config.dired {
        create_hyperlink(&escaped_name, path_data)
@ -3002,7 +3006,6 @@ use std::sync::Mutex;
 #[cfg(unix)]
 use uucore::entries;
 use uucore::fs::FileInformation;
 use uucore::quoting_style;
 #[cfg(unix)]
 fn cached_uid2usr(uid: u32) -> String {
@ -3542,3 +3545,10 @@ fn calculate_padding_collection(
    padding_collections
 }
 // FIXME: replace this with appropriate behavior for literal unprintable bytes
 /// Escapes `name` according to `style` and returns the result as a `String`.
 ///
 /// The escaped name is converted lossily, so anything in it that is not
 /// valid Unicode does not survive the conversion unchanged.
 fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
    let escaped = quoting_style::escape_name(name, style);
    escaped.to_string_lossy().into_owned()
 }
--- a/src/uu/wc/src/wc.rs
+++ b/src/uu/wc/src/wc.rs
@ -13,7 +13,7 @@ mod word_count;
 use std::{
    borrow::{Borrow, Cow},
    cmp::max,
-    ffi::OsString,
+    ffi::{OsStr, OsString},
    fs::{self, File},
    io::{self, Write},
    iter,
@ -28,7 +28,7 @@ use utf8::{BufReadDecoder, BufReadDecoderError};
 use uucore::{
    error::{FromIo, UError, UResult},
    format_usage, help_about, help_usage,
-    quoting_style::{escape_name, QuotingStyle},
+    quoting_style::{self, QuotingStyle},
    shortcut_value_parser::ShortcutValueParser,
    show,
 };
@ -259,7 +259,7 @@ impl<'a> Input<'a> {
        match self {
            Self::Path(path) => Some(match path.to_str() {
                Some(s) if !s.contains('\n') => Cow::Borrowed(s),
-                _ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)),
+                _ => Cow::Owned(escape_name_wrapper(path.as_os_str())),
            }),
            Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)),
            Self::Stdin(StdinKind::Implicit) => None,
@ -269,7 +269,7 @@ impl<'a> Input<'a> {
    /// Converts input into the form that appears in errors.
    fn path_display(&self) -> String {
        match self {
-            Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE),
+            Self::Path(path) => escape_name_wrapper(path.as_os_str()),
            Self::Stdin(_) => String::from("standard input"),
        }
    }
@ -361,7 +361,7 @@ impl WcError {
            Some((input, idx)) => {
                let path = match input {
                    Input::Stdin(_) => STDIN_REPR.into(),
-                    Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(),
+                    Input::Path(path) => escape_name_wrapper(path.as_os_str()).into(),
                };
                Self::ZeroLengthFileNameCtx { path, idx }
            }
@ -761,7 +761,9 @@ fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterIt
        Err(e) => Err(e.map_err_context(|| {
            format!(
                "cannot open {} for reading",
-                escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
+                quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
                    .into_string()
                    .expect("All escaped names with the escaping option return valid strings.")
            )
        })),
    }
@ -793,9 +795,9 @@ fn files0_iter<'a>(
                        Ok(Input::Path(PathBuf::from(s).into()))
                    }
                }
-                Err(e) => Err(e.map_err_context(|| {
+                Err(e) => Err(e
-                    format!("{}: read error", escape_name(&err_path, QS_ESCAPE))
+                    .map_err_context(|| format!("{}: read error", escape_name_wrapper(&err_path)))
-                }) as Box<dyn UError>),
+                    as Box<dyn UError>),
            }),
    );
    // Loop until there is an error; yield that error and then nothing else.
@ -808,6 +810,12 @@ fn files0_iter<'a>(
    })
 }
 /// Escapes `name` with the `QS_ESCAPE` quoting style and returns it as a `String`.
 ///
 /// # Panics
 ///
 /// Panics if the escaped name cannot be converted into a `String`; the
 /// `expect` message records the assumption that this does not happen with
 /// the escaping option.
 fn escape_name_wrapper(name: &OsStr) -> String {
    let escaped = quoting_style::escape_name(name, QS_ESCAPE);
    escaped
        .into_string()
        .expect("All escaped names with the escaping option return valid strings.")
 }
 fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> {
    let mut total_word_count = WordCount::default();
    let mut num_inputs: usize = 0;
--- a/src/uucore/src/lib/features/format/argument.rs
+++ b/src/uucore/src/lib/features/format/argument.rs
@ -112,7 +112,8 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
                    Default::default()
                }
                ParseError::PartialMatch(v, rest) => {
-                    if input.starts_with('\'') {
+                    let bytes = input.as_encoded_bytes();
                    if !bytes.is_empty() && bytes[0] == b'\'' {
                        show_warning!(
                            "{}: character(s) following character constant have been ignored",
                            &rest,
--- a/src/uucore/src/lib/features/format/spec.rs
+++ b/src/uucore/src/lib/features/format/spec.rs
@ -353,20 +353,20 @@ impl Spec {
                writer.write_all(&parsed).map_err(FormatError::IoError)
            }
            Self::QuotedString => {
-                let s = args.get_str();
+                let s = escape_name(
-                writer
+                    args.get_str().as_ref(),
                    .write_all(
                        escape_name(
                            s.as_ref(),
                    &QuotingStyle::Shell {
                        escape: true,
                        always_quote: false,
                        show_control: false,
                    },
-                        )
+                );
-                        .as_bytes(),
+                #[cfg(unix)]
-                    )
+                let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
-                    .map_err(FormatError::IoError)
+                #[cfg(not(unix))]
                let bytes = s.to_string_lossy().as_bytes().to_owned();
                writer.write_all(&bytes).map_err(FormatError::IoError)
            }
            Self::SignedInt {
                width,
--- a/src/uucore/src/lib/features/quoting_style.rs
+++ b/src/uucore/src/lib/features/quoting_style.rs
@ -6,39 +6,43 @@
 //! Set of functions for escaping names according to different quoting styles.
 use std::char::from_digit;
-use std::ffi::OsStr;
+use std::ffi::{OsStr, OsString};
 use std::fmt;
 // These are characters with special meaning in the shell (e.g. bash).
 // The first const contains characters that only have a special meaning when they appear at the beginning of a name.
-const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#'];
+const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
 // PR#6559 : Remove `]{}` from special shell chars.
 const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
 /// The quoting style to use when escaping a name.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum QuotingStyle {
-    /// Escape the name as a literal string.
+    /// Escape the name as a shell string.
    /// Used in, e.g., `ls --quoting-style=shell`.
    Shell {
        /// Whether to escape characters in the name.
        /// True in, e.g., `ls --quoting-style=shell-escape`.
        escape: bool,
        /// Whether to always quote the name.
        always_quote: bool,
-        /// Whether to show control characters.
+        /// Whether to show control and non-unicode characters, or replace them with `?`.
        show_control: bool,
    },
    /// Escape the name as a C string.
    /// Used in, e.g., `ls --quote-name`.
    C {
        /// The type of quotes to use.
        quotes: Quotes,
    },
-    /// Escape the name as a literal string.
+    /// Do not escape the string.
    /// Used in, e.g., `ls --literal`.
    Literal {
-        /// Whether to show control characters.
+        /// Whether to show control and non-unicode characters, or replace them with `?`.
        show_control: bool,
    },
 }
@ -72,16 +76,24 @@ enum EscapeState {
    Octal(EscapeOctal),
 }
 /// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
 /// Only supports characters up to 2 bytes long in UTF-8.
 struct EscapeOctal {
-    c: char,
+    c: [u8; 2],
    state: EscapeOctalState,
-    idx: usize,
+    idx: u8,
 }
 enum EscapeOctalState {
    Done,
-    Backslash,
+    FirstBackslash,
-    Value,
+    FirstValue,
    LastBackslash,
    LastValue,
 }
 /// Returns the octal digit of `byte` at position `idx`, where position 0 is
 /// the least significant digit (each digit is one 3-bit group).
 fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
    let shift = idx * 3;
    let shifted = byte >> shift;
    // Keep only the lowest three bits, i.e. a single octal digit.
    shifted & 0o7
 }
 impl Iterator for EscapeOctal {
@ -90,29 +102,57 @@ impl Iterator for EscapeOctal {
    fn next(&mut self) -> Option<char> {
        match self.state {
            EscapeOctalState::Done => None,
-            EscapeOctalState::Backslash => {
+            EscapeOctalState::FirstBackslash => {
-                self.state = EscapeOctalState::Value;
+                self.state = EscapeOctalState::FirstValue;
                Some('\\')
            }
-            EscapeOctalState::Value => {
+            EscapeOctalState::LastBackslash => {
-                let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7;
+                self.state = EscapeOctalState::LastValue;
                Some('\\')
            }
            EscapeOctalState::FirstValue => {
                let octal_digit = byte_to_octal_digit(self.c[0], self.idx);
                if self.idx == 0 {
                    self.state = EscapeOctalState::LastBackslash;
                    self.idx = 2;
                } else {
                    self.idx -= 1;
                }
                Some(from_digit(octal_digit.into(), 8).unwrap())
            }
            EscapeOctalState::LastValue => {
                let octal_digit = byte_to_octal_digit(self.c[1], self.idx);
                if self.idx == 0 {
                    self.state = EscapeOctalState::Done;
                } else {
                    self.idx -= 1;
                }
-                Some(from_digit(octal_digit, 8).unwrap())
+                Some(from_digit(octal_digit.into(), 8).unwrap())
            }
        }
    }
 }
 impl EscapeOctal {
-    fn from(c: char) -> Self {
+    fn from_char(c: char) -> Self {
        if c.len_utf8() == 1 {
            return Self::from_byte(c as u8);
        }
        let mut buf = [0; 2];
        let _s = c.encode_utf8(&mut buf);
        Self {
-            c,
+            c: buf,
            idx: 2,
-            state: EscapeOctalState::Backslash,
+            state: EscapeOctalState::FirstBackslash,
        }
    }
    fn from_byte(b: u8) -> Self {
        Self {
            c: [0, b],
            idx: 2,
            state: EscapeOctalState::LastBackslash,
        }
    }
 }
@ -124,6 +164,12 @@ impl EscapedChar {
        }
    }
    /// Creates an escaped character that renders the raw byte `b` as an
    /// octal escape sequence.
    fn new_octal(b: u8) -> Self {
        let state = EscapeState::Octal(EscapeOctal::from_byte(b));
        Self { state }
    }
    fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
        use EscapeState::*;
        let init_state = match c {
@ -148,7 +194,7 @@ impl EscapedChar {
                _ => Char(' '),
            },
            ':' if dirname => Backslash(':'),
-            _ if c.is_ascii_control() => Octal(EscapeOctal::from(c)),
+            _ if c.is_control() => Octal(EscapeOctal::from_char(c)),
            _ => Char(c),
        };
        Self { state: init_state }
@ -165,11 +211,11 @@ impl EscapedChar {
            '\x0B' => Backslash('v'),
            '\x0C' => Backslash('f'),
            '\r' => Backslash('r'),
            '\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)),
            '\'' => match quotes {
                Quotes::Single => Backslash('\''),
                _ => Char('\''),
            },
            _ if c.is_control() => Octal(EscapeOctal::from_char(c)),
            _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
            _ => Char(c),
        };
@ -205,11 +251,18 @@ impl Iterator for EscapedChar {
    }
 }
-fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) {
+/// Check whether `bytes` starts with any byte in `pattern`.
-    let mut must_quote = false;
+fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
-    let mut escaped_str = String::with_capacity(name.len());
+    !bytes.is_empty() && pattern.contains(&bytes[0])
 }
-    for c in name.chars() {
+fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
    let mut must_quote = false;
    let mut escaped_str = Vec::with_capacity(name.len());
    let mut utf8_buf = vec![0; 4];
    for s in name.utf8_chunks() {
        for c in s.valid().chars() {
            let escaped = {
                let ec = EscapedChar::new_shell(c, false, quotes);
                if show_control_chars {
@ -220,31 +273,39 @@ fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) ->
            };
            match escaped.state {
-            EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"),
+                EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"),
                EscapeState::ForceQuote(x) => {
                    must_quote = true;
-                escaped_str.push(x);
+                    escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes());
                }
                _ => {
-                for char in escaped {
+                    for c in escaped {
-                    escaped_str.push(char);
+                        escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes());
                    }
                }
            }
        }
-    must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
+        if show_control_chars {
            escaped_str.extend_from_slice(s.invalid());
        } else {
            escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?');
        }
    }
    must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
    (escaped_str, must_quote)
 }
-fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
+fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
    // We need to keep track of whether we are in a dollar expression
    // because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
    let mut in_dollar = false;
    let mut must_quote = false;
    let mut escaped_str = String::with_capacity(name.len());
-    for c in name.chars() {
+    for s in name.utf8_chunks() {
        for c in s.valid().chars() {
            let escaped = EscapedChar::new_shell(c, true, quotes);
            match escaped.state {
                EscapeState::Char(x) => {
@ -282,25 +343,32 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
                }
            }
        }
-    must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
+        if !s.invalid().is_empty() {
-    (escaped_str, must_quote)
+            if !in_dollar {
                escaped_str.push_str("'$'");
                in_dollar = true;
            }
            must_quote = true;
            let escaped_bytes: String = s
                .invalid()
                .iter()
                .flat_map(|b| EscapedChar::new_octal(*b))
                .collect();
            escaped_str.push_str(&escaped_bytes);
        }
    }
    must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
    (escaped_str.into(), must_quote)
 }
 /// Return a set of characters that implies quoting of the word in
 /// shell-quoting mode.
-fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
+fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
-    const ESCAPED_CHARS: &[char] = &[
+    const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
    // The ':' colon character only induces quoting in the
    // context of ls displaying a directory name before listing its content
    // (e.g. with the recursive flag -R).
        ':',
        // Under this line are the control characters that should be
        // quoted in shell mode in all cases.
        '"', '`', '$', '\\', '^', '\n', '\t', '\r', '=',
    ];
    let start_index = if is_dirname { 0 } else { 1 };
    &ESCAPED_CHARS[start_index..]
 }
@ -308,41 +376,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
 ///
 /// This inner function provides an additional flag `dirname` which
 /// is meant for ls' directory name display.
-fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String {
+fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
    match style {
        QuotingStyle::Literal { show_control } => {
            if *show_control {
-                name.to_string_lossy().into_owned()
+                name.to_owned()
            } else {
-                name.to_string_lossy()
+                name.utf8_chunks()
                    .map(|s| {
                        let valid: String = s
                            .valid()
                            .chars()
                            .flat_map(|c| EscapedChar::new_literal(c).hide_control())
-                    .collect()
+                            .collect();
                        let invalid = "?".repeat(s.invalid().len());
                        valid + &invalid
                    })
                    .collect::<String>()
                    .into()
            }
        }
        QuotingStyle::C { quotes } => {
            let escaped_str: String = name
-                .to_string_lossy()
+                .utf8_chunks()
                .flat_map(|s| {
                    let valid = s
                        .valid()
                        .chars()
-                .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname))
+                        .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
-                .collect();
+                    let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
                    valid.chain(invalid)
                })
                .collect::<String>();
            match quotes {
                Quotes::Single => format!("'{escaped_str}'"),
                Quotes::Double => format!("\"{escaped_str}\""),
                Quotes::None => escaped_str,
            }
            .into()
        }
        QuotingStyle::Shell {
            escape,
            always_quote,
            show_control,
        } => {
-            let name = name.to_string_lossy();
+            let (quotes, must_quote) = if name
-
+                .iter()
-            let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) {
+                .any(|c| shell_escaped_char_set(dirname).contains(c))
            {
                (Quotes::Single, true)
-            } else if name.contains('\'') {
+            } else if name.contains(&b'\'') {
                (Quotes::Double, true)
            } else if *always_quote {
                (Quotes::Single, true)
@ -351,30 +435,43 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
            };
            let (escaped_str, contains_quote_chars) = if *escape {
-                shell_with_escape(&name, quotes)
+                shell_with_escape(name, quotes)
            } else {
-                shell_without_escape(&name, quotes, *show_control)
+                shell_without_escape(name, quotes, *show_control)
            };
-            match (must_quote | contains_quote_chars, quotes) {
+            if must_quote | contains_quote_chars && quotes != Quotes::None {
-                (true, Quotes::Single) => format!("'{escaped_str}'"),
+                let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
-                (true, Quotes::Double) => format!("\"{escaped_str}\""),
+                let quote = if quotes == Quotes::Single {
-                _ => escaped_str,
+                    b'\''
                } else {
                    b'"'
                };
                quoted_str.push(quote);
                quoted_str.extend(escaped_str);
                quoted_str.push(quote);
                quoted_str
            } else {
                escaped_str
            }
        }
    }
 }
 /// Escape a filename with respect to the given style.
-pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
+pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
-    escape_name_inner(name, style, false)
+    let name = crate::os_str_as_bytes_lossy(name);
    crate::os_string_from_vec(escape_name_inner(&name, style, false))
        .expect("all byte sequences should be valid for platform, or already replaced in name")
 }
 /// Escape a directory name with respect to the given style.
 /// This is mainly meant to be used for ls' directory name printing and is not
 /// likely to be used elsewhere.
-pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String {
+pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
-    escape_name_inner(dir_name, style, true)
+    let name = crate::os_str_as_bytes_lossy(dir_name);
    crate::os_string_from_vec(escape_name_inner(&name, style, true))
        .expect("all byte sequences should be valid for platform, or already replaced in name")
 }
 impl fmt::Display for QuotingStyle {
@ -415,7 +512,7 @@ impl fmt::Display for Quotes {
 #[cfg(test)]
 mod tests {
-    use crate::quoting_style::{escape_name, Quotes, QuotingStyle};
+    use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle};
    // spell-checker:ignore (tests/words) one\'two one'two
@ -465,14 +562,31 @@ mod tests {
        }
    }
    /// Escapes `name` once for every entry of `map`, using the quoting style
    /// named by the second element of each pair. The first element of each
    /// pair is ignored here. Results are returned in the order of `map`.
    fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
        let mut escaped_names = Vec::with_capacity(map.len());
        for (_, style_name) in map {
            let style = get_style(style_name);
            escaped_names.push(escape_name_inner(name, &style, false));
        }
        escaped_names
    }
    fn check_names(name: &str, map: &[(&str, &str)]) {
        assert_eq!(
            map.iter()
-                .map(|(_, style)| escape_name(name.as_ref(), &get_style(style)))
+                .map(|(correct, _)| *correct)
-                .collect::<Vec<String>>(),
+                .collect::<Vec<&str>>(),
            check_names_inner(name.as_bytes(), map)
                .iter()
                .map(|bytes| std::str::from_utf8(bytes)
                    .expect("valid str goes in, valid str comes out"))
                .collect::<Vec<&str>>()
        );
    }
    fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) {
        assert_eq!(
            map.iter()
-                .map(|(correct, _)| correct.to_string())
+                .map(|(correct, _)| *correct)
-                .collect::<Vec<String>>()
+                .collect::<Vec<&[u8]>>(),
            check_names_inner(name, map)
        );
    }
@ -487,10 +601,10 @@ mod tests {
                ("\"one_two\"", "c"),
                ("one_two", "shell"),
                ("one_two", "shell-show"),
-                ("\'one_two\'", "shell-always"),
+                ("'one_two'", "shell-always"),
-                ("\'one_two\'", "shell-always-show"),
+                ("'one_two'", "shell-always-show"),
                ("one_two", "shell-escape"),
-                ("\'one_two\'", "shell-escape-always"),
+                ("'one_two'", "shell-escape-always"),
            ],
        );
    }
@ -504,12 +618,12 @@ mod tests {
                ("one two", "literal-show"),
                ("one\\ two", "escape"),
                ("\"one two\"", "c"),
-                ("\'one two\'", "shell"),
+                ("'one two'", "shell"),
-                ("\'one two\'", "shell-show"),
+                ("'one two'", "shell-show"),
-                ("\'one two\'", "shell-always"),
+                ("'one two'", "shell-always"),
-                ("\'one two\'", "shell-always-show"),
+                ("'one two'", "shell-always-show"),
-                ("\'one two\'", "shell-escape"),
+                ("'one two'", "shell-escape"),
-                ("\'one two\'", "shell-escape-always"),
+                ("'one two'", "shell-escape-always"),
            ],
        );
@ -551,7 +665,7 @@ mod tests {
        // One single quote
        check_names(
-            "one\'two",
+            "one'two",
            &[
                ("one'two", "literal"),
                ("one'two", "literal-show"),
@ -637,7 +751,7 @@ mod tests {
            ],
        );
-        // The first 16 control characters. NUL is also included, even though it is of
+        // The first 16 ASCII control characters. NUL is also included, even though it is of
        // no importance for file names.
        check_names(
            "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
@ -676,7 +790,7 @@ mod tests {
            ],
        );
-        // The last 16 control characters.
+        // The last 16 ASCII control characters.
        check_names(
            "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
            &[
@ -730,6 +844,265 @@ mod tests {
                ("''$'\\177'", "shell-escape-always"),
            ],
        );
        // The first 16 C1 control characters (U+0080 through U+008F), encoded as UTF-8.
        let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap();
        check_names(
            test_str,
            &[
                ("????????????????", "literal"),
                (test_str, "literal-show"),
                ("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"),
                ("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"),
                ("????????????????", "shell"),
                (test_str, "shell-show"),
                ("'????????????????'", "shell-always"),
                (&format!("'{}'", test_str), "shell-always-show"),
                ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"),
                ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"),
            ],
        );
        // The last 16 C1 control characters (U+0090 through U+009F), encoded as UTF-8.
        let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap();
        check_names(
            test_str,
            &[
                ("????????????????", "literal"),
                (test_str, "literal-show"),
                ("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"),
                ("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"),
                ("????????????????", "shell"),
                (test_str, "shell-show"),
                ("'????????????????'", "shell-always"),
                (&format!("'{}'", test_str), "shell-always-show"),
                ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"),
                ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"),
            ],
        );
    }
    /// Exercises every quoting style against byte sequences that are not
    /// well-formed UTF-8, alone and mixed with valid characters.
    #[test]
    fn test_non_unicode_bytes() {
        // Building blocks for the inputs below.
        /// A plain ASCII byte that never needs quoting.
        const ASCII: u8 = b'_';
        /// A UTF-8 continuation byte; only meaningful after a lead byte.
        const CONT: u8 = b'\xA7';
        /// Lead byte of a two-byte UTF-8 sequence.
        const LEAD2: u8 = b'\xC2';
        /// Lead byte of a three-byte UTF-8 sequence.
        const LEAD3: u8 = b'\xE0';
        /// Lead byte of a four-byte UTF-8 sequence.
        const LEAD4: u8 = b'\xF0';
        /// A byte treated as invalid regardless of its neighbours.
        const INVALID: u8 = b'\xC0';

        // A lone continuation byte has no meaning without a preceding lead byte.
        check_names_raw(
            &[CONT],
            &[
                (b"?", "literal"),
                (b"\xA7", "literal-show"),
                (b"\\247", "escape"),
                (b"\"\\247\"", "c"),
                (b"?", "shell"),
                (b"\xA7", "shell-show"),
                (b"'?'", "shell-always"),
                (b"'\xA7'", "shell-always-show"),
                (b"''$'\\247'", "shell-escape"),
                (b"''$'\\247'", "shell-escape-always"),
            ],
        );

        // Preceded by a two-byte lead, the same byte completes a valid character
        // (the section sign, U+00A7), so nothing has to be replaced or escaped.
        check_names_raw(
            &[LEAD2, CONT],
            &[
                (b"\xC2\xA7", "literal"),
                (b"\xC2\xA7", "literal-show"),
                (b"\xC2\xA7", "escape"),
                (b"\"\xC2\xA7\"", "c"),
                (b"\xC2\xA7", "shell"),
                (b"\xC2\xA7", "shell-show"),
                (b"'\xC2\xA7'", "shell-always"),
                (b"'\xC2\xA7'", "shell-always-show"),
                (b"\xC2\xA7", "shell-escape"),
                (b"'\xC2\xA7'", "shell-escape-always"),
            ],
        );

        // Stray continuation bytes interleaved with valid ASCII: invalid byte first...
        check_names_raw(
            &[CONT, ASCII],
            &[
                (b"?_", "literal"),
                (b"\xA7_", "literal-show"),
                (b"\\247_", "escape"),
                (b"\"\\247_\"", "c"),
                (b"?_", "shell"),
                (b"\xA7_", "shell-show"),
                (b"'?_'", "shell-always"),
                (b"'\xA7_'", "shell-always-show"),
                (b"''$'\\247''_'", "shell-escape"),
                (b"''$'\\247''_'", "shell-escape-always"),
            ],
        );
        // ...invalid byte last...
        check_names_raw(
            &[ASCII, CONT],
            &[
                (b"_?", "literal"),
                (b"_\xA7", "literal-show"),
                (b"_\\247", "escape"),
                (b"\"_\\247\"", "c"),
                (b"_?", "shell"),
                (b"_\xA7", "shell-show"),
                (b"'_?'", "shell-always"),
                (b"'_\xA7'", "shell-always-show"),
                (b"'_'$'\\247'", "shell-escape"),
                (b"'_'$'\\247'", "shell-escape-always"),
            ],
        );
        // ...invalid byte surrounded by valid ones...
        check_names_raw(
            &[ASCII, CONT, ASCII],
            &[
                (b"_?_", "literal"),
                (b"_\xA7_", "literal-show"),
                (b"_\\247_", "escape"),
                (b"\"_\\247_\"", "c"),
                (b"_?_", "shell"),
                (b"_\xA7_", "shell-show"),
                (b"'_?_'", "shell-always"),
                (b"'_\xA7_'", "shell-always-show"),
                (b"'_'$'\\247''_'", "shell-escape"),
                (b"'_'$'\\247''_'", "shell-escape-always"),
            ],
        );
        // ...and a valid byte surrounded by invalid ones.
        check_names_raw(
            &[CONT, ASCII, CONT],
            &[
                (b"?_?", "literal"),
                (b"\xA7_\xA7", "literal-show"),
                (b"\\247_\\247", "escape"),
                (b"\"\\247_\\247\"", "c"),
                (b"?_?", "shell"),
                (b"\xA7_\xA7", "shell-show"),
                (b"'?_?'", "shell-always"),
                (b"'\xA7_\xA7'", "shell-always-show"),
                (b"''$'\\247''_'$'\\247'", "shell-escape"),
                (b"''$'\\247''_'$'\\247'", "shell-escape-always"),
            ],
        );

        // Runs of one to four adjacent invalid bytes, each run separated by ASCII.
        check_names_raw(
            &[
                ASCII, INVALID,
                ASCII, CONT, CONT,
                ASCII, CONT, CONT, CONT,
                ASCII, CONT, CONT, CONT, CONT,
                ASCII,
            ],
            &[
                (b"_?_??_???_????_", "literal"),
                (
                    b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
                    "literal-show",
                ),
                (
                    b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_",
                    "escape",
                ),
                (
                    b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"",
                    "c",
                ),
                (b"_?_??_???_????_", "shell"),
                (
                    b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
                    "shell-show",
                ),
                (b"'_?_??_???_????_'", "shell-always"),
                (
                    b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'",
                    "shell-always-show",
                ),
                (
                    b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
                    "shell-escape",
                ),
                (
                    b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
                    "shell-escape-always",
                ),
            ],
        );

        // Multi-byte sequences that begin with a valid lead byte but are cut short.
        // A two-byte lead followed directly by ASCII:
        check_names_raw(
            &[LEAD2, ASCII],
            &[
                (b"?_", "literal"),
                (b"\xC2_", "literal-show"),
                (b"\\302_", "escape"),
                (b"\"\\302_\"", "c"),
                (b"?_", "shell"),
                (b"\xC2_", "shell-show"),
                (b"'?_'", "shell-always"),
                (b"'\xC2_'", "shell-always-show"),
                (b"''$'\\302''_'", "shell-escape"),
                (b"''$'\\302''_'", "shell-escape-always"),
            ],
        );
        // A dangling two-byte lead followed by a complete two-byte character:
        check_names_raw(
            &[LEAD2, LEAD2, CONT],
            &[
                (b"?\xC2\xA7", "literal"),
                (b"\xC2\xC2\xA7", "literal-show"),
                (b"\\302\xC2\xA7", "escape"),
                (b"\"\\302\xC2\xA7\"", "c"),
                (b"?\xC2\xA7", "shell"),
                (b"\xC2\xC2\xA7", "shell-show"),
                (b"'?\xC2\xA7'", "shell-always"),
                (b"'\xC2\xC2\xA7'", "shell-always-show"),
                (b"''$'\\302''\xC2\xA7'", "shell-escape"),
                (b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
            ],
        );
        // A three-byte lead with only one continuation byte:
        check_names_raw(
            &[LEAD3, CONT, ASCII],
            &[
                (b"??_", "literal"),
                (b"\xE0\xA7_", "literal-show"),
                (b"\\340\\247_", "escape"),
                (b"\"\\340\\247_\"", "c"),
                (b"??_", "shell"),
                (b"\xE0\xA7_", "shell-show"),
                (b"'??_'", "shell-always"),
                (b"'\xE0\xA7_'", "shell-always-show"),
                (b"''$'\\340\\247''_'", "shell-escape"),
                (b"''$'\\340\\247''_'", "shell-escape-always"),
            ],
        );
        // A four-byte lead with only two continuation bytes:
        check_names_raw(
            &[LEAD4, CONT, CONT, ASCII],
            &[
                (b"???_", "literal"),
                (b"\xF0\xA7\xA7_", "literal-show"),
                (b"\\360\\247\\247_", "escape"),
                (b"\"\\360\\247\\247_\"", "c"),
                (b"???_", "shell"),
                (b"\xF0\xA7\xA7_", "shell-show"),
                (b"'???_'", "shell-always"),
                (b"'\xF0\xA7\xA7_'", "shell-always-show"),
                (b"''$'\\360\\247\\247''_'", "shell-escape"),
                (b"''$'\\360\\247\\247''_'", "shell-escape-always"),
            ],
        );
    }
    #[test]
@ -765,7 +1138,7 @@ mod tests {
                ("one\\\\two", "escape"),
                ("\"one\\\\two\"", "c"),
                ("'one\\two'", "shell"),
-                ("\'one\\two\'", "shell-always"),
+                ("'one\\two'", "shell-always"),
                ("'one\\two'", "shell-escape"),
                ("'one\\two'", "shell-escape-always"),
            ],
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@ -255,9 +255,10 @@ pub fn read_yes() -> bool {
    }
 }
-/// Helper function for processing delimiter values (which could be non UTF-8)
+/// Converts an `OsStr` to a `&[u8]` (the raw bytes on unix, UTF-8 elsewhere).
-/// It converts OsString to &[u8] for unix targets only
+///
-/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
+/// This always succeeds on unix platforms,
 /// and fails on other platforms if the string can't be coerced to UTF-8.
 pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
    #[cfg(unix)]
    let bytes = os_string.as_bytes();
@ -273,13 +274,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
    Ok(bytes)
 }
-/// Helper function for converting a slice of bytes into an &OsStr
+/// Performs a potentially lossy conversion from `OsStr` to bytes (the raw bytes on unix, UTF-8 elsewhere).
 /// or OsString in non-unix targets.
 ///
-/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only.
+/// This is always lossless on unix platforms,
-/// On non-unix (i.e. Windows), the conversion goes through the String type
+/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
-/// and thus undergo UTF-8 validation, making it fail if the stream contains
+pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
-/// non-UTF-8 characters.
+    #[cfg(unix)]
    let bytes = Cow::from(os_string.as_bytes());
    #[cfg(not(unix))]
    let bytes = match os_string.to_string_lossy() {
        Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
        Cow::Owned(owned) => Cow::from(owned.into_bytes()),
    };
    bytes
 }
 /// Converts a `&[u8]` to an `&OsStr`,
 /// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
 ///
 /// This always succeeds on unix platforms,
 /// and fails on other platforms if the bytes can't be parsed as UTF-8.
 pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
    #[cfg(unix)]
    let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
@ -291,9 +307,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
    Ok(os_str)
 }
-/// Helper function for making an `OsString` from a byte field
+/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
-/// It converts `Vec<u8>` to `OsString` for unix targets only.
+///
-/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
+/// This always succeeds on unix platforms,
 /// and fails on other platforms if the bytes can't be parsed as UTF-8.
 pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
    #[cfg(unix)]
    let s = OsString::from_vec(vec);