diff --git a/src/uucore/src/lib/features/quoting_style.rs b/src/uucore/src/lib/features/quoting_style.rs index 2e9cd0b7e..6d0265dc6 100644 --- a/src/uucore/src/lib/features/quoting_style.rs +++ b/src/uucore/src/lib/features/quoting_style.rs @@ -8,8 +8,6 @@ use std::char::from_digit; use std::ffi::{OsStr, OsString}; use std::fmt; -#[cfg(unix)] -use std::os::unix::ffi::{OsStrExt, OsStringExt}; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. @@ -462,36 +460,18 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec OsString { - #[cfg(unix)] - { - let name = name.as_bytes(); - OsStringExt::from_vec(escape_name_inner(name, style, false)) - } - #[cfg(not(unix))] - { - let name = name.to_string_lossy(); - String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)) - .to_string() - .into() - } + let name = crate::os_str_as_bytes_lossy(name); + crate::os_string_from_vec(escape_name_inner(&name, style, false)) + .expect("all byte sequences should be valid for platform, or already replaced in name") } /// Escape a directory name with respect to the given style. /// This is mainly meant to be used for ls' directory name printing and is not /// likely to be used elsewhere. pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { - #[cfg(unix)] - { - let name = dir_name.as_bytes(); - OsStringExt::from_vec(escape_name_inner(name, style, true)) - } - #[cfg(not(unix))] - { - let name = dir_name.to_string_lossy(); - String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, true)) - .to_string() - .into() - } + let name = crate::os_str_as_bytes_lossy(dir_name); + crate::os_string_from_vec(escape_name_inner(&name, style, true)) + .expect("all byte sequences should be valid for platform, or already replaced in name") } impl fmt::Display for QuotingStyle { diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 6142e688d..e98a22815 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -253,9 +253,10 @@ pub fn read_yes() -> bool { } } -/// Helper function for processing delimiter values (which could be non UTF-8) -/// It converts OsString to &[u8] for unix targets only -/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 +/// Converts an `OsStr` to a UTF-8 `&[u8]`. +/// +/// This always succeeds on unix platforms, +/// and fails on other platforms if the string can't be coerced to UTF-8. pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { #[cfg(unix)] let bytes = os_string.as_bytes(); @@ -271,13 +272,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { Ok(bytes) } -/// Helper function for converting a slice of bytes into an &OsStr -/// or OsString in non-unix targets. +/// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes. /// -/// It converts `&[u8]` to `Cow` for unix targets only. -/// On non-unix (i.e. Windows), the conversion goes through the String type -/// and thus undergo UTF-8 validation, making it fail if the stream contains -/// non-UTF-8 characters. +/// This is always lossless on unix platforms, +/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms. +pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> { + #[cfg(unix)] + let bytes = Cow::from(os_string.as_bytes()); + + #[cfg(not(unix))] + let bytes = match os_string.to_string_lossy() { + Cow::Borrowed(slice) => Cow::from(slice.as_bytes()), + Cow::Owned(owned) => Cow::from(owned.into_bytes()), + }; + + bytes +} + +/// Converts a `&[u8]` to an `&OsStr`, +/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms. +/// +/// This always succeeds on unix platforms, +/// and fails on other platforms if the bytes can't be parsed as UTF-8. pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { #[cfg(unix)] let os_str = Cow::Borrowed(OsStr::from_bytes(bytes)); @@ -289,9 +305,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { Ok(os_str) } -/// Helper function for making an `OsString` from a byte field -/// It converts `Vec` to `OsString` for unix targets only. -/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8 +/// Converts a `Vec` into an `OsString`, parsing as UTF-8 on non-unix platforms. +/// +/// This always succeeds on unix platforms, +/// and fails on other platforms if the bytes can't be parsed as UTF-8. pub fn os_string_from_vec(vec: Vec) -> mods::error::UResult { #[cfg(unix)] let s = OsString::from_vec(vec);