1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 03:57:44 +00:00

core: improve OsStr(ing) helpers

This adds the `os_str_as_bytes_lossy` function, for when we want infallible
conversion across platforms, and improves the doc comments of similar functions
to be more accurate and better formatted.
This commit is contained in:
Justin Tracey 2024-12-19 17:08:05 -05:00
parent 43229ae104
commit db1ed4c094
No known key found for this signature in database
GPG key ID: 62B84F5ABDDDCE54
2 changed files with 35 additions and 38 deletions

View file

@ -8,8 +8,6 @@
use std::char::from_digit; use std::char::from_digit;
use std::ffi::{OsStr, OsString}; use std::ffi::{OsStr, OsString};
use std::fmt; use std::fmt;
#[cfg(unix)]
use std::os::unix::ffi::{OsStrExt, OsStringExt};
// These are characters with special meaning in the shell (e.g. bash). // These are characters with special meaning in the shell (e.g. bash).
// The first const contains characters that only have a special meaning when they appear at the beginning of a name. // The first const contains characters that only have a special meaning when they appear at the beginning of a name.
@ -462,36 +460,18 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8
/// Escape a filename with respect to the given style. /// Escape a filename with respect to the given style.
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString { pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
#[cfg(unix)] let name = crate::os_str_as_bytes_lossy(name);
{ crate::os_string_from_vec(escape_name_inner(&name, style, false))
let name = name.as_bytes(); .expect("all byte sequences should be valid for platform, or already replaced in name")
OsStringExt::from_vec(escape_name_inner(name, style, false))
}
#[cfg(not(unix))]
{
let name = name.to_string_lossy();
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false))
.to_string()
.into()
}
} }
/// Escape a directory name with respect to the given style. /// Escape a directory name with respect to the given style.
/// This is mainly meant to be used for ls' directory name printing and is not /// This is mainly meant to be used for ls' directory name printing and is not
/// likely to be used elsewhere. /// likely to be used elsewhere.
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
#[cfg(unix)] let name = crate::os_str_as_bytes_lossy(dir_name);
{ crate::os_string_from_vec(escape_name_inner(&name, style, true))
let name = dir_name.as_bytes(); .expect("all byte sequences should be valid for platform, or already replaced in name")
OsStringExt::from_vec(escape_name_inner(name, style, true))
}
#[cfg(not(unix))]
{
let name = dir_name.to_string_lossy();
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, true))
.to_string()
.into()
}
} }
impl fmt::Display for QuotingStyle { impl fmt::Display for QuotingStyle {

View file

@ -253,9 +253,10 @@ pub fn read_yes() -> bool {
} }
} }
/// Helper function for processing delimiter values (which could be non UTF-8) /// Converts an `OsStr` to a UTF-8 `&[u8]`.
/// It converts OsString to &[u8] for unix targets only ///
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 /// This always succeeds on unix platforms,
/// and fails on other platforms if the string can't be coerced to UTF-8.
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
#[cfg(unix)] #[cfg(unix)]
let bytes = os_string.as_bytes(); let bytes = os_string.as_bytes();
@ -271,13 +272,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
Ok(bytes) Ok(bytes)
} }
/// Helper function for converting a slice of bytes into an &OsStr /// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
/// or OsString in non-unix targets.
/// ///
/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only. /// This is always lossless on unix platforms,
/// On non-unix (i.e. Windows), the conversion goes through the String type /// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
/// and thus undergo UTF-8 validation, making it fail if the stream contains pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
/// non-UTF-8 characters. #[cfg(unix)]
let bytes = Cow::from(os_string.as_bytes());
#[cfg(not(unix))]
let bytes = match os_string.to_string_lossy() {
Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
Cow::Owned(owned) => Cow::from(owned.into_bytes()),
};
bytes
}
/// Converts a `&[u8]` to an `&OsStr`,
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> { pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
#[cfg(unix)] #[cfg(unix)]
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes)); let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
@ -289,9 +305,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
Ok(os_str) Ok(os_str)
} }
/// Helper function for making an `OsString` from a byte field /// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
/// It converts `Vec<u8>` to `OsString` for unix targets only. ///
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8 /// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> { pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
#[cfg(unix)] #[cfg(unix)]
let s = OsString::from_vec(vec); let s = OsString::from_vec(vec);