1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 03:57:44 +00:00

core: improve OsStr(ing) helpers

This adds the `os_str_as_bytes_lossy` function, for when we want infallible
conversion across platforms, and improves the doc comments of similar functions
to be more accurate and better formatted.
This commit is contained in:
Justin Tracey 2024-12-19 17:08:05 -05:00
parent 43229ae104
commit db1ed4c094
No known key found for this signature in database
GPG key ID: 62B84F5ABDDDCE54
2 changed files with 35 additions and 38 deletions

View file

@ -8,8 +8,6 @@
use std::char::from_digit;
use std::ffi::{OsStr, OsString};
use std::fmt;
#[cfg(unix)]
use std::os::unix::ffi::{OsStrExt, OsStringExt};
// These are characters with special meaning in the shell (e.g. bash).
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
@ -462,36 +460,18 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8
/// Escape a filename with respect to the given style.
///
/// The name is converted to bytes with `crate::os_str_as_bytes_lossy`,
/// escaped by `escape_name_inner` (with `dirname` set to `false`), and the
/// escaped bytes are turned back into an `OsString` with
/// `crate::os_string_from_vec`.
///
/// # Panics
///
/// Panics if `crate::os_string_from_vec` rejects the escaped bytes. The
/// `expect` below encodes the assumption that this cannot happen: either the
/// platform accepts arbitrary bytes, or invalid sequences were already
/// replaced during the lossy conversion of `name`.
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
    // Platform differences are handled inside the crate-level helpers, so no
    // `cfg` branching is needed here.
    let name = crate::os_str_as_bytes_lossy(name);
    crate::os_string_from_vec(escape_name_inner(&name, style, false))
        .expect("all byte sequences should be valid for platform, or already replaced in name")
}
/// Escape a directory name with respect to the given style.
///
/// This is mainly meant to be used for ls' directory name printing and is not
/// likely to be used elsewhere.
///
/// The name is converted to bytes with `crate::os_str_as_bytes_lossy`,
/// escaped by `escape_name_inner` (with `dirname` set to `true`), and the
/// escaped bytes are turned back into an `OsString` with
/// `crate::os_string_from_vec`.
///
/// # Panics
///
/// Panics if `crate::os_string_from_vec` rejects the escaped bytes. The
/// `expect` below encodes the assumption that this cannot happen: either the
/// platform accepts arbitrary bytes, or invalid sequences were already
/// replaced during the lossy conversion of `dir_name`.
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
    // Platform differences are handled inside the crate-level helpers, so no
    // `cfg` branching is needed here.
    let name = crate::os_str_as_bytes_lossy(dir_name);
    crate::os_string_from_vec(escape_name_inner(&name, style, true))
        .expect("all byte sequences should be valid for platform, or already replaced in name")
}
impl fmt::Display for QuotingStyle {

View file

@ -253,9 +253,10 @@ pub fn read_yes() -> bool {
}
}
/// Helper function for processing delimiter values (which could be non UTF-8)
/// It converts OsString to &[u8] for unix targets only
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
/// Converts an `OsStr` to a `&[u8]`: the raw bytes on unix, UTF-8 on other platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the string can't be coerced to UTF-8.
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
#[cfg(unix)]
let bytes = os_string.as_bytes();
@ -271,13 +272,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
Ok(bytes)
}
/// Performs a potentially lossy conversion from `OsStr` to bytes.
///
/// On unix platforms this borrows the string's underlying bytes unchanged, so
/// the conversion is always lossless (and the result is not necessarily valid
/// UTF-8).
///
/// On non-unix platforms this wraps [`OsStr::to_string_lossy`], so the result
/// is always UTF-8. A borrowed result stays borrowed; an owned, repaired
/// string is moved into the returned `Cow` as a `Vec<u8>`.
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<'_, [u8]> {
    #[cfg(unix)]
    let bytes = Cow::from(os_string.as_bytes());
    #[cfg(not(unix))]
    let bytes = match os_string.to_string_lossy() {
        // Already valid: keep borrowing from the input.
        Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
        // Invalid sequences were replaced: reuse the allocation as bytes.
        Cow::Owned(owned) => Cow::from(owned.into_bytes()),
    };
    bytes
}
/// Converts a `&[u8]` to an `&OsStr`,
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
#[cfg(unix)]
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
@ -289,9 +305,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
Ok(os_str)
}
/// Helper function for making an `OsString` from a byte field
/// It converts `Vec<u8>` to `OsString` for unix targets only.
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
#[cfg(unix)]
let s = OsString::from_vec(vec);