mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 03:57:44 +00:00
core: improve OsStr(ing) helpers
This adds the `os_str_as_bytes_lossy` function, for when we want infallible conversion across platforms, and improves the doc comments of similar functions to be more accurate and better formatted.
This commit is contained in:
parent
43229ae104
commit
db1ed4c094
2 changed files with 35 additions and 38 deletions
|
@ -8,8 +8,6 @@
|
||||||
use std::char::from_digit;
|
use std::char::from_digit;
|
||||||
use std::ffi::{OsStr, OsString};
|
use std::ffi::{OsStr, OsString};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
#[cfg(unix)]
|
|
||||||
use std::os::unix::ffi::{OsStrExt, OsStringExt};
|
|
||||||
|
|
||||||
// These are characters with special meaning in the shell (e.g. bash).
|
// These are characters with special meaning in the shell (e.g. bash).
|
||||||
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
||||||
|
@ -462,36 +460,18 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8
|
||||||
|
|
||||||
/// Escape a filename with respect to the given style.
|
/// Escape a filename with respect to the given style.
|
||||||
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
|
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
|
||||||
#[cfg(unix)]
|
let name = crate::os_str_as_bytes_lossy(name);
|
||||||
{
|
crate::os_string_from_vec(escape_name_inner(&name, style, false))
|
||||||
let name = name.as_bytes();
|
.expect("all byte sequences should be valid for platform, or already replaced in name")
|
||||||
OsStringExt::from_vec(escape_name_inner(name, style, false))
|
|
||||||
}
|
|
||||||
#[cfg(not(unix))]
|
|
||||||
{
|
|
||||||
let name = name.to_string_lossy();
|
|
||||||
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false))
|
|
||||||
.to_string()
|
|
||||||
.into()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escape a directory name with respect to the given style.
|
/// Escape a directory name with respect to the given style.
|
||||||
/// This is mainly meant to be used for ls' directory name printing and is not
|
/// This is mainly meant to be used for ls' directory name printing and is not
|
||||||
/// likely to be used elsewhere.
|
/// likely to be used elsewhere.
|
||||||
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
|
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
|
||||||
#[cfg(unix)]
|
let name = crate::os_str_as_bytes_lossy(dir_name);
|
||||||
{
|
crate::os_string_from_vec(escape_name_inner(&name, style, true))
|
||||||
let name = dir_name.as_bytes();
|
.expect("all byte sequences should be valid for platform, or already replaced in name")
|
||||||
OsStringExt::from_vec(escape_name_inner(name, style, true))
|
|
||||||
}
|
|
||||||
#[cfg(not(unix))]
|
|
||||||
{
|
|
||||||
let name = dir_name.to_string_lossy();
|
|
||||||
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, true))
|
|
||||||
.to_string()
|
|
||||||
.into()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for QuotingStyle {
|
impl fmt::Display for QuotingStyle {
|
||||||
|
|
|
@ -253,9 +253,10 @@ pub fn read_yes() -> bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper function for processing delimiter values (which may not be valid UTF-8).
|
/// Converts an `OsStr` to a `&[u8]` (the raw bytes on unix, UTF-8 elsewhere).
|
||||||
/// It converts OsString to &[u8] for unix targets only
|
///
|
||||||
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
|
/// This always succeeds on unix platforms,
|
||||||
|
/// and fails on other platforms if the string can't be coerced to UTF-8.
|
||||||
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
let bytes = os_string.as_bytes();
|
let bytes = os_string.as_bytes();
|
||||||
|
@ -271,13 +272,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper function for converting a slice of bytes into an &OsStr
|
/// Performs a potentially lossy conversion from `OsStr` to bytes (the raw bytes on unix, UTF-8 elsewhere).
|
||||||
/// or OsString in non-unix targets.
|
|
||||||
///
|
///
|
||||||
/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only.
|
/// This is always lossless on unix platforms,
|
||||||
/// On non-unix (i.e. Windows), the conversion goes through the String type
|
/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
|
||||||
/// and thus undergo UTF-8 validation, making it fail if the stream contains
|
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
|
||||||
/// non-UTF-8 characters.
|
#[cfg(unix)]
|
||||||
|
let bytes = Cow::from(os_string.as_bytes());
|
||||||
|
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
let bytes = match os_string.to_string_lossy() {
|
||||||
|
Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
|
||||||
|
Cow::Owned(owned) => Cow::from(owned.into_bytes()),
|
||||||
|
};
|
||||||
|
|
||||||
|
bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a `&[u8]` to an `&OsStr`,
|
||||||
|
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
|
||||||
|
///
|
||||||
|
/// This always succeeds on unix platforms,
|
||||||
|
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
|
||||||
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
|
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
|
||||||
|
@ -289,9 +305,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
||||||
Ok(os_str)
|
Ok(os_str)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper function for making an `OsString` from a byte field
|
/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
|
||||||
/// It converts `Vec<u8>` to `OsString` for unix targets only.
|
///
|
||||||
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
|
/// This always succeeds on unix platforms,
|
||||||
|
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
|
||||||
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
|
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
let s = OsString::from_vec(vec);
|
let s = OsString::from_vec(vec);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue