mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #6882 from jtracey/quoting_style_bytes
quoting_style: Add support for non-UTF-8 bytes
This commit is contained in:
commit
bb2fb66073
10 changed files with 584 additions and 175 deletions
|
@ -1,4 +1,4 @@
|
|||
msrv = "1.77.0"
|
||||
msrv = "1.79.0"
|
||||
cognitive-complexity-threshold = 24
|
||||
missing-docs-in-crate-items = true
|
||||
check-private-items = true
|
||||
|
|
2
.github/workflows/CICD.yml
vendored
2
.github/workflows/CICD.yml
vendored
|
@ -11,7 +11,7 @@ env:
|
|||
PROJECT_NAME: coreutils
|
||||
PROJECT_DESC: "Core universal (cross-platform) utilities"
|
||||
PROJECT_AUTH: "uutils"
|
||||
RUST_MIN_SRV: "1.77.0"
|
||||
RUST_MIN_SRV: "1.79.0"
|
||||
# * style job configuration
|
||||
STYLE_FAIL_ON_FAULT: true ## (bool) fail the build if a style job contains a fault (error or warning); may be overridden on a per-job basis
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ repository = "https://github.com/uutils/coreutils"
|
|||
readme = "README.md"
|
||||
keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
|
||||
categories = ["command-line-utilities"]
|
||||
rust-version = "1.77.0"
|
||||
rust-version = "1.79.0"
|
||||
edition = "2021"
|
||||
|
||||
build = "build.rs"
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
[](https://deps.rs/repo/github/uutils/coreutils)
|
||||
|
||||
[](https://codecov.io/gh/uutils/coreutils)
|
||||

|
||||

|
||||
|
||||
</div>
|
||||
|
||||
|
@ -70,7 +70,7 @@ the [coreutils docs](https://github.com/uutils/uutils.github.io) repository.
|
|||
### Rust Version
|
||||
|
||||
uutils follows Rust's release channels and is tested against stable, beta and
|
||||
nightly. The current Minimum Supported Rust Version (MSRV) is `1.77.0`.
|
||||
nightly. The current Minimum Supported Rust Version (MSRV) is `1.79.0`.
|
||||
|
||||
## Building
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ use std::os::windows::fs::MetadataExt;
|
|||
use std::{
|
||||
cmp::Reverse,
|
||||
error::Error,
|
||||
ffi::OsString,
|
||||
ffi::{OsStr, OsString},
|
||||
fmt::{Display, Write as FmtWrite},
|
||||
fs::{self, DirEntry, FileType, Metadata, ReadDir},
|
||||
io::{stdout, BufWriter, ErrorKind, Stdout, Write},
|
||||
|
@ -55,7 +55,7 @@ use uucore::libc::{dev_t, major, minor};
|
|||
#[cfg(unix)]
|
||||
use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
|
||||
use uucore::line_ending::LineEnding;
|
||||
use uucore::quoting_style::{escape_dir_name, escape_name, QuotingStyle};
|
||||
use uucore::quoting_style::{self, QuotingStyle};
|
||||
use uucore::{
|
||||
display::Quotable,
|
||||
error::{set_exit_code, UError, UResult},
|
||||
|
@ -2048,7 +2048,11 @@ impl PathData {
|
|||
/// file11
|
||||
/// ```
|
||||
fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) {
|
||||
let escaped_name = escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
|
||||
// FIXME: replace this with appropriate behavior for literal unprintable bytes
|
||||
let escaped_name =
|
||||
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style)
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
let name = if config.hyperlink && !config.dired {
|
||||
create_hyperlink(&escaped_name, path_data)
|
||||
|
@ -3002,7 +3006,6 @@ use std::sync::Mutex;
|
|||
#[cfg(unix)]
|
||||
use uucore::entries;
|
||||
use uucore::fs::FileInformation;
|
||||
use uucore::quoting_style;
|
||||
|
||||
#[cfg(unix)]
|
||||
fn cached_uid2usr(uid: u32) -> String {
|
||||
|
@ -3542,3 +3545,10 @@ fn calculate_padding_collection(
|
|||
|
||||
padding_collections
|
||||
}
|
||||
|
||||
// FIXME: replace this with appropriate behavior for literal unprintable bytes
|
||||
fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
|
||||
quoting_style::escape_name(name, style)
|
||||
.to_string_lossy()
|
||||
.to_string()
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ mod word_count;
|
|||
use std::{
|
||||
borrow::{Borrow, Cow},
|
||||
cmp::max,
|
||||
ffi::OsString,
|
||||
ffi::{OsStr, OsString},
|
||||
fs::{self, File},
|
||||
io::{self, Write},
|
||||
iter,
|
||||
|
@ -28,7 +28,7 @@ use utf8::{BufReadDecoder, BufReadDecoderError};
|
|||
use uucore::{
|
||||
error::{FromIo, UError, UResult},
|
||||
format_usage, help_about, help_usage,
|
||||
quoting_style::{escape_name, QuotingStyle},
|
||||
quoting_style::{self, QuotingStyle},
|
||||
shortcut_value_parser::ShortcutValueParser,
|
||||
show,
|
||||
};
|
||||
|
@ -259,7 +259,7 @@ impl<'a> Input<'a> {
|
|||
match self {
|
||||
Self::Path(path) => Some(match path.to_str() {
|
||||
Some(s) if !s.contains('\n') => Cow::Borrowed(s),
|
||||
_ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)),
|
||||
_ => Cow::Owned(escape_name_wrapper(path.as_os_str())),
|
||||
}),
|
||||
Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)),
|
||||
Self::Stdin(StdinKind::Implicit) => None,
|
||||
|
@ -269,7 +269,7 @@ impl<'a> Input<'a> {
|
|||
/// Converts input into the form that appears in errors.
|
||||
fn path_display(&self) -> String {
|
||||
match self {
|
||||
Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE),
|
||||
Self::Path(path) => escape_name_wrapper(path.as_os_str()),
|
||||
Self::Stdin(_) => String::from("standard input"),
|
||||
}
|
||||
}
|
||||
|
@ -361,7 +361,7 @@ impl WcError {
|
|||
Some((input, idx)) => {
|
||||
let path = match input {
|
||||
Input::Stdin(_) => STDIN_REPR.into(),
|
||||
Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(),
|
||||
Input::Path(path) => escape_name_wrapper(path.as_os_str()).into(),
|
||||
};
|
||||
Self::ZeroLengthFileNameCtx { path, idx }
|
||||
}
|
||||
|
@ -761,7 +761,9 @@ fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterIt
|
|||
Err(e) => Err(e.map_err_context(|| {
|
||||
format!(
|
||||
"cannot open {} for reading",
|
||||
escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
|
||||
quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
|
||||
.into_string()
|
||||
.expect("All escaped names with the escaping option return valid strings.")
|
||||
)
|
||||
})),
|
||||
}
|
||||
|
@ -793,9 +795,9 @@ fn files0_iter<'a>(
|
|||
Ok(Input::Path(PathBuf::from(s).into()))
|
||||
}
|
||||
}
|
||||
Err(e) => Err(e.map_err_context(|| {
|
||||
format!("{}: read error", escape_name(&err_path, QS_ESCAPE))
|
||||
}) as Box<dyn UError>),
|
||||
Err(e) => Err(e
|
||||
.map_err_context(|| format!("{}: read error", escape_name_wrapper(&err_path)))
|
||||
as Box<dyn UError>),
|
||||
}),
|
||||
);
|
||||
// Loop until there is an error; yield that error and then nothing else.
|
||||
|
@ -808,6 +810,12 @@ fn files0_iter<'a>(
|
|||
})
|
||||
}
|
||||
|
||||
fn escape_name_wrapper(name: &OsStr) -> String {
|
||||
quoting_style::escape_name(name, QS_ESCAPE)
|
||||
.into_string()
|
||||
.expect("All escaped names with the escaping option return valid strings.")
|
||||
}
|
||||
|
||||
fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> {
|
||||
let mut total_word_count = WordCount::default();
|
||||
let mut num_inputs: usize = 0;
|
||||
|
|
|
@ -112,7 +112,8 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
|
|||
Default::default()
|
||||
}
|
||||
ParseError::PartialMatch(v, rest) => {
|
||||
if input.starts_with('\'') {
|
||||
let bytes = input.as_encoded_bytes();
|
||||
if !bytes.is_empty() && bytes[0] == b'\'' {
|
||||
show_warning!(
|
||||
"{}: character(s) following character constant have been ignored",
|
||||
&rest,
|
||||
|
|
|
@ -353,20 +353,20 @@ impl Spec {
|
|||
writer.write_all(&parsed).map_err(FormatError::IoError)
|
||||
}
|
||||
Self::QuotedString => {
|
||||
let s = args.get_str();
|
||||
writer
|
||||
.write_all(
|
||||
escape_name(
|
||||
s.as_ref(),
|
||||
&QuotingStyle::Shell {
|
||||
escape: true,
|
||||
always_quote: false,
|
||||
show_control: false,
|
||||
},
|
||||
)
|
||||
.as_bytes(),
|
||||
)
|
||||
.map_err(FormatError::IoError)
|
||||
let s = escape_name(
|
||||
args.get_str().as_ref(),
|
||||
&QuotingStyle::Shell {
|
||||
escape: true,
|
||||
always_quote: false,
|
||||
show_control: false,
|
||||
},
|
||||
);
|
||||
#[cfg(unix)]
|
||||
let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
|
||||
#[cfg(not(unix))]
|
||||
let bytes = s.to_string_lossy().as_bytes().to_owned();
|
||||
|
||||
writer.write_all(&bytes).map_err(FormatError::IoError)
|
||||
}
|
||||
Self::SignedInt {
|
||||
width,
|
||||
|
|
|
@ -6,39 +6,43 @@
|
|||
//! Set of functions for escaping names according to different quoting styles.
|
||||
|
||||
use std::char::from_digit;
|
||||
use std::ffi::OsStr;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fmt;
|
||||
|
||||
// These are characters with special meaning in the shell (e.g. bash).
|
||||
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
||||
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#'];
|
||||
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
|
||||
// PR#6559 : Remove `]{}` from special shell chars.
|
||||
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
|
||||
|
||||
/// The quoting style to use when escaping a name.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum QuotingStyle {
|
||||
/// Escape the name as a literal string.
|
||||
/// Escape the name as a shell string.
|
||||
/// Used in, e.g., `ls --quoting-style=shell`.
|
||||
Shell {
|
||||
/// Whether to escape characters in the name.
|
||||
/// True in, e.g., `ls --quoting-style=shell-escape`.
|
||||
escape: bool,
|
||||
|
||||
/// Whether to always quote the name.
|
||||
always_quote: bool,
|
||||
|
||||
/// Whether to show control characters.
|
||||
/// Whether to show control and non-unicode characters, or replace them with `?`.
|
||||
show_control: bool,
|
||||
},
|
||||
|
||||
/// Escape the name as a C string.
|
||||
/// Used in, e.g., `ls --quote-name`.
|
||||
C {
|
||||
/// The type of quotes to use.
|
||||
quotes: Quotes,
|
||||
},
|
||||
|
||||
/// Escape the name as a literal string.
|
||||
/// Do not escape the string.
|
||||
/// Used in, e.g., `ls --literal`.
|
||||
Literal {
|
||||
/// Whether to show control characters.
|
||||
/// Whether to show control and non-unicode characters, or replace them with `?`.
|
||||
show_control: bool,
|
||||
},
|
||||
}
|
||||
|
@ -72,16 +76,24 @@ enum EscapeState {
|
|||
Octal(EscapeOctal),
|
||||
}
|
||||
|
||||
/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
|
||||
/// Only supports characters up to 2 bytes long in UTF-8.
|
||||
struct EscapeOctal {
|
||||
c: char,
|
||||
c: [u8; 2],
|
||||
state: EscapeOctalState,
|
||||
idx: usize,
|
||||
idx: u8,
|
||||
}
|
||||
|
||||
enum EscapeOctalState {
|
||||
Done,
|
||||
Backslash,
|
||||
Value,
|
||||
FirstBackslash,
|
||||
FirstValue,
|
||||
LastBackslash,
|
||||
LastValue,
|
||||
}
|
||||
|
||||
/// Extract one octal digit from `byte`.
///
/// `idx` selects the digit counting from the least-significant end: `0` is
/// bits 0-2, `1` is bits 3-5 and `2` is the remaining two bits (6-7). Any
/// higher index addresses bits a `u8` does not have, so the digit is `0`.
fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
    // Widen before multiplying so a large `idx` cannot overflow `idx * 3`, and
    // use `checked_shr` so an out-of-range shift yields 0 instead of an
    // overflow panic in debug builds.
    byte.checked_shr(u32::from(idx) * 3).unwrap_or(0) & 0o7
}
|
||||
|
||||
impl Iterator for EscapeOctal {
|
||||
|
@ -90,29 +102,57 @@ impl Iterator for EscapeOctal {
|
|||
fn next(&mut self) -> Option<char> {
|
||||
match self.state {
|
||||
EscapeOctalState::Done => None,
|
||||
EscapeOctalState::Backslash => {
|
||||
self.state = EscapeOctalState::Value;
|
||||
EscapeOctalState::FirstBackslash => {
|
||||
self.state = EscapeOctalState::FirstValue;
|
||||
Some('\\')
|
||||
}
|
||||
EscapeOctalState::Value => {
|
||||
let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7;
|
||||
EscapeOctalState::LastBackslash => {
|
||||
self.state = EscapeOctalState::LastValue;
|
||||
Some('\\')
|
||||
}
|
||||
EscapeOctalState::FirstValue => {
|
||||
let octal_digit = byte_to_octal_digit(self.c[0], self.idx);
|
||||
if self.idx == 0 {
|
||||
self.state = EscapeOctalState::LastBackslash;
|
||||
self.idx = 2;
|
||||
} else {
|
||||
self.idx -= 1;
|
||||
}
|
||||
Some(from_digit(octal_digit.into(), 8).unwrap())
|
||||
}
|
||||
EscapeOctalState::LastValue => {
|
||||
let octal_digit = byte_to_octal_digit(self.c[1], self.idx);
|
||||
if self.idx == 0 {
|
||||
self.state = EscapeOctalState::Done;
|
||||
} else {
|
||||
self.idx -= 1;
|
||||
}
|
||||
Some(from_digit(octal_digit, 8).unwrap())
|
||||
Some(from_digit(octal_digit.into(), 8).unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EscapeOctal {
|
||||
fn from(c: char) -> Self {
|
||||
fn from_char(c: char) -> Self {
|
||||
if c.len_utf8() == 1 {
|
||||
return Self::from_byte(c as u8);
|
||||
}
|
||||
|
||||
let mut buf = [0; 2];
|
||||
let _s = c.encode_utf8(&mut buf);
|
||||
Self {
|
||||
c,
|
||||
c: buf,
|
||||
idx: 2,
|
||||
state: EscapeOctalState::Backslash,
|
||||
state: EscapeOctalState::FirstBackslash,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_byte(b: u8) -> Self {
|
||||
Self {
|
||||
c: [0, b],
|
||||
idx: 2,
|
||||
state: EscapeOctalState::LastBackslash,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -124,6 +164,12 @@ impl EscapedChar {
|
|||
}
|
||||
}
|
||||
|
||||
fn new_octal(b: u8) -> Self {
|
||||
Self {
|
||||
state: EscapeState::Octal(EscapeOctal::from_byte(b)),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
|
||||
use EscapeState::*;
|
||||
let init_state = match c {
|
||||
|
@ -148,7 +194,7 @@ impl EscapedChar {
|
|||
_ => Char(' '),
|
||||
},
|
||||
':' if dirname => Backslash(':'),
|
||||
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c)),
|
||||
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
|
||||
_ => Char(c),
|
||||
};
|
||||
Self { state: init_state }
|
||||
|
@ -165,11 +211,11 @@ impl EscapedChar {
|
|||
'\x0B' => Backslash('v'),
|
||||
'\x0C' => Backslash('f'),
|
||||
'\r' => Backslash('r'),
|
||||
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)),
|
||||
'\'' => match quotes {
|
||||
Quotes::Single => Backslash('\''),
|
||||
_ => Char('\''),
|
||||
},
|
||||
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
|
||||
_ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
|
||||
_ => Char(c),
|
||||
};
|
||||
|
@ -205,102 +251,124 @@ impl Iterator for EscapedChar {
|
|||
}
|
||||
}
|
||||
|
||||
fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) {
|
||||
/// Check whether the first byte of `bytes` is one of the bytes in `pattern`.
///
/// `pattern` is treated as a set of candidate bytes, not as a prefix to be
/// matched in full. An empty `bytes` never matches.
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
    // `first()` is `None` for an empty slice, which replaces the separate
    // emptiness check followed by a panicking index.
    bytes.first().is_some_and(|b| pattern.contains(b))
}
|
||||
|
||||
fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
|
||||
let mut must_quote = false;
|
||||
let mut escaped_str = String::with_capacity(name.len());
|
||||
let mut escaped_str = Vec::with_capacity(name.len());
|
||||
let mut utf8_buf = vec![0; 4];
|
||||
|
||||
for c in name.chars() {
|
||||
let escaped = {
|
||||
let ec = EscapedChar::new_shell(c, false, quotes);
|
||||
if show_control_chars {
|
||||
ec
|
||||
} else {
|
||||
ec.hide_control()
|
||||
}
|
||||
};
|
||||
for s in name.utf8_chunks() {
|
||||
for c in s.valid().chars() {
|
||||
let escaped = {
|
||||
let ec = EscapedChar::new_shell(c, false, quotes);
|
||||
if show_control_chars {
|
||||
ec
|
||||
} else {
|
||||
ec.hide_control()
|
||||
}
|
||||
};
|
||||
|
||||
match escaped.state {
|
||||
EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"),
|
||||
EscapeState::ForceQuote(x) => {
|
||||
must_quote = true;
|
||||
escaped_str.push(x);
|
||||
}
|
||||
_ => {
|
||||
for char in escaped {
|
||||
escaped_str.push(char);
|
||||
match escaped.state {
|
||||
EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"),
|
||||
EscapeState::ForceQuote(x) => {
|
||||
must_quote = true;
|
||||
escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes());
|
||||
}
|
||||
_ => {
|
||||
for c in escaped {
|
||||
escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if show_control_chars {
|
||||
escaped_str.extend_from_slice(s.invalid());
|
||||
} else {
|
||||
escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?');
|
||||
}
|
||||
}
|
||||
|
||||
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
|
||||
must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
|
||||
(escaped_str, must_quote)
|
||||
}
|
||||
|
||||
fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
|
||||
fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
|
||||
// We need to keep track of whether we are in a dollar expression
|
||||
// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
|
||||
let mut in_dollar = false;
|
||||
let mut must_quote = false;
|
||||
let mut escaped_str = String::with_capacity(name.len());
|
||||
|
||||
for c in name.chars() {
|
||||
let escaped = EscapedChar::new_shell(c, true, quotes);
|
||||
match escaped.state {
|
||||
EscapeState::Char(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
for s in name.utf8_chunks() {
|
||||
for c in s.valid().chars() {
|
||||
let escaped = EscapedChar::new_shell(c, true, quotes);
|
||||
match escaped.state {
|
||||
EscapeState::Char(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
in_dollar = false;
|
||||
}
|
||||
escaped_str.push(x);
|
||||
}
|
||||
EscapeState::ForceQuote(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
in_dollar = false;
|
||||
}
|
||||
must_quote = true;
|
||||
escaped_str.push(x);
|
||||
}
|
||||
// Single quotes are not put in dollar expressions, but are escaped
|
||||
// if the string also contains double quotes. In that case, they must
|
||||
// be handled separately.
|
||||
EscapeState::Backslash('\'') => {
|
||||
must_quote = true;
|
||||
in_dollar = false;
|
||||
escaped_str.push_str("'\\''");
|
||||
}
|
||||
escaped_str.push(x);
|
||||
}
|
||||
EscapeState::ForceQuote(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
in_dollar = false;
|
||||
}
|
||||
must_quote = true;
|
||||
escaped_str.push(x);
|
||||
}
|
||||
// Single quotes are not put in dollar expressions, but are escaped
|
||||
// if the string also contains double quotes. In that case, they must
|
||||
// be handled separately.
|
||||
EscapeState::Backslash('\'') => {
|
||||
must_quote = true;
|
||||
in_dollar = false;
|
||||
escaped_str.push_str("'\\''");
|
||||
}
|
||||
_ => {
|
||||
if !in_dollar {
|
||||
escaped_str.push_str("'$'");
|
||||
in_dollar = true;
|
||||
}
|
||||
must_quote = true;
|
||||
for char in escaped {
|
||||
escaped_str.push(char);
|
||||
_ => {
|
||||
if !in_dollar {
|
||||
escaped_str.push_str("'$'");
|
||||
in_dollar = true;
|
||||
}
|
||||
must_quote = true;
|
||||
for char in escaped {
|
||||
escaped_str.push(char);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !s.invalid().is_empty() {
|
||||
if !in_dollar {
|
||||
escaped_str.push_str("'$'");
|
||||
in_dollar = true;
|
||||
}
|
||||
must_quote = true;
|
||||
let escaped_bytes: String = s
|
||||
.invalid()
|
||||
.iter()
|
||||
.flat_map(|b| EscapedChar::new_octal(*b))
|
||||
.collect();
|
||||
escaped_str.push_str(&escaped_bytes);
|
||||
}
|
||||
}
|
||||
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
|
||||
(escaped_str, must_quote)
|
||||
must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
|
||||
(escaped_str.into(), must_quote)
|
||||
}
|
||||
|
||||
/// Return a set of characters that implies quoting of the word in
|
||||
/// shell-quoting mode.
|
||||
fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
|
||||
const ESCAPED_CHARS: &[char] = &[
|
||||
// the ':' colon character only induce quoting in the
|
||||
// context of ls displaying a directory name before listing its content.
|
||||
// (e.g. with the recursive flag -R)
|
||||
':',
|
||||
// Under this line are the control characters that should be
|
||||
// quoted in shell mode in all cases.
|
||||
'"', '`', '$', '\\', '^', '\n', '\t', '\r', '=',
|
||||
];
|
||||
|
||||
fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
|
||||
const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
|
||||
// the ':' colon character only induces quoting in the
|
||||
// context of ls displaying a directory name before listing its content.
|
||||
// (e.g. with the recursive flag -R)
|
||||
let start_index = if is_dirname { 0 } else { 1 };
|
||||
|
||||
&ESCAPED_CHARS[start_index..]
|
||||
}
|
||||
|
||||
|
@ -308,41 +376,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
|
|||
///
|
||||
/// This inner function provides an additional flag `dirname` which
|
||||
/// is meant for ls' directory name display.
|
||||
fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String {
|
||||
fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
|
||||
match style {
|
||||
QuotingStyle::Literal { show_control } => {
|
||||
if *show_control {
|
||||
name.to_string_lossy().into_owned()
|
||||
name.to_owned()
|
||||
} else {
|
||||
name.to_string_lossy()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
||||
.collect()
|
||||
name.utf8_chunks()
|
||||
.map(|s| {
|
||||
let valid: String = s
|
||||
.valid()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
||||
.collect();
|
||||
let invalid = "?".repeat(s.invalid().len());
|
||||
valid + &invalid
|
||||
})
|
||||
.collect::<String>()
|
||||
.into()
|
||||
}
|
||||
}
|
||||
QuotingStyle::C { quotes } => {
|
||||
let escaped_str: String = name
|
||||
.to_string_lossy()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname))
|
||||
.collect();
|
||||
.utf8_chunks()
|
||||
.flat_map(|s| {
|
||||
let valid = s
|
||||
.valid()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
|
||||
let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
|
||||
valid.chain(invalid)
|
||||
})
|
||||
.collect::<String>();
|
||||
|
||||
match quotes {
|
||||
Quotes::Single => format!("'{escaped_str}'"),
|
||||
Quotes::Double => format!("\"{escaped_str}\""),
|
||||
Quotes::None => escaped_str,
|
||||
}
|
||||
.into()
|
||||
}
|
||||
QuotingStyle::Shell {
|
||||
escape,
|
||||
always_quote,
|
||||
show_control,
|
||||
} => {
|
||||
let name = name.to_string_lossy();
|
||||
|
||||
let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) {
|
||||
let (quotes, must_quote) = if name
|
||||
.iter()
|
||||
.any(|c| shell_escaped_char_set(dirname).contains(c))
|
||||
{
|
||||
(Quotes::Single, true)
|
||||
} else if name.contains('\'') {
|
||||
} else if name.contains(&b'\'') {
|
||||
(Quotes::Double, true)
|
||||
} else if *always_quote {
|
||||
(Quotes::Single, true)
|
||||
|
@ -351,30 +435,43 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
|
|||
};
|
||||
|
||||
let (escaped_str, contains_quote_chars) = if *escape {
|
||||
shell_with_escape(&name, quotes)
|
||||
shell_with_escape(name, quotes)
|
||||
} else {
|
||||
shell_without_escape(&name, quotes, *show_control)
|
||||
shell_without_escape(name, quotes, *show_control)
|
||||
};
|
||||
|
||||
match (must_quote | contains_quote_chars, quotes) {
|
||||
(true, Quotes::Single) => format!("'{escaped_str}'"),
|
||||
(true, Quotes::Double) => format!("\"{escaped_str}\""),
|
||||
_ => escaped_str,
|
||||
if must_quote | contains_quote_chars && quotes != Quotes::None {
|
||||
let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
|
||||
let quote = if quotes == Quotes::Single {
|
||||
b'\''
|
||||
} else {
|
||||
b'"'
|
||||
};
|
||||
quoted_str.push(quote);
|
||||
quoted_str.extend(escaped_str);
|
||||
quoted_str.push(quote);
|
||||
quoted_str
|
||||
} else {
|
||||
escaped_str
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape a filename with respect to the given style.
|
||||
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
|
||||
escape_name_inner(name, style, false)
|
||||
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
|
||||
let name = crate::os_str_as_bytes_lossy(name);
|
||||
crate::os_string_from_vec(escape_name_inner(&name, style, false))
|
||||
.expect("all byte sequences should be valid for platform, or already replaced in name")
|
||||
}
|
||||
|
||||
/// Escape a directory name with respect to the given style.
|
||||
/// This is mainly meant to be used for ls' directory name printing and is not
|
||||
/// likely to be used elsewhere.
|
||||
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String {
|
||||
escape_name_inner(dir_name, style, true)
|
||||
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
|
||||
let name = crate::os_str_as_bytes_lossy(dir_name);
|
||||
crate::os_string_from_vec(escape_name_inner(&name, style, true))
|
||||
.expect("all byte sequences should be valid for platform, or already replaced in name")
|
||||
}
|
||||
|
||||
impl fmt::Display for QuotingStyle {
|
||||
|
@ -415,7 +512,7 @@ impl fmt::Display for Quotes {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::quoting_style::{escape_name, Quotes, QuotingStyle};
|
||||
use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle};
|
||||
|
||||
// spell-checker:ignore (tests/words) one\'two one'two
|
||||
|
||||
|
@ -465,14 +562,31 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
|
||||
map.iter()
|
||||
.map(|(_, style)| escape_name_inner(name, &get_style(style), false))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn check_names(name: &str, map: &[(&str, &str)]) {
|
||||
assert_eq!(
|
||||
map.iter()
|
||||
.map(|(_, style)| escape_name(name.as_ref(), &get_style(style)))
|
||||
.collect::<Vec<String>>(),
|
||||
.map(|(correct, _)| *correct)
|
||||
.collect::<Vec<&str>>(),
|
||||
check_names_inner(name.as_bytes(), map)
|
||||
.iter()
|
||||
.map(|bytes| std::str::from_utf8(bytes)
|
||||
.expect("valid str goes in, valid str comes out"))
|
||||
.collect::<Vec<&str>>()
|
||||
);
|
||||
}
|
||||
|
||||
fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) {
|
||||
assert_eq!(
|
||||
map.iter()
|
||||
.map(|(correct, _)| correct.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.map(|(correct, _)| *correct)
|
||||
.collect::<Vec<&[u8]>>(),
|
||||
check_names_inner(name, map)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -487,10 +601,10 @@ mod tests {
|
|||
("\"one_two\"", "c"),
|
||||
("one_two", "shell"),
|
||||
("one_two", "shell-show"),
|
||||
("\'one_two\'", "shell-always"),
|
||||
("\'one_two\'", "shell-always-show"),
|
||||
("'one_two'", "shell-always"),
|
||||
("'one_two'", "shell-always-show"),
|
||||
("one_two", "shell-escape"),
|
||||
("\'one_two\'", "shell-escape-always"),
|
||||
("'one_two'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
@ -504,12 +618,12 @@ mod tests {
|
|||
("one two", "literal-show"),
|
||||
("one\\ two", "escape"),
|
||||
("\"one two\"", "c"),
|
||||
("\'one two\'", "shell"),
|
||||
("\'one two\'", "shell-show"),
|
||||
("\'one two\'", "shell-always"),
|
||||
("\'one two\'", "shell-always-show"),
|
||||
("\'one two\'", "shell-escape"),
|
||||
("\'one two\'", "shell-escape-always"),
|
||||
("'one two'", "shell"),
|
||||
("'one two'", "shell-show"),
|
||||
("'one two'", "shell-always"),
|
||||
("'one two'", "shell-always-show"),
|
||||
("'one two'", "shell-escape"),
|
||||
("'one two'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
|
||||
|
@ -551,7 +665,7 @@ mod tests {
|
|||
|
||||
// One single quote
|
||||
check_names(
|
||||
"one\'two",
|
||||
"one'two",
|
||||
&[
|
||||
("one'two", "literal"),
|
||||
("one'two", "literal-show"),
|
||||
|
@ -637,7 +751,7 @@ mod tests {
|
|||
],
|
||||
);
|
||||
|
||||
// The first 16 control characters. NUL is also included, even though it is of
|
||||
// The first 16 ASCII control characters. NUL is also included, even though it is of
|
||||
// no importance for file names.
|
||||
check_names(
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
|
||||
|
@ -676,7 +790,7 @@ mod tests {
|
|||
],
|
||||
);
|
||||
|
||||
// The last 16 control characters.
|
||||
// The last 16 ASCII control characters.
|
||||
check_names(
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
|
||||
&[
|
||||
|
@ -730,6 +844,265 @@ mod tests {
|
|||
("''$'\\177'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
|
||||
// The first 16 Unicode control characters.
|
||||
let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap();
|
||||
check_names(
|
||||
test_str,
|
||||
&[
|
||||
("????????????????", "literal"),
|
||||
(test_str, "literal-show"),
|
||||
("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"),
|
||||
("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"),
|
||||
("????????????????", "shell"),
|
||||
(test_str, "shell-show"),
|
||||
("'????????????????'", "shell-always"),
|
||||
(&format!("'{}'", test_str), "shell-always-show"),
|
||||
("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"),
|
||||
("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
|
||||
// The last 16 Unicode control characters.
|
||||
let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap();
|
||||
check_names(
|
||||
test_str,
|
||||
&[
|
||||
("????????????????", "literal"),
|
||||
(test_str, "literal-show"),
|
||||
("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"),
|
||||
("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"),
|
||||
("????????????????", "shell"),
|
||||
(test_str, "shell-show"),
|
||||
("'????????????????'", "shell-always"),
|
||||
(&format!("'{}'", test_str), "shell-always-show"),
|
||||
("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"),
|
||||
("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_non_unicode_bytes() {
    // Building blocks for the byte strings exercised below.
    let plain = b'_'; // ordinary ASCII byte
    let cont = b'\xA7'; // UTF-8 continuation byte
    let lead2 = b'\xC2'; // lead byte of a 2-byte sequence
    let lead3 = b'\xE0'; // lead byte of a 3-byte sequence
    let lead4 = b'\xF0'; // lead byte of a 4-byte sequence
    let never_valid = b'\xC0'; // byte that cannot appear in UTF-8

    // A lone continuation byte is not valid UTF-8 by itself.
    check_names_raw(
        &[cont],
        &[
            (b"?", "literal"),
            (b"\xA7", "literal-show"),
            (b"\\247", "escape"),
            (b"\"\\247\"", "c"),
            (b"?", "shell"),
            (b"\xA7", "shell-show"),
            (b"'?'", "shell-always"),
            (b"'\xA7'", "shell-always-show"),
            (b"''$'\\247'", "shell-escape"),
            (b"''$'\\247'", "shell-escape-always"),
        ],
    );

    // Preceded by a matching lead byte, the same byte forms a valid
    // character (the bytes below spell out '§' in UTF-8).
    check_names_raw(
        &[lead2, cont],
        &[
            (b"\xC2\xA7", "literal"),
            (b"\xC2\xA7", "literal-show"),
            (b"\xC2\xA7", "escape"),
            (b"\"\xC2\xA7\"", "c"),
            (b"\xC2\xA7", "shell"),
            (b"\xC2\xA7", "shell-show"),
            (b"'\xC2\xA7'", "shell-always"),
            (b"'\xC2\xA7'", "shell-always-show"),
            (b"\xC2\xA7", "shell-escape"),
            (b"'\xC2\xA7'", "shell-escape-always"),
        ],
    );

    // Invalid bytes interleaved with valid ASCII: invalid byte first...
    check_names_raw(
        &[cont, plain],
        &[
            (b"?_", "literal"),
            (b"\xA7_", "literal-show"),
            (b"\\247_", "escape"),
            (b"\"\\247_\"", "c"),
            (b"?_", "shell"),
            (b"\xA7_", "shell-show"),
            (b"'?_'", "shell-always"),
            (b"'\xA7_'", "shell-always-show"),
            (b"''$'\\247''_'", "shell-escape"),
            (b"''$'\\247''_'", "shell-escape-always"),
        ],
    );
    // ...invalid byte last...
    check_names_raw(
        &[plain, cont],
        &[
            (b"_?", "literal"),
            (b"_\xA7", "literal-show"),
            (b"_\\247", "escape"),
            (b"\"_\\247\"", "c"),
            (b"_?", "shell"),
            (b"_\xA7", "shell-show"),
            (b"'_?'", "shell-always"),
            (b"'_\xA7'", "shell-always-show"),
            (b"'_'$'\\247'", "shell-escape"),
            (b"'_'$'\\247'", "shell-escape-always"),
        ],
    );
    // ...invalid byte surrounded by ASCII...
    check_names_raw(
        &[plain, cont, plain],
        &[
            (b"_?_", "literal"),
            (b"_\xA7_", "literal-show"),
            (b"_\\247_", "escape"),
            (b"\"_\\247_\"", "c"),
            (b"_?_", "shell"),
            (b"_\xA7_", "shell-show"),
            (b"'_?_'", "shell-always"),
            (b"'_\xA7_'", "shell-always-show"),
            (b"'_'$'\\247''_'", "shell-escape"),
            (b"'_'$'\\247''_'", "shell-escape-always"),
        ],
    );
    // ...and ASCII surrounded by invalid bytes.
    check_names_raw(
        &[cont, plain, cont],
        &[
            (b"?_?", "literal"),
            (b"\xA7_\xA7", "literal-show"),
            (b"\\247_\\247", "escape"),
            (b"\"\\247_\\247\"", "c"),
            (b"?_?", "shell"),
            (b"\xA7_\xA7", "shell-show"),
            (b"'?_?'", "shell-always"),
            (b"'\xA7_\xA7'", "shell-always-show"),
            (b"''$'\\247''_'$'\\247'", "shell-escape"),
            (b"''$'\\247''_'$'\\247'", "shell-escape-always"),
        ],
    );

    // Runs of one, two, three and four adjacent invalid bytes, each run
    // separated by an ASCII byte.
    check_names_raw(
        &[
            plain, never_valid, plain, cont, cont, plain, cont, cont, cont, plain, cont, cont,
            cont, cont, plain,
        ],
        &[
            (b"_?_??_???_????_", "literal"),
            (
                b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
                "literal-show",
            ),
            (
                b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_",
                "escape",
            ),
            (
                b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"",
                "c",
            ),
            (b"_?_??_???_????_", "shell"),
            (
                b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
                "shell-show",
            ),
            (b"'_?_??_???_????_'", "shell-always"),
            (
                b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'",
                "shell-always-show",
            ),
            (
                b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
                "shell-escape",
            ),
            (
                b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
                "shell-escape-always",
            ),
        ],
    );

    // Sequences that open like a multi-byte character but are never
    // completed: a 2-byte lead followed by ASCII...
    check_names_raw(
        &[lead2, plain],
        &[
            (b"?_", "literal"),
            (b"\xC2_", "literal-show"),
            (b"\\302_", "escape"),
            (b"\"\\302_\"", "c"),
            (b"?_", "shell"),
            (b"\xC2_", "shell-show"),
            (b"'?_'", "shell-always"),
            (b"'\xC2_'", "shell-always-show"),
            (b"''$'\\302''_'", "shell-escape"),
            (b"''$'\\302''_'", "shell-escape-always"),
        ],
    );
    // ...a dangling 2-byte lead followed by a complete 2-byte character...
    check_names_raw(
        &[lead2, lead2, cont],
        &[
            (b"?\xC2\xA7", "literal"),
            (b"\xC2\xC2\xA7", "literal-show"),
            (b"\\302\xC2\xA7", "escape"),
            (b"\"\\302\xC2\xA7\"", "c"),
            (b"?\xC2\xA7", "shell"),
            (b"\xC2\xC2\xA7", "shell-show"),
            (b"'?\xC2\xA7'", "shell-always"),
            (b"'\xC2\xC2\xA7'", "shell-always-show"),
            (b"''$'\\302''\xC2\xA7'", "shell-escape"),
            (b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
        ],
    );
    // ...a 3-byte lead with only one continuation byte...
    check_names_raw(
        &[lead3, cont, plain],
        &[
            (b"??_", "literal"),
            (b"\xE0\xA7_", "literal-show"),
            (b"\\340\\247_", "escape"),
            (b"\"\\340\\247_\"", "c"),
            (b"??_", "shell"),
            (b"\xE0\xA7_", "shell-show"),
            (b"'??_'", "shell-always"),
            (b"'\xE0\xA7_'", "shell-always-show"),
            (b"''$'\\340\\247''_'", "shell-escape"),
            (b"''$'\\340\\247''_'", "shell-escape-always"),
        ],
    );
    // ...and a 4-byte lead with only two continuation bytes.
    check_names_raw(
        &[lead4, cont, cont, plain],
        &[
            (b"???_", "literal"),
            (b"\xF0\xA7\xA7_", "literal-show"),
            (b"\\360\\247\\247_", "escape"),
            (b"\"\\360\\247\\247_\"", "c"),
            (b"???_", "shell"),
            (b"\xF0\xA7\xA7_", "shell-show"),
            (b"'???_'", "shell-always"),
            (b"'\xF0\xA7\xA7_'", "shell-always-show"),
            (b"''$'\\360\\247\\247''_'", "shell-escape"),
            (b"''$'\\360\\247\\247''_'", "shell-escape-always"),
        ],
    );
}
|
||||
|
||||
#[test]
|
||||
|
@ -765,7 +1138,7 @@ mod tests {
|
|||
("one\\\\two", "escape"),
|
||||
("\"one\\\\two\"", "c"),
|
||||
("'one\\two'", "shell"),
|
||||
("\'one\\two\'", "shell-always"),
|
||||
("'one\\two'", "shell-always"),
|
||||
("'one\\two'", "shell-escape"),
|
||||
("'one\\two'", "shell-escape-always"),
|
||||
],
|
||||
|
|
|
@ -255,9 +255,10 @@ pub fn read_yes() -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Helper function for processing delimiter values (which could be non UTF-8)
|
||||
/// It converts OsString to &[u8] for unix targets only
|
||||
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
|
||||
/// Converts an `OsStr` to a UTF-8 `&[u8]`.
|
||||
///
|
||||
/// This always succeeds on unix platforms,
|
||||
/// and fails on other platforms if the string can't be coerced to UTF-8.
|
||||
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
||||
#[cfg(unix)]
|
||||
let bytes = os_string.as_bytes();
|
||||
|
@ -273,13 +274,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
|||
Ok(bytes)
|
||||
}
|
||||
|
||||
/// Helper function for converting a slice of bytes into an &OsStr
|
||||
/// or OsString in non-unix targets.
|
||||
/// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
|
||||
///
|
||||
/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only.
|
||||
/// On non-unix (i.e. Windows), the conversion goes through the String type
|
||||
/// and thus undergo UTF-8 validation, making it fail if the stream contains
|
||||
/// non-UTF-8 characters.
|
||||
/// This is always lossless on unix platforms,
|
||||
/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
|
||||
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
    // Unix: borrow the bytes of the `OsStr` directly; nothing is copied
    // or replaced.
    #[cfg(unix)]
    let converted: Cow<[u8]> = Cow::Borrowed(os_string.as_bytes());

    // Elsewhere: go through `to_string_lossy` and keep its borrowed/owned
    // distinction, so an allocation only happens when the lossy conversion
    // itself had to produce a new string.
    #[cfg(not(unix))]
    let converted: Cow<[u8]> = match os_string.to_string_lossy() {
        Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
        Cow::Owned(text) => Cow::Owned(text.into_bytes()),
    };

    converted
}
|
||||
|
||||
/// Converts a `&[u8]` to an `&OsStr`,
|
||||
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
|
||||
///
|
||||
/// This always succeeds on unix platforms,
|
||||
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
|
||||
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
||||
#[cfg(unix)]
|
||||
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
|
||||
|
@ -291,9 +307,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
|||
Ok(os_str)
|
||||
}
|
||||
|
||||
/// Helper function for making an `OsString` from a byte field
|
||||
/// It converts `Vec<u8>` to `OsString` for unix targets only.
|
||||
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
|
||||
/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
|
||||
///
|
||||
/// This always succeeds on unix platforms,
|
||||
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
|
||||
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
|
||||
#[cfg(unix)]
|
||||
let s = OsString::from_vec(vec);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue