mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 12:37:49 +00:00
quoting_style: add support for non-unicode bytes
This new functionality is implemented, but not yet exposed here.
This commit is contained in:
parent
cb3be5e3aa
commit
355103134b
1 changed files with 406 additions and 106 deletions
|
@ -11,34 +11,38 @@ use std::fmt;
|
|||
|
||||
// These are characters with special meaning in the shell (e.g. bash).
|
||||
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
||||
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#'];
|
||||
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
|
||||
// PR#6559 : Remove `]{}` from special shell chars.
|
||||
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
|
||||
|
||||
/// The quoting style to use when escaping a name.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum QuotingStyle {
|
||||
/// Escape the name as a literal string.
|
||||
/// Escape the name as a shell string.
|
||||
/// Used in, e.g., `ls --quoting-style=shell`.
|
||||
Shell {
|
||||
/// Whether to escape characters in the name.
|
||||
/// True in, e.g., `ls --quoting-style=shell-escape`.
|
||||
escape: bool,
|
||||
|
||||
/// Whether to always quote the name.
|
||||
always_quote: bool,
|
||||
|
||||
/// Whether to show control characters.
|
||||
/// Whether to show control and non-unicode characters, or replace them with `?`.
|
||||
show_control: bool,
|
||||
},
|
||||
|
||||
/// Escape the name as a C string.
|
||||
/// Used in, e.g., `ls --quote-name`.
|
||||
C {
|
||||
/// The type of quotes to use.
|
||||
quotes: Quotes,
|
||||
},
|
||||
|
||||
/// Escape the name as a literal string.
|
||||
/// Do not escape the string.
|
||||
/// Used in, e.g., `ls --literal`.
|
||||
Literal {
|
||||
/// Whether to show control characters.
|
||||
/// Whether to show control and non-unicode characters, or replace them with `?`.
|
||||
show_control: bool,
|
||||
},
|
||||
}
|
||||
|
@ -72,8 +76,9 @@ enum EscapeState {
|
|||
Octal(EscapeOctal),
|
||||
}
|
||||
|
||||
/// Byte we need to present as escaped octal, in the form of `\nnn`
|
||||
struct EscapeOctal {
|
||||
c: char,
|
||||
c: u8,
|
||||
state: EscapeOctalState,
|
||||
idx: usize,
|
||||
}
|
||||
|
@ -95,20 +100,20 @@ impl Iterator for EscapeOctal {
|
|||
Some('\\')
|
||||
}
|
||||
EscapeOctalState::Value => {
|
||||
let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7;
|
||||
let octal_digit = ((self.c) >> (self.idx * 3)) & 0o7;
|
||||
if self.idx == 0 {
|
||||
self.state = EscapeOctalState::Done;
|
||||
} else {
|
||||
self.idx -= 1;
|
||||
}
|
||||
Some(from_digit(octal_digit, 8).unwrap())
|
||||
Some(from_digit(octal_digit.into(), 8).unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EscapeOctal {
|
||||
fn from(c: char) -> Self {
|
||||
fn from(c: u8) -> Self {
|
||||
Self {
|
||||
c,
|
||||
idx: 2,
|
||||
|
@ -124,6 +129,12 @@ impl EscapedChar {
|
|||
}
|
||||
}
|
||||
|
||||
fn new_octal(b: u8) -> Self {
|
||||
Self {
|
||||
state: EscapeState::Octal(EscapeOctal::from(b)),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
|
||||
use EscapeState::*;
|
||||
let init_state = match c {
|
||||
|
@ -148,7 +159,7 @@ impl EscapedChar {
|
|||
_ => Char(' '),
|
||||
},
|
||||
':' if dirname => Backslash(':'),
|
||||
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c)),
|
||||
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c as u8)),
|
||||
_ => Char(c),
|
||||
};
|
||||
Self { state: init_state }
|
||||
|
@ -165,7 +176,7 @@ impl EscapedChar {
|
|||
'\x0B' => Backslash('v'),
|
||||
'\x0C' => Backslash('f'),
|
||||
'\r' => Backslash('r'),
|
||||
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)),
|
||||
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c as u8)),
|
||||
'\'' => match quotes {
|
||||
Quotes::Single => Backslash('\''),
|
||||
_ => Char('\''),
|
||||
|
@ -205,102 +216,124 @@ impl Iterator for EscapedChar {
|
|||
}
|
||||
}
|
||||
|
||||
fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) {
|
||||
/// Check whether `bytes` starts with any byte in `pattern`.
|
||||
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
|
||||
!bytes.is_empty() && pattern.contains(&bytes[0])
|
||||
}
|
||||
|
||||
fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
|
||||
let mut must_quote = false;
|
||||
let mut escaped_str = String::with_capacity(name.len());
|
||||
let mut escaped_str = Vec::with_capacity(name.len());
|
||||
let mut utf8_buf = vec![0; 4];
|
||||
|
||||
for c in name.chars() {
|
||||
let escaped = {
|
||||
let ec = EscapedChar::new_shell(c, false, quotes);
|
||||
if show_control_chars {
|
||||
ec
|
||||
} else {
|
||||
ec.hide_control()
|
||||
}
|
||||
};
|
||||
for s in name.utf8_chunks() {
|
||||
for c in s.valid().chars() {
|
||||
let escaped = {
|
||||
let ec = EscapedChar::new_shell(c, false, quotes);
|
||||
if show_control_chars {
|
||||
ec
|
||||
} else {
|
||||
ec.hide_control()
|
||||
}
|
||||
};
|
||||
|
||||
match escaped.state {
|
||||
EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"),
|
||||
EscapeState::ForceQuote(x) => {
|
||||
must_quote = true;
|
||||
escaped_str.push(x);
|
||||
}
|
||||
_ => {
|
||||
for char in escaped {
|
||||
escaped_str.push(char);
|
||||
match escaped.state {
|
||||
EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"),
|
||||
EscapeState::ForceQuote(x) => {
|
||||
must_quote = true;
|
||||
escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes());
|
||||
}
|
||||
_ => {
|
||||
for c in escaped {
|
||||
escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if show_control_chars {
|
||||
escaped_str.extend_from_slice(s.invalid());
|
||||
} else {
|
||||
escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?');
|
||||
}
|
||||
}
|
||||
|
||||
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
|
||||
must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
|
||||
(escaped_str, must_quote)
|
||||
}
|
||||
|
||||
fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
|
||||
fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
|
||||
// We need to keep track of whether we are in a dollar expression
|
||||
// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
|
||||
let mut in_dollar = false;
|
||||
let mut must_quote = false;
|
||||
let mut escaped_str = String::with_capacity(name.len());
|
||||
|
||||
for c in name.chars() {
|
||||
let escaped = EscapedChar::new_shell(c, true, quotes);
|
||||
match escaped.state {
|
||||
EscapeState::Char(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
for s in name.utf8_chunks() {
|
||||
for c in s.valid().chars() {
|
||||
let escaped = EscapedChar::new_shell(c, true, quotes);
|
||||
match escaped.state {
|
||||
EscapeState::Char(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
in_dollar = false;
|
||||
}
|
||||
escaped_str.push(x);
|
||||
}
|
||||
EscapeState::ForceQuote(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
in_dollar = false;
|
||||
}
|
||||
must_quote = true;
|
||||
escaped_str.push(x);
|
||||
}
|
||||
// Single quotes are not put in dollar expressions, but are escaped
|
||||
// if the string also contains double quotes. In that case, they must
|
||||
// be handled separately.
|
||||
EscapeState::Backslash('\'') => {
|
||||
must_quote = true;
|
||||
in_dollar = false;
|
||||
escaped_str.push_str("'\\''");
|
||||
}
|
||||
escaped_str.push(x);
|
||||
}
|
||||
EscapeState::ForceQuote(x) => {
|
||||
if in_dollar {
|
||||
escaped_str.push_str("''");
|
||||
in_dollar = false;
|
||||
}
|
||||
must_quote = true;
|
||||
escaped_str.push(x);
|
||||
}
|
||||
// Single quotes are not put in dollar expressions, but are escaped
|
||||
// if the string also contains double quotes. In that case, they must
|
||||
// be handled separately.
|
||||
EscapeState::Backslash('\'') => {
|
||||
must_quote = true;
|
||||
in_dollar = false;
|
||||
escaped_str.push_str("'\\''");
|
||||
}
|
||||
_ => {
|
||||
if !in_dollar {
|
||||
escaped_str.push_str("'$'");
|
||||
in_dollar = true;
|
||||
}
|
||||
must_quote = true;
|
||||
for char in escaped {
|
||||
escaped_str.push(char);
|
||||
_ => {
|
||||
if !in_dollar {
|
||||
escaped_str.push_str("'$'");
|
||||
in_dollar = true;
|
||||
}
|
||||
must_quote = true;
|
||||
for char in escaped {
|
||||
escaped_str.push(char);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !s.invalid().is_empty() {
|
||||
if !in_dollar {
|
||||
escaped_str.push_str("'$'");
|
||||
in_dollar = true;
|
||||
}
|
||||
must_quote = true;
|
||||
let escaped_bytes: String = s
|
||||
.invalid()
|
||||
.iter()
|
||||
.flat_map(|b| EscapedChar::new_octal(*b))
|
||||
.collect();
|
||||
escaped_str.push_str(&escaped_bytes);
|
||||
}
|
||||
}
|
||||
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
|
||||
(escaped_str, must_quote)
|
||||
must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
|
||||
(escaped_str.into(), must_quote)
|
||||
}
|
||||
|
||||
/// Return a set of characters that implies quoting of the word in
|
||||
/// shell-quoting mode.
|
||||
fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
|
||||
const ESCAPED_CHARS: &[char] = &[
|
||||
// the ':' colon character only induce quoting in the
|
||||
// context of ls displaying a directory name before listing its content.
|
||||
// (e.g. with the recursive flag -R)
|
||||
':',
|
||||
// Under this line are the control characters that should be
|
||||
// quoted in shell mode in all cases.
|
||||
'"', '`', '$', '\\', '^', '\n', '\t', '\r', '=',
|
||||
];
|
||||
|
||||
fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
|
||||
const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
|
||||
// the ':' colon character only induce quoting in the
|
||||
// context of ls displaying a directory name before listing its content.
|
||||
// (e.g. with the recursive flag -R)
|
||||
let start_index = if is_dirname { 0 } else { 1 };
|
||||
|
||||
&ESCAPED_CHARS[start_index..]
|
||||
}
|
||||
|
||||
|
@ -308,41 +341,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
|
|||
///
|
||||
/// This inner function provides an additional flag `dirname` which
|
||||
/// is meant for ls' directory name display.
|
||||
fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String {
|
||||
fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
|
||||
match style {
|
||||
QuotingStyle::Literal { show_control } => {
|
||||
if *show_control {
|
||||
name.to_string_lossy().into_owned()
|
||||
name.to_owned()
|
||||
} else {
|
||||
name.to_string_lossy()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
||||
.collect()
|
||||
name.utf8_chunks()
|
||||
.map(|s| {
|
||||
let valid: String = s
|
||||
.valid()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
||||
.collect();
|
||||
let invalid = "?".repeat(s.invalid().len());
|
||||
valid + &invalid
|
||||
})
|
||||
.collect::<String>()
|
||||
.into()
|
||||
}
|
||||
}
|
||||
QuotingStyle::C { quotes } => {
|
||||
let escaped_str: String = name
|
||||
.to_string_lossy()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname))
|
||||
.collect();
|
||||
.utf8_chunks()
|
||||
.flat_map(|s| {
|
||||
let valid = s
|
||||
.valid()
|
||||
.chars()
|
||||
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
|
||||
let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
|
||||
valid.chain(invalid)
|
||||
})
|
||||
.collect::<String>();
|
||||
|
||||
match quotes {
|
||||
Quotes::Single => format!("'{escaped_str}'"),
|
||||
Quotes::Double => format!("\"{escaped_str}\""),
|
||||
Quotes::None => escaped_str,
|
||||
}
|
||||
.into()
|
||||
}
|
||||
QuotingStyle::Shell {
|
||||
escape,
|
||||
always_quote,
|
||||
show_control,
|
||||
} => {
|
||||
let name = name.to_string_lossy();
|
||||
|
||||
let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) {
|
||||
let (quotes, must_quote) = if name
|
||||
.iter()
|
||||
.any(|c| shell_escaped_char_set(dirname).contains(c))
|
||||
{
|
||||
(Quotes::Single, true)
|
||||
} else if name.contains('\'') {
|
||||
} else if name.contains(&b'\'') {
|
||||
(Quotes::Double, true)
|
||||
} else if *always_quote {
|
||||
(Quotes::Single, true)
|
||||
|
@ -351,15 +400,24 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
|
|||
};
|
||||
|
||||
let (escaped_str, contains_quote_chars) = if *escape {
|
||||
shell_with_escape(&name, quotes)
|
||||
shell_with_escape(name, quotes)
|
||||
} else {
|
||||
shell_without_escape(&name, quotes, *show_control)
|
||||
shell_without_escape(name, quotes, *show_control)
|
||||
};
|
||||
|
||||
match (must_quote | contains_quote_chars, quotes) {
|
||||
(true, Quotes::Single) => format!("'{escaped_str}'"),
|
||||
(true, Quotes::Double) => format!("\"{escaped_str}\""),
|
||||
_ => escaped_str,
|
||||
if must_quote | contains_quote_chars && quotes != Quotes::None {
|
||||
let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
|
||||
let quote = if quotes == Quotes::Single {
|
||||
b'\''
|
||||
} else {
|
||||
b'"'
|
||||
};
|
||||
quoted_str.push(quote);
|
||||
quoted_str.extend(escaped_str);
|
||||
quoted_str.push(quote);
|
||||
quoted_str
|
||||
} else {
|
||||
escaped_str
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -367,14 +425,16 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
|
|||
|
||||
/// Escape a filename with respect to the given style.
|
||||
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
|
||||
escape_name_inner(name, style, false)
|
||||
let name = name.to_string_lossy();
|
||||
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)).to_string()
|
||||
}
|
||||
|
||||
/// Escape a directory name with respect to the given style.
|
||||
/// This is mainly meant to be used for ls' directory name printing and is not
|
||||
/// likely to be used elsewhere.
|
||||
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String {
|
||||
escape_name_inner(dir_name, style, true)
|
||||
let dir_name = dir_name.to_string_lossy();
|
||||
String::from_utf8_lossy(&escape_name_inner(dir_name.as_bytes(), style, true)).to_string()
|
||||
}
|
||||
|
||||
impl fmt::Display for QuotingStyle {
|
||||
|
@ -415,7 +475,7 @@ impl fmt::Display for Quotes {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::quoting_style::{escape_name, Quotes, QuotingStyle};
|
||||
use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle};
|
||||
|
||||
// spell-checker:ignore (tests/words) one\'two one'two
|
||||
|
||||
|
@ -465,14 +525,31 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
|
||||
map.iter()
|
||||
.map(|(_, style)| escape_name_inner(name, &get_style(style), false))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn check_names(name: &str, map: &[(&str, &str)]) {
|
||||
assert_eq!(
|
||||
map.iter()
|
||||
.map(|(_, style)| escape_name(name.as_ref(), &get_style(style)))
|
||||
.collect::<Vec<String>>(),
|
||||
.map(|(correct, _)| *correct)
|
||||
.collect::<Vec<&str>>(),
|
||||
check_names_inner(name.as_bytes(), map)
|
||||
.iter()
|
||||
.map(|bytes| std::str::from_utf8(bytes)
|
||||
.expect("valid str goes in, valid str comes out"))
|
||||
.collect::<Vec<&str>>()
|
||||
);
|
||||
}
|
||||
|
||||
fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) {
|
||||
assert_eq!(
|
||||
map.iter()
|
||||
.map(|(correct, _)| correct.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.map(|(correct, _)| *correct)
|
||||
.collect::<Vec<&[u8]>>(),
|
||||
check_names_inner(name, map)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -732,6 +809,229 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_unicode_bytes() {
|
||||
let ascii = b'_';
|
||||
let continuation = b'\xA7';
|
||||
let first2byte = b'\xC2';
|
||||
let first3byte = b'\xE0';
|
||||
let first4byte = b'\xF0';
|
||||
let invalid = b'\xC0';
|
||||
|
||||
// a single byte value invalid outside of additional context in UTF-8
|
||||
check_names_raw(
|
||||
&[continuation],
|
||||
&[
|
||||
(b"?", "literal"),
|
||||
(b"\xA7", "literal-show"),
|
||||
(b"\\247", "escape"),
|
||||
(b"\"\\247\"", "c"),
|
||||
(b"?", "shell"),
|
||||
(b"\xA7", "shell-show"),
|
||||
(b"'?'", "shell-always"),
|
||||
(b"'\xA7'", "shell-always-show"),
|
||||
(b"''$'\\247'", "shell-escape"),
|
||||
(b"''$'\\247'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
|
||||
// ...but the byte becomes valid with appropriate context
|
||||
// (this is just the § character in UTF-8, written as bytes)
|
||||
check_names_raw(
|
||||
&[first2byte, continuation],
|
||||
&[
|
||||
(b"\xC2\xA7", "literal"),
|
||||
(b"\xC2\xA7", "literal-show"),
|
||||
(b"\xC2\xA7", "escape"),
|
||||
(b"\"\xC2\xA7\"", "c"),
|
||||
(b"\xC2\xA7", "shell"),
|
||||
(b"\xC2\xA7", "shell-show"),
|
||||
(b"'\xC2\xA7'", "shell-always"),
|
||||
(b"'\xC2\xA7'", "shell-always-show"),
|
||||
(b"\xC2\xA7", "shell-escape"),
|
||||
(b"'\xC2\xA7'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
|
||||
// mixed with valid characters
|
||||
check_names_raw(
|
||||
&[continuation, ascii],
|
||||
&[
|
||||
(b"?_", "literal"),
|
||||
(b"\xA7_", "literal-show"),
|
||||
(b"\\247_", "escape"),
|
||||
(b"\"\\247_\"", "c"),
|
||||
(b"?_", "shell"),
|
||||
(b"\xA7_", "shell-show"),
|
||||
(b"'?_'", "shell-always"),
|
||||
(b"'\xA7_'", "shell-always-show"),
|
||||
(b"''$'\\247''_'", "shell-escape"),
|
||||
(b"''$'\\247''_'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
check_names_raw(
|
||||
&[ascii, continuation],
|
||||
&[
|
||||
(b"_?", "literal"),
|
||||
(b"_\xA7", "literal-show"),
|
||||
(b"_\\247", "escape"),
|
||||
(b"\"_\\247\"", "c"),
|
||||
(b"_?", "shell"),
|
||||
(b"_\xA7", "shell-show"),
|
||||
(b"'_?'", "shell-always"),
|
||||
(b"'_\xA7'", "shell-always-show"),
|
||||
(b"'_'$'\\247'", "shell-escape"),
|
||||
(b"'_'$'\\247'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
check_names_raw(
|
||||
&[ascii, continuation, ascii],
|
||||
&[
|
||||
(b"_?_", "literal"),
|
||||
(b"_\xA7_", "literal-show"),
|
||||
(b"_\\247_", "escape"),
|
||||
(b"\"_\\247_\"", "c"),
|
||||
(b"_?_", "shell"),
|
||||
(b"_\xA7_", "shell-show"),
|
||||
(b"'_?_'", "shell-always"),
|
||||
(b"'_\xA7_'", "shell-always-show"),
|
||||
(b"'_'$'\\247''_'", "shell-escape"),
|
||||
(b"'_'$'\\247''_'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
check_names_raw(
|
||||
&[continuation, ascii, continuation],
|
||||
&[
|
||||
(b"?_?", "literal"),
|
||||
(b"\xA7_\xA7", "literal-show"),
|
||||
(b"\\247_\\247", "escape"),
|
||||
(b"\"\\247_\\247\"", "c"),
|
||||
(b"?_?", "shell"),
|
||||
(b"\xA7_\xA7", "shell-show"),
|
||||
(b"'?_?'", "shell-always"),
|
||||
(b"'\xA7_\xA7'", "shell-always-show"),
|
||||
(b"''$'\\247''_'$'\\247'", "shell-escape"),
|
||||
(b"''$'\\247''_'$'\\247'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
|
||||
// contiguous invalid bytes
|
||||
check_names_raw(
|
||||
&[
|
||||
ascii,
|
||||
invalid,
|
||||
ascii,
|
||||
continuation,
|
||||
continuation,
|
||||
ascii,
|
||||
continuation,
|
||||
continuation,
|
||||
continuation,
|
||||
ascii,
|
||||
continuation,
|
||||
continuation,
|
||||
continuation,
|
||||
continuation,
|
||||
ascii,
|
||||
],
|
||||
&[
|
||||
(b"_?_??_???_????_", "literal"),
|
||||
(
|
||||
b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
|
||||
"literal-show",
|
||||
),
|
||||
(
|
||||
b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_",
|
||||
"escape",
|
||||
),
|
||||
(
|
||||
b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"",
|
||||
"c",
|
||||
),
|
||||
(b"_?_??_???_????_", "shell"),
|
||||
(
|
||||
b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
|
||||
"shell-show",
|
||||
),
|
||||
(b"'_?_??_???_????_'", "shell-always"),
|
||||
(
|
||||
b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'",
|
||||
"shell-always-show",
|
||||
),
|
||||
(
|
||||
b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
|
||||
"shell-escape",
|
||||
),
|
||||
(
|
||||
b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
|
||||
"shell-escape-always",
|
||||
),
|
||||
],
|
||||
);
|
||||
|
||||
// invalid multi-byte sequences that start valid
|
||||
check_names_raw(
|
||||
&[first2byte, ascii],
|
||||
&[
|
||||
(b"?_", "literal"),
|
||||
(b"\xC2_", "literal-show"),
|
||||
(b"\\302_", "escape"),
|
||||
(b"\"\\302_\"", "c"),
|
||||
(b"?_", "shell"),
|
||||
(b"\xC2_", "shell-show"),
|
||||
(b"'?_'", "shell-always"),
|
||||
(b"'\xC2_'", "shell-always-show"),
|
||||
(b"''$'\\302''_'", "shell-escape"),
|
||||
(b"''$'\\302''_'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
check_names_raw(
|
||||
&[first2byte, first2byte, continuation],
|
||||
&[
|
||||
(b"?\xC2\xA7", "literal"),
|
||||
(b"\xC2\xC2\xA7", "literal-show"),
|
||||
(b"\\302\xC2\xA7", "escape"),
|
||||
(b"\"\\302\xC2\xA7\"", "c"),
|
||||
(b"?\xC2\xA7", "shell"),
|
||||
(b"\xC2\xC2\xA7", "shell-show"),
|
||||
(b"'?\xC2\xA7'", "shell-always"),
|
||||
(b"'\xC2\xC2\xA7'", "shell-always-show"),
|
||||
(b"''$'\\302''\xC2\xA7'", "shell-escape"),
|
||||
(b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
check_names_raw(
|
||||
&[first3byte, continuation, ascii],
|
||||
&[
|
||||
(b"??_", "literal"),
|
||||
(b"\xE0\xA7_", "literal-show"),
|
||||
(b"\\340\\247_", "escape"),
|
||||
(b"\"\\340\\247_\"", "c"),
|
||||
(b"??_", "shell"),
|
||||
(b"\xE0\xA7_", "shell-show"),
|
||||
(b"'??_'", "shell-always"),
|
||||
(b"'\xE0\xA7_'", "shell-always-show"),
|
||||
(b"''$'\\340\\247''_'", "shell-escape"),
|
||||
(b"''$'\\340\\247''_'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
check_names_raw(
|
||||
&[first4byte, continuation, continuation, ascii],
|
||||
&[
|
||||
(b"???_", "literal"),
|
||||
(b"\xF0\xA7\xA7_", "literal-show"),
|
||||
(b"\\360\\247\\247_", "escape"),
|
||||
(b"\"\\360\\247\\247_\"", "c"),
|
||||
(b"???_", "shell"),
|
||||
(b"\xF0\xA7\xA7_", "shell-show"),
|
||||
(b"'???_'", "shell-always"),
|
||||
(b"'\xF0\xA7\xA7_'", "shell-always-show"),
|
||||
(b"''$'\\360\\247\\247''_'", "shell-escape"),
|
||||
(b"''$'\\360\\247\\247''_'", "shell-escape-always"),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_question_mark() {
|
||||
// A question mark must force quotes in shell and shell-always, unless
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue