mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-01 05:27:45 +00:00
quoting_style: add support for non-unicode bytes
This new functionality is implemented, but not yet exposed here.
This commit is contained in:
parent
cb3be5e3aa
commit
355103134b
1 changed files with 406 additions and 106 deletions
|
@ -11,34 +11,38 @@ use std::fmt;
|
||||||
|
|
||||||
// These are characters with special meaning in the shell (e.g. bash).
|
// These are characters with special meaning in the shell (e.g. bash).
|
||||||
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
|
||||||
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#'];
|
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
|
||||||
// PR#6559 : Remove `]{}` from special shell chars.
|
// PR#6559 : Remove `]{}` from special shell chars.
|
||||||
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
|
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
|
||||||
|
|
||||||
/// The quoting style to use when escaping a name.
|
/// The quoting style to use when escaping a name.
|
||||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
pub enum QuotingStyle {
|
pub enum QuotingStyle {
|
||||||
/// Escape the name as a literal string.
|
/// Escape the name as a shell string.
|
||||||
|
/// Used in, e.g., `ls --quoting-style=shell`.
|
||||||
Shell {
|
Shell {
|
||||||
/// Whether to escape characters in the name.
|
/// Whether to escape characters in the name.
|
||||||
|
/// True in, e.g., `ls --quoting-style=shell-escape`.
|
||||||
escape: bool,
|
escape: bool,
|
||||||
|
|
||||||
/// Whether to always quote the name.
|
/// Whether to always quote the name.
|
||||||
always_quote: bool,
|
always_quote: bool,
|
||||||
|
|
||||||
/// Whether to show control characters.
|
/// Whether to show control and non-unicode characters, or replace them with `?`.
|
||||||
show_control: bool,
|
show_control: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Escape the name as a C string.
|
/// Escape the name as a C string.
|
||||||
|
/// Used in, e.g., `ls --quote-name`.
|
||||||
C {
|
C {
|
||||||
/// The type of quotes to use.
|
/// The type of quotes to use.
|
||||||
quotes: Quotes,
|
quotes: Quotes,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Escape the name as a literal string.
|
/// Do not escape the string.
|
||||||
|
/// Used in, e.g., `ls --literal`.
|
||||||
Literal {
|
Literal {
|
||||||
/// Whether to show control characters.
|
/// Whether to show control and non-unicode characters, or replace them with `?`.
|
||||||
show_control: bool,
|
show_control: bool,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -72,8 +76,9 @@ enum EscapeState {
|
||||||
Octal(EscapeOctal),
|
Octal(EscapeOctal),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Byte we need to present as escaped octal, in the form of `\nnn`
|
||||||
struct EscapeOctal {
|
struct EscapeOctal {
|
||||||
c: char,
|
c: u8,
|
||||||
state: EscapeOctalState,
|
state: EscapeOctalState,
|
||||||
idx: usize,
|
idx: usize,
|
||||||
}
|
}
|
||||||
|
@ -95,20 +100,20 @@ impl Iterator for EscapeOctal {
|
||||||
Some('\\')
|
Some('\\')
|
||||||
}
|
}
|
||||||
EscapeOctalState::Value => {
|
EscapeOctalState::Value => {
|
||||||
let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7;
|
let octal_digit = ((self.c) >> (self.idx * 3)) & 0o7;
|
||||||
if self.idx == 0 {
|
if self.idx == 0 {
|
||||||
self.state = EscapeOctalState::Done;
|
self.state = EscapeOctalState::Done;
|
||||||
} else {
|
} else {
|
||||||
self.idx -= 1;
|
self.idx -= 1;
|
||||||
}
|
}
|
||||||
Some(from_digit(octal_digit, 8).unwrap())
|
Some(from_digit(octal_digit.into(), 8).unwrap())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EscapeOctal {
|
impl EscapeOctal {
|
||||||
fn from(c: char) -> Self {
|
fn from(c: u8) -> Self {
|
||||||
Self {
|
Self {
|
||||||
c,
|
c,
|
||||||
idx: 2,
|
idx: 2,
|
||||||
|
@ -124,6 +129,12 @@ impl EscapedChar {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn new_octal(b: u8) -> Self {
|
||||||
|
Self {
|
||||||
|
state: EscapeState::Octal(EscapeOctal::from(b)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
|
fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
|
||||||
use EscapeState::*;
|
use EscapeState::*;
|
||||||
let init_state = match c {
|
let init_state = match c {
|
||||||
|
@ -148,7 +159,7 @@ impl EscapedChar {
|
||||||
_ => Char(' '),
|
_ => Char(' '),
|
||||||
},
|
},
|
||||||
':' if dirname => Backslash(':'),
|
':' if dirname => Backslash(':'),
|
||||||
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c)),
|
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c as u8)),
|
||||||
_ => Char(c),
|
_ => Char(c),
|
||||||
};
|
};
|
||||||
Self { state: init_state }
|
Self { state: init_state }
|
||||||
|
@ -165,7 +176,7 @@ impl EscapedChar {
|
||||||
'\x0B' => Backslash('v'),
|
'\x0B' => Backslash('v'),
|
||||||
'\x0C' => Backslash('f'),
|
'\x0C' => Backslash('f'),
|
||||||
'\r' => Backslash('r'),
|
'\r' => Backslash('r'),
|
||||||
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)),
|
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c as u8)),
|
||||||
'\'' => match quotes {
|
'\'' => match quotes {
|
||||||
Quotes::Single => Backslash('\''),
|
Quotes::Single => Backslash('\''),
|
||||||
_ => Char('\''),
|
_ => Char('\''),
|
||||||
|
@ -205,11 +216,18 @@ impl Iterator for EscapedChar {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) {
|
/// Check whether `bytes` starts with any byte in `pattern`.
|
||||||
let mut must_quote = false;
|
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
|
||||||
let mut escaped_str = String::with_capacity(name.len());
|
!bytes.is_empty() && pattern.contains(&bytes[0])
|
||||||
|
}
|
||||||
|
|
||||||
for c in name.chars() {
|
fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
|
||||||
|
let mut must_quote = false;
|
||||||
|
let mut escaped_str = Vec::with_capacity(name.len());
|
||||||
|
let mut utf8_buf = vec![0; 4];
|
||||||
|
|
||||||
|
for s in name.utf8_chunks() {
|
||||||
|
for c in s.valid().chars() {
|
||||||
let escaped = {
|
let escaped = {
|
||||||
let ec = EscapedChar::new_shell(c, false, quotes);
|
let ec = EscapedChar::new_shell(c, false, quotes);
|
||||||
if show_control_chars {
|
if show_control_chars {
|
||||||
|
@ -220,31 +238,39 @@ fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) ->
|
||||||
};
|
};
|
||||||
|
|
||||||
match escaped.state {
|
match escaped.state {
|
||||||
EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"),
|
EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"),
|
||||||
EscapeState::ForceQuote(x) => {
|
EscapeState::ForceQuote(x) => {
|
||||||
must_quote = true;
|
must_quote = true;
|
||||||
escaped_str.push(x);
|
escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes());
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
for char in escaped {
|
for c in escaped {
|
||||||
escaped_str.push(char);
|
escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
|
if show_control_chars {
|
||||||
|
escaped_str.extend_from_slice(s.invalid());
|
||||||
|
} else {
|
||||||
|
escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
|
||||||
(escaped_str, must_quote)
|
(escaped_str, must_quote)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
|
fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
|
||||||
// We need to keep track of whether we are in a dollar expression
|
// We need to keep track of whether we are in a dollar expression
|
||||||
// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
|
// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
|
||||||
let mut in_dollar = false;
|
let mut in_dollar = false;
|
||||||
let mut must_quote = false;
|
let mut must_quote = false;
|
||||||
let mut escaped_str = String::with_capacity(name.len());
|
let mut escaped_str = String::with_capacity(name.len());
|
||||||
|
|
||||||
for c in name.chars() {
|
for s in name.utf8_chunks() {
|
||||||
|
for c in s.valid().chars() {
|
||||||
let escaped = EscapedChar::new_shell(c, true, quotes);
|
let escaped = EscapedChar::new_shell(c, true, quotes);
|
||||||
match escaped.state {
|
match escaped.state {
|
||||||
EscapeState::Char(x) => {
|
EscapeState::Char(x) => {
|
||||||
|
@ -282,25 +308,32 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START);
|
if !s.invalid().is_empty() {
|
||||||
(escaped_str, must_quote)
|
if !in_dollar {
|
||||||
|
escaped_str.push_str("'$'");
|
||||||
|
in_dollar = true;
|
||||||
|
}
|
||||||
|
must_quote = true;
|
||||||
|
let escaped_bytes: String = s
|
||||||
|
.invalid()
|
||||||
|
.iter()
|
||||||
|
.flat_map(|b| EscapedChar::new_octal(*b))
|
||||||
|
.collect();
|
||||||
|
escaped_str.push_str(&escaped_bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
|
||||||
|
(escaped_str.into(), must_quote)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a set of characters that implies quoting of the word in
|
/// Return a set of characters that implies quoting of the word in
|
||||||
/// shell-quoting mode.
|
/// shell-quoting mode.
|
||||||
fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
|
fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
|
||||||
const ESCAPED_CHARS: &[char] = &[
|
const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
|
||||||
// the ':' colon character only induce quoting in the
|
// the ':' colon character only induce quoting in the
|
||||||
// context of ls displaying a directory name before listing its content.
|
// context of ls displaying a directory name before listing its content.
|
||||||
// (e.g. with the recursive flag -R)
|
// (e.g. with the recursive flag -R)
|
||||||
':',
|
|
||||||
// Under this line are the control characters that should be
|
|
||||||
// quoted in shell mode in all cases.
|
|
||||||
'"', '`', '$', '\\', '^', '\n', '\t', '\r', '=',
|
|
||||||
];
|
|
||||||
|
|
||||||
let start_index = if is_dirname { 0 } else { 1 };
|
let start_index = if is_dirname { 0 } else { 1 };
|
||||||
|
|
||||||
&ESCAPED_CHARS[start_index..]
|
&ESCAPED_CHARS[start_index..]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -308,41 +341,57 @@ fn shell_escaped_char_set(is_dirname: bool) -> &'static [char] {
|
||||||
///
|
///
|
||||||
/// This inner function provides an additional flag `dirname` which
|
/// This inner function provides an additional flag `dirname` which
|
||||||
/// is meant for ls' directory name display.
|
/// is meant for ls' directory name display.
|
||||||
fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> String {
|
fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
|
||||||
match style {
|
match style {
|
||||||
QuotingStyle::Literal { show_control } => {
|
QuotingStyle::Literal { show_control } => {
|
||||||
if *show_control {
|
if *show_control {
|
||||||
name.to_string_lossy().into_owned()
|
name.to_owned()
|
||||||
} else {
|
} else {
|
||||||
name.to_string_lossy()
|
name.utf8_chunks()
|
||||||
|
.map(|s| {
|
||||||
|
let valid: String = s
|
||||||
|
.valid()
|
||||||
.chars()
|
.chars()
|
||||||
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
|
||||||
.collect()
|
.collect();
|
||||||
|
let invalid = "?".repeat(s.invalid().len());
|
||||||
|
valid + &invalid
|
||||||
|
})
|
||||||
|
.collect::<String>()
|
||||||
|
.into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QuotingStyle::C { quotes } => {
|
QuotingStyle::C { quotes } => {
|
||||||
let escaped_str: String = name
|
let escaped_str: String = name
|
||||||
.to_string_lossy()
|
.utf8_chunks()
|
||||||
|
.flat_map(|s| {
|
||||||
|
let valid = s
|
||||||
|
.valid()
|
||||||
.chars()
|
.chars()
|
||||||
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname))
|
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
|
||||||
.collect();
|
let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
|
||||||
|
valid.chain(invalid)
|
||||||
|
})
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
match quotes {
|
match quotes {
|
||||||
Quotes::Single => format!("'{escaped_str}'"),
|
Quotes::Single => format!("'{escaped_str}'"),
|
||||||
Quotes::Double => format!("\"{escaped_str}\""),
|
Quotes::Double => format!("\"{escaped_str}\""),
|
||||||
Quotes::None => escaped_str,
|
Quotes::None => escaped_str,
|
||||||
}
|
}
|
||||||
|
.into()
|
||||||
}
|
}
|
||||||
QuotingStyle::Shell {
|
QuotingStyle::Shell {
|
||||||
escape,
|
escape,
|
||||||
always_quote,
|
always_quote,
|
||||||
show_control,
|
show_control,
|
||||||
} => {
|
} => {
|
||||||
let name = name.to_string_lossy();
|
let (quotes, must_quote) = if name
|
||||||
|
.iter()
|
||||||
let (quotes, must_quote) = if name.contains(shell_escaped_char_set(dirname)) {
|
.any(|c| shell_escaped_char_set(dirname).contains(c))
|
||||||
|
{
|
||||||
(Quotes::Single, true)
|
(Quotes::Single, true)
|
||||||
} else if name.contains('\'') {
|
} else if name.contains(&b'\'') {
|
||||||
(Quotes::Double, true)
|
(Quotes::Double, true)
|
||||||
} else if *always_quote {
|
} else if *always_quote {
|
||||||
(Quotes::Single, true)
|
(Quotes::Single, true)
|
||||||
|
@ -351,15 +400,24 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
|
||||||
};
|
};
|
||||||
|
|
||||||
let (escaped_str, contains_quote_chars) = if *escape {
|
let (escaped_str, contains_quote_chars) = if *escape {
|
||||||
shell_with_escape(&name, quotes)
|
shell_with_escape(name, quotes)
|
||||||
} else {
|
} else {
|
||||||
shell_without_escape(&name, quotes, *show_control)
|
shell_without_escape(name, quotes, *show_control)
|
||||||
};
|
};
|
||||||
|
|
||||||
match (must_quote | contains_quote_chars, quotes) {
|
if must_quote | contains_quote_chars && quotes != Quotes::None {
|
||||||
(true, Quotes::Single) => format!("'{escaped_str}'"),
|
let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
|
||||||
(true, Quotes::Double) => format!("\"{escaped_str}\""),
|
let quote = if quotes == Quotes::Single {
|
||||||
_ => escaped_str,
|
b'\''
|
||||||
|
} else {
|
||||||
|
b'"'
|
||||||
|
};
|
||||||
|
quoted_str.push(quote);
|
||||||
|
quoted_str.extend(escaped_str);
|
||||||
|
quoted_str.push(quote);
|
||||||
|
quoted_str
|
||||||
|
} else {
|
||||||
|
escaped_str
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -367,14 +425,16 @@ fn escape_name_inner(name: &OsStr, style: &QuotingStyle, dirname: bool) -> Strin
|
||||||
|
|
||||||
/// Escape a filename with respect to the given style.
|
/// Escape a filename with respect to the given style.
|
||||||
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
|
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
|
||||||
escape_name_inner(name, style, false)
|
let name = name.to_string_lossy();
|
||||||
|
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false)).to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escape a directory name with respect to the given style.
|
/// Escape a directory name with respect to the given style.
|
||||||
/// This is mainly meant to be used for ls' directory name printing and is not
|
/// This is mainly meant to be used for ls' directory name printing and is not
|
||||||
/// likely to be used elsewhere.
|
/// likely to be used elsewhere.
|
||||||
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String {
|
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> String {
|
||||||
escape_name_inner(dir_name, style, true)
|
let dir_name = dir_name.to_string_lossy();
|
||||||
|
String::from_utf8_lossy(&escape_name_inner(dir_name.as_bytes(), style, true)).to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for QuotingStyle {
|
impl fmt::Display for QuotingStyle {
|
||||||
|
@ -415,7 +475,7 @@ impl fmt::Display for Quotes {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::quoting_style::{escape_name, Quotes, QuotingStyle};
|
use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle};
|
||||||
|
|
||||||
// spell-checker:ignore (tests/words) one\'two one'two
|
// spell-checker:ignore (tests/words) one\'two one'two
|
||||||
|
|
||||||
|
@ -465,14 +525,31 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
|
||||||
|
map.iter()
|
||||||
|
.map(|(_, style)| escape_name_inner(name, &get_style(style), false))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
fn check_names(name: &str, map: &[(&str, &str)]) {
|
fn check_names(name: &str, map: &[(&str, &str)]) {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
map.iter()
|
map.iter()
|
||||||
.map(|(_, style)| escape_name(name.as_ref(), &get_style(style)))
|
.map(|(correct, _)| *correct)
|
||||||
.collect::<Vec<String>>(),
|
.collect::<Vec<&str>>(),
|
||||||
|
check_names_inner(name.as_bytes(), map)
|
||||||
|
.iter()
|
||||||
|
.map(|bytes| std::str::from_utf8(bytes)
|
||||||
|
.expect("valid str goes in, valid str comes out"))
|
||||||
|
.collect::<Vec<&str>>()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) {
|
||||||
|
assert_eq!(
|
||||||
map.iter()
|
map.iter()
|
||||||
.map(|(correct, _)| correct.to_string())
|
.map(|(correct, _)| *correct)
|
||||||
.collect::<Vec<String>>()
|
.collect::<Vec<&[u8]>>(),
|
||||||
|
check_names_inner(name, map)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -732,6 +809,229 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_non_unicode_bytes() {
|
||||||
|
let ascii = b'_';
|
||||||
|
let continuation = b'\xA7';
|
||||||
|
let first2byte = b'\xC2';
|
||||||
|
let first3byte = b'\xE0';
|
||||||
|
let first4byte = b'\xF0';
|
||||||
|
let invalid = b'\xC0';
|
||||||
|
|
||||||
|
// a single byte value invalid outside of additional context in UTF-8
|
||||||
|
check_names_raw(
|
||||||
|
&[continuation],
|
||||||
|
&[
|
||||||
|
(b"?", "literal"),
|
||||||
|
(b"\xA7", "literal-show"),
|
||||||
|
(b"\\247", "escape"),
|
||||||
|
(b"\"\\247\"", "c"),
|
||||||
|
(b"?", "shell"),
|
||||||
|
(b"\xA7", "shell-show"),
|
||||||
|
(b"'?'", "shell-always"),
|
||||||
|
(b"'\xA7'", "shell-always-show"),
|
||||||
|
(b"''$'\\247'", "shell-escape"),
|
||||||
|
(b"''$'\\247'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// ...but the byte becomes valid with appropriate context
|
||||||
|
// (this is just the § character in UTF-8, written as bytes)
|
||||||
|
check_names_raw(
|
||||||
|
&[first2byte, continuation],
|
||||||
|
&[
|
||||||
|
(b"\xC2\xA7", "literal"),
|
||||||
|
(b"\xC2\xA7", "literal-show"),
|
||||||
|
(b"\xC2\xA7", "escape"),
|
||||||
|
(b"\"\xC2\xA7\"", "c"),
|
||||||
|
(b"\xC2\xA7", "shell"),
|
||||||
|
(b"\xC2\xA7", "shell-show"),
|
||||||
|
(b"'\xC2\xA7'", "shell-always"),
|
||||||
|
(b"'\xC2\xA7'", "shell-always-show"),
|
||||||
|
(b"\xC2\xA7", "shell-escape"),
|
||||||
|
(b"'\xC2\xA7'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// mixed with valid characters
|
||||||
|
check_names_raw(
|
||||||
|
&[continuation, ascii],
|
||||||
|
&[
|
||||||
|
(b"?_", "literal"),
|
||||||
|
(b"\xA7_", "literal-show"),
|
||||||
|
(b"\\247_", "escape"),
|
||||||
|
(b"\"\\247_\"", "c"),
|
||||||
|
(b"?_", "shell"),
|
||||||
|
(b"\xA7_", "shell-show"),
|
||||||
|
(b"'?_'", "shell-always"),
|
||||||
|
(b"'\xA7_'", "shell-always-show"),
|
||||||
|
(b"''$'\\247''_'", "shell-escape"),
|
||||||
|
(b"''$'\\247''_'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
check_names_raw(
|
||||||
|
&[ascii, continuation],
|
||||||
|
&[
|
||||||
|
(b"_?", "literal"),
|
||||||
|
(b"_\xA7", "literal-show"),
|
||||||
|
(b"_\\247", "escape"),
|
||||||
|
(b"\"_\\247\"", "c"),
|
||||||
|
(b"_?", "shell"),
|
||||||
|
(b"_\xA7", "shell-show"),
|
||||||
|
(b"'_?'", "shell-always"),
|
||||||
|
(b"'_\xA7'", "shell-always-show"),
|
||||||
|
(b"'_'$'\\247'", "shell-escape"),
|
||||||
|
(b"'_'$'\\247'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
check_names_raw(
|
||||||
|
&[ascii, continuation, ascii],
|
||||||
|
&[
|
||||||
|
(b"_?_", "literal"),
|
||||||
|
(b"_\xA7_", "literal-show"),
|
||||||
|
(b"_\\247_", "escape"),
|
||||||
|
(b"\"_\\247_\"", "c"),
|
||||||
|
(b"_?_", "shell"),
|
||||||
|
(b"_\xA7_", "shell-show"),
|
||||||
|
(b"'_?_'", "shell-always"),
|
||||||
|
(b"'_\xA7_'", "shell-always-show"),
|
||||||
|
(b"'_'$'\\247''_'", "shell-escape"),
|
||||||
|
(b"'_'$'\\247''_'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
check_names_raw(
|
||||||
|
&[continuation, ascii, continuation],
|
||||||
|
&[
|
||||||
|
(b"?_?", "literal"),
|
||||||
|
(b"\xA7_\xA7", "literal-show"),
|
||||||
|
(b"\\247_\\247", "escape"),
|
||||||
|
(b"\"\\247_\\247\"", "c"),
|
||||||
|
(b"?_?", "shell"),
|
||||||
|
(b"\xA7_\xA7", "shell-show"),
|
||||||
|
(b"'?_?'", "shell-always"),
|
||||||
|
(b"'\xA7_\xA7'", "shell-always-show"),
|
||||||
|
(b"''$'\\247''_'$'\\247'", "shell-escape"),
|
||||||
|
(b"''$'\\247''_'$'\\247'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// contiguous invalid bytes
|
||||||
|
check_names_raw(
|
||||||
|
&[
|
||||||
|
ascii,
|
||||||
|
invalid,
|
||||||
|
ascii,
|
||||||
|
continuation,
|
||||||
|
continuation,
|
||||||
|
ascii,
|
||||||
|
continuation,
|
||||||
|
continuation,
|
||||||
|
continuation,
|
||||||
|
ascii,
|
||||||
|
continuation,
|
||||||
|
continuation,
|
||||||
|
continuation,
|
||||||
|
continuation,
|
||||||
|
ascii,
|
||||||
|
],
|
||||||
|
&[
|
||||||
|
(b"_?_??_???_????_", "literal"),
|
||||||
|
(
|
||||||
|
b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
|
||||||
|
"literal-show",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_",
|
||||||
|
"escape",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"",
|
||||||
|
"c",
|
||||||
|
),
|
||||||
|
(b"_?_??_???_????_", "shell"),
|
||||||
|
(
|
||||||
|
b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_",
|
||||||
|
"shell-show",
|
||||||
|
),
|
||||||
|
(b"'_?_??_???_????_'", "shell-always"),
|
||||||
|
(
|
||||||
|
b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'",
|
||||||
|
"shell-always-show",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
|
||||||
|
"shell-escape",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'",
|
||||||
|
"shell-escape-always",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// invalid multi-byte sequences that start valid
|
||||||
|
check_names_raw(
|
||||||
|
&[first2byte, ascii],
|
||||||
|
&[
|
||||||
|
(b"?_", "literal"),
|
||||||
|
(b"\xC2_", "literal-show"),
|
||||||
|
(b"\\302_", "escape"),
|
||||||
|
(b"\"\\302_\"", "c"),
|
||||||
|
(b"?_", "shell"),
|
||||||
|
(b"\xC2_", "shell-show"),
|
||||||
|
(b"'?_'", "shell-always"),
|
||||||
|
(b"'\xC2_'", "shell-always-show"),
|
||||||
|
(b"''$'\\302''_'", "shell-escape"),
|
||||||
|
(b"''$'\\302''_'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
check_names_raw(
|
||||||
|
&[first2byte, first2byte, continuation],
|
||||||
|
&[
|
||||||
|
(b"?\xC2\xA7", "literal"),
|
||||||
|
(b"\xC2\xC2\xA7", "literal-show"),
|
||||||
|
(b"\\302\xC2\xA7", "escape"),
|
||||||
|
(b"\"\\302\xC2\xA7\"", "c"),
|
||||||
|
(b"?\xC2\xA7", "shell"),
|
||||||
|
(b"\xC2\xC2\xA7", "shell-show"),
|
||||||
|
(b"'?\xC2\xA7'", "shell-always"),
|
||||||
|
(b"'\xC2\xC2\xA7'", "shell-always-show"),
|
||||||
|
(b"''$'\\302''\xC2\xA7'", "shell-escape"),
|
||||||
|
(b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
check_names_raw(
|
||||||
|
&[first3byte, continuation, ascii],
|
||||||
|
&[
|
||||||
|
(b"??_", "literal"),
|
||||||
|
(b"\xE0\xA7_", "literal-show"),
|
||||||
|
(b"\\340\\247_", "escape"),
|
||||||
|
(b"\"\\340\\247_\"", "c"),
|
||||||
|
(b"??_", "shell"),
|
||||||
|
(b"\xE0\xA7_", "shell-show"),
|
||||||
|
(b"'??_'", "shell-always"),
|
||||||
|
(b"'\xE0\xA7_'", "shell-always-show"),
|
||||||
|
(b"''$'\\340\\247''_'", "shell-escape"),
|
||||||
|
(b"''$'\\340\\247''_'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
check_names_raw(
|
||||||
|
&[first4byte, continuation, continuation, ascii],
|
||||||
|
&[
|
||||||
|
(b"???_", "literal"),
|
||||||
|
(b"\xF0\xA7\xA7_", "literal-show"),
|
||||||
|
(b"\\360\\247\\247_", "escape"),
|
||||||
|
(b"\"\\360\\247\\247_\"", "c"),
|
||||||
|
(b"???_", "shell"),
|
||||||
|
(b"\xF0\xA7\xA7_", "shell-show"),
|
||||||
|
(b"'???_'", "shell-always"),
|
||||||
|
(b"'\xF0\xA7\xA7_'", "shell-always-show"),
|
||||||
|
(b"''$'\\360\\247\\247''_'", "shell-escape"),
|
||||||
|
(b"''$'\\360\\247\\247''_'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_question_mark() {
|
fn test_question_mark() {
|
||||||
// A question mark must force quotes in shell and shell-always, unless
|
// A question mark must force quotes in shell and shell-always, unless
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue