1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

quoting_style: introduce Quoter trait to factorize QuotingStyle behaviors

This commit is contained in:
Dorian Peron 2025-06-14 04:49:25 +02:00
parent 6ebab2d768
commit 4958697bd5
4 changed files with 388 additions and 197 deletions

View file

@ -0,0 +1,57 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use super::{EscapedChar, Quoter, Quotes};
/// Quoter rendering a name as a C-style string, optionally wrapped in quotes.
///
/// The opening quote is written when the quoter is created and the closing
/// one when it is finalized; the payload in between is produced one
/// character (or invalid byte) at a time through the `Quoter` interface.
pub(super) struct CQuoter {
    /// The type of quotes to use.
    quotes: Quotes,

    /// Forwarded unchanged to `EscapedChar::new_c` for every pushed character.
    dirname: bool,

    /// Output accumulated so far, opening quote included.
    buffer: Vec<u8>,
}

impl CQuoter {
    /// Create a C-style quoter whose buffer already holds the opening quote
    /// matching `quotes` (nothing for `Quotes::None`).
    ///
    /// `size_hint` is the expected payload size in bytes; the two bytes of the
    /// surrounding quotes are reserved on top of it.
    pub fn new(quotes: Quotes, dirname: bool, size_hint: usize) -> Self {
        let opening = match quotes {
            Quotes::None => None,
            Quotes::Single => Some(b'\''),
            Quotes::Double => Some(b'"'),
        };

        // A quoted name always needs two more bytes than its payload.
        // Reserving them up front avoids a guaranteed reallocation for names
        // that need no escaping.
        let quote_bytes = if opening.is_some() { 2 } else { 0 };
        let mut buffer = Vec::with_capacity(size_hint.saturating_add(quote_bytes));
        buffer.extend(opening);

        Self {
            quotes,
            dirname,
            buffer,
        }
    }
}
impl Quoter for CQuoter {
    /// Append the C-style escape of one valid character, as produced by
    /// `EscapedChar::new_c(..).hide_control()`.
    fn push_char(&mut self, input: char) {
        // Encode through a stack buffer rather than collecting every escape
        // into a temporary heap-allocated `String`.
        let mut utf8 = [0u8; 4];
        for c in EscapedChar::new_c(input, self.quotes, self.dirname).hide_control() {
            self.buffer
                .extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
        }
    }

    /// Append every invalid byte as the escape produced by
    /// `EscapedChar::new_octal(..).hide_control()`.
    fn push_invalid(&mut self, input: &[u8]) {
        let mut utf8 = [0u8; 4];
        for &byte in input {
            for c in EscapedChar::new_octal(byte).hide_control() {
                self.buffer
                    .extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            }
        }
    }

    /// Append the closing quote (if any) and return the finished buffer.
    fn finalize(mut self: Box<Self>) -> Vec<u8> {
        match self.quotes {
            Quotes::None => (),
            Quotes::Single => self.buffer.push(b'\''),
            Quotes::Double => self.buffer.push(b'"'),
        }
        self.buffer
    }
}

View file

@ -0,0 +1,31 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use super::{EscapedChar, Quoter};
/// Quoter for the literal style; the wrapped vector is the output built so
/// far.
pub(super) struct LiteralQuoter(Vec<u8>);

impl LiteralQuoter {
    /// Create an empty literal quoter with room for `size_hint` bytes.
    pub fn new(size_hint: usize) -> Self {
        let buffer = Vec::with_capacity(size_hint);
        Self(buffer)
    }
}
impl Quoter for LiteralQuoter {
    /// Append one valid character as produced by
    /// `EscapedChar::new_literal(..).hide_control()`.
    fn push_char(&mut self, input: char) {
        // Encode through a stack buffer rather than collecting every
        // character into a temporary heap-allocated `String`.
        let mut utf8 = [0u8; 4];
        for c in EscapedChar::new_literal(input).hide_control() {
            self.0.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
        }
    }

    /// Append one `?` per invalid byte.
    fn push_invalid(&mut self, input: &[u8]) {
        self.0.extend(std::iter::repeat_n(b'?', input.len()));
    }

    /// Return the accumulated bytes unchanged.
    fn finalize(self: Box<Self>) -> Vec<u8> {
        self.0
    }
}

View file

@ -9,9 +9,14 @@ use std::char::from_digit;
use std::ffi::{OsStr, OsString};
use std::fmt;
// These are characters with special meaning in the shell (e.g. bash).
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
use crate::quoting_style::c_quoter::CQuoter;
use crate::quoting_style::literal_quoter::LiteralQuoter;
use crate::quoting_style::shell_quoter::{EscapedShellQuoter, NonEscapedShellQuoter};
mod c_quoter;
mod literal_quoter;
mod shell_quoter;
// PR#6559 : Remove `]{}` from special shell chars.
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
@ -47,6 +52,26 @@ pub enum QuotingStyle {
},
}
/// Interface shared by every quoting mechanism.
///
/// A quoter is fed the decoded pieces of a name, valid text through
/// `push_char` / `push_str` and undecodable bytes through `push_invalid`,
/// and hands back the quoted bytes from `finalize`.
trait Quoter {
    /// Feed a single valid character.
    fn push_char(&mut self, input: char);

    /// Feed a run of valid characters.
    ///
    /// By default each character is forwarded, in order, to `push_char`.
    fn push_str(&mut self, input: &str) {
        input.chars().for_each(|c| self.push_char(c));
    }

    /// Feed a contiguous slice of bytes that are invalid with respect to the
    /// encoding used to decode the stream.
    fn push_invalid(&mut self, input: &[u8]);

    /// Consume the quoter, apply any post-processing to the constructed
    /// buffer and return it.
    fn finalize(self: Box<Self>) -> Vec<u8>;
}
/// The type of quotes to use when escaping a name as a C string.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Quotes {
@ -251,211 +276,48 @@ impl Iterator for EscapedChar {
}
}
/// Tell whether the first byte of `bytes` is one of the bytes in `pattern`.
///
/// An empty `bytes` never matches.
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
    bytes.first().is_some_and(|first| pattern.contains(first))
}
/// Render `name` in shell style without escape sequences.
///
/// Returns the rendered bytes, not yet surrounded by quotes, together with a
/// flag telling whether quoting is required: either a character was reported
/// as `EscapeState::ForceQuote`, or `name` starts with one of
/// `SPECIAL_SHELL_CHARS_START`.
///
/// When `show_control_chars` is false, characters go through `hide_control`
/// and every invalid byte is replaced with `?`; otherwise invalid bytes are
/// copied verbatim.
fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec<u8>, bool) {
    let mut out = Vec::with_capacity(name.len());
    let mut forced = false;
    // Scratch space for the UTF-8 encoding of a single char.
    let mut scratch = [0u8; 4];

    for chunk in name.utf8_chunks() {
        for ch in chunk.valid().chars() {
            let mut escaped = EscapedChar::new_shell(ch, false, quotes);
            if !show_control_chars {
                escaped = escaped.hide_control();
            }

            match escaped.state {
                // Leave the quoted section, emit an escaped quote, re-enter.
                EscapeState::Backslash('\'') => out.extend_from_slice(b"'\\''"),
                EscapeState::ForceQuote(raw) => {
                    forced = true;
                    out.extend_from_slice(raw.encode_utf8(&mut scratch).as_bytes());
                }
                _ => {
                    for piece in escaped {
                        out.extend_from_slice(piece.encode_utf8(&mut scratch).as_bytes());
                    }
                }
            }
        }

        let invalid = chunk.invalid();
        if show_control_chars {
            out.extend_from_slice(invalid);
        } else {
            out.extend(std::iter::repeat_n(b'?', invalid.len()));
        }
    }

    let must_quote = forced || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
    (out, must_quote)
}
/// Render `name` in shell style with escape sequences.
///
/// Returns the rendered bytes, not yet surrounded by quotes, together with a
/// flag telling whether quoting is required.
///
/// Escaped characters and invalid bytes are grouped inside `$'...'` sections,
/// so the function tracks whether such a section is currently open: e.g.
/// `\b\n` is rendered as `$'\b\n'` and not as `$'\b'$'\n'`.
fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec<u8>, bool) {
    let mut out = String::with_capacity(name.len());
    let mut dollar_open = false;
    let mut forced = false;

    for chunk in name.utf8_chunks() {
        for ch in chunk.valid().chars() {
            let escaped = EscapedChar::new_shell(ch, true, quotes);
            match escaped.state {
                EscapeState::Char(plain) => {
                    if dollar_open {
                        out.push_str("''");
                        dollar_open = false;
                    }
                    out.push(plain);
                }
                EscapeState::ForceQuote(plain) => {
                    if dollar_open {
                        out.push_str("''");
                        dollar_open = false;
                    }
                    forced = true;
                    out.push(plain);
                }
                // Single quotes are never placed inside a dollar section; the
                // sequence below closes whatever section is open, emits an
                // escaped quote and reopens a plain quoted section.
                EscapeState::Backslash('\'') => {
                    forced = true;
                    dollar_open = false;
                    out.push_str("'\\''");
                }
                _ => {
                    if !dollar_open {
                        out.push_str("'$'");
                        dollar_open = true;
                    }
                    forced = true;
                    out.extend(escaped);
                }
            }
        }

        let invalid = chunk.invalid();
        if !invalid.is_empty() {
            if !dollar_open {
                out.push_str("'$'");
                dollar_open = true;
            }
            forced = true;
            out.extend(invalid.iter().flat_map(|byte| EscapedChar::new_octal(*byte)));
        }
    }

    let must_quote = forced || bytes_start_with(name, SPECIAL_SHELL_CHARS_START);
    (out.into_bytes(), must_quote)
}
/// Return the bytes whose presence forces a word to be quoted in
/// shell-quoting mode.
fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
    const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";
    // The leading ':' only induces quoting when ls displays a directory name
    // before listing its content (e.g. with the recursive flag -R), so it is
    // skipped for every other kind of name.
    let skipped = usize::from(!is_dirname);
    &ESCAPED_CHARS[skipped..]
}
/// Escape a name according to the given quoting style.
///
/// This inner function provides an additional flag `dirname` which
/// is meant for ls' directory name display.
fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> {
match style {
QuotingStyle::Literal { show_control } => {
if *show_control {
name.to_owned()
} else {
name.utf8_chunks()
.map(|s| {
let valid: String = s
.valid()
.chars()
.flat_map(|c| EscapedChar::new_literal(c).hide_control())
.collect();
let invalid = "?".repeat(s.invalid().len());
valid + &invalid
})
.collect::<String>()
.into()
}
}
QuotingStyle::C { quotes } => {
let escaped_str: String = name
.utf8_chunks()
.flat_map(|s| {
let valid = s
.valid()
.chars()
.flat_map(|c| EscapedChar::new_c(c, *quotes, dirname));
let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b));
valid.chain(invalid)
})
.collect::<String>();
// Early handle Literal with show_control style
if let QuotingStyle::Literal { show_control: true } = style {
return name.to_owned();
}
match quotes {
Quotes::Single => format!("'{escaped_str}'"),
Quotes::Double => format!("\"{escaped_str}\""),
Quotes::None => escaped_str,
}
.into()
}
let mut quoter: Box<dyn Quoter> = match style {
QuotingStyle::Literal { .. } => Box::new(LiteralQuoter::new(name.len())),
QuotingStyle::C { quotes } => Box::new(CQuoter::new(*quotes, dirname, name.len())),
QuotingStyle::Shell {
escape,
escape: true,
always_quote,
..
} => Box::new(EscapedShellQuoter::new(
name,
*always_quote,
dirname,
name.len(),
)),
QuotingStyle::Shell {
escape: false,
always_quote,
show_control,
} => {
let (quotes, must_quote) = if name
.iter()
.any(|c| shell_escaped_char_set(dirname).contains(c))
{
(Quotes::Single, true)
} else if name.contains(&b'\'') {
(Quotes::Double, true)
} else if *always_quote || name.is_empty() {
(Quotes::Single, true)
} else {
(Quotes::Single, false)
};
} => Box::new(NonEscapedShellQuoter::new(
name,
*show_control,
*always_quote,
dirname,
name.len(),
)),
};
let (escaped_str, contains_quote_chars) = if *escape {
shell_with_escape(name, quotes)
} else {
shell_without_escape(name, quotes, *show_control)
};
if must_quote | contains_quote_chars && quotes != Quotes::None {
let mut quoted_str = Vec::<u8>::with_capacity(escaped_str.len() + 2);
let quote = if quotes == Quotes::Single {
b'\''
} else {
b'"'
};
quoted_str.push(quote);
quoted_str.extend(escaped_str);
quoted_str.push(quote);
quoted_str
} else {
escaped_str
}
}
for chunk in name.utf8_chunks() {
quoter.push_str(chunk.valid());
quoter.push_invalid(chunk.invalid());
}
quoter.finalize()
}
/// Escape a filename with respect to the given style.

View file

@ -0,0 +1,241 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use super::{EscapeState, EscapedChar, Quoter, Quotes};
// These are characters with special meaning in the shell (e.g. bash). The
// first const contains characters that only have a special meaning when they
// appear at the beginning of a name.
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
// Escaped and NonEscaped shell quoting strategies are very different.
// Therefore, we are using separate Quoter structures for each of them.
/// Quoter for the shell style that does not use escape sequences.
pub(super) struct NonEscapedShellQuoter<'a> {
    // INIT
    /// The name being quoted, as originally provided.
    reference: &'a [u8],

    /// Quotes to surround the output with, should quoting be needed.
    quotes: Quotes,

    /// When true, control and non-unicode characters are shown as is;
    /// otherwise they are replaced with `?`.
    show_control: bool,

    // INTERNAL STATE
    /// Whether the name has to be quoted.
    must_quote: bool,

    /// Output accumulated so far, without the surrounding quotes.
    buffer: Vec<u8>,
}

impl<'a> NonEscapedShellQuoter<'a> {
    /// Create a quoter for `reference`.
    ///
    /// The quote type and the initial quoting requirement are derived from
    /// `reference`, `dirname` and `always_quote` by `initial_quoting`;
    /// `size_hint` is the initial capacity of the output buffer.
    pub fn new(
        reference: &'a [u8],
        show_control: bool,
        always_quote: bool,
        dirname: bool,
        size_hint: usize,
    ) -> Self {
        let buffer = Vec::with_capacity(size_hint);
        let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);

        Self {
            reference,
            quotes,
            show_control,
            must_quote,
            buffer,
        }
    }
}
impl<'a> Quoter for NonEscapedShellQuoter<'a> {
    /// Append one valid character in non-escaped shell style.
    ///
    /// A character reported as `EscapeState::ForceQuote` additionally marks
    /// the whole name as requiring quotes.
    fn push_char(&mut self, input: char) {
        let escaped = EscapedChar::new_shell(input, false, self.quotes);
        let escaped = if self.show_control {
            escaped
        } else {
            escaped.hide_control()
        };

        // Encode through a stack buffer instead of allocating a `String` for
        // every pushed character.
        let mut utf8 = [0u8; 4];
        match escaped.state {
            // Leave the quoted section, emit an escaped quote, re-enter.
            EscapeState::Backslash('\'') => self.buffer.extend_from_slice(b"'\\''"),
            EscapeState::ForceQuote(x) => {
                self.must_quote = true;
                self.buffer
                    .extend_from_slice(x.encode_utf8(&mut utf8).as_bytes());
            }
            _ => {
                for c in escaped {
                    self.buffer
                        .extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
                }
            }
        }
    }

    /// Append invalid bytes verbatim when `show_control` is set, otherwise
    /// one `?` per byte.
    fn push_invalid(&mut self, input: &[u8]) {
        if self.show_control {
            self.buffer.extend_from_slice(input);
        } else {
            self.buffer.extend(std::iter::repeat_n(b'?', input.len()));
        }
    }

    /// Surround the buffer with quotes if required and return it.
    fn finalize(self: Box<Self>) -> Vec<u8> {
        finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes)
    }
}
/// Quoter for the shell style that uses escape sequences.
///
/// Escaped content is grouped inside `$'...'` sections, so the quoter has to
/// remember whether such a section is currently open: e.g. `\b\n` is rendered
/// as `$'\b\n'` and not as `$'\b'$'\n'`.
pub(super) struct EscapedShellQuoter<'a> {
    // INIT
    /// The name being quoted, as originally provided.
    reference: &'a [u8],

    /// Quotes to surround the output with, should quoting be needed.
    quotes: Quotes,

    // INTERNAL STATE
    /// Whether the name has to be quoted.
    must_quote: bool,

    /// Whether a dollar-escaped section is currently open.
    in_dollar: bool,

    /// Output accumulated so far, without the surrounding quotes.
    buffer: Vec<u8>,
}

impl<'a> EscapedShellQuoter<'a> {
    /// Create a quoter for `reference`, starting outside of any dollar
    /// section.
    ///
    /// The quote type and the initial quoting requirement are derived from
    /// `reference`, `dirname` and `always_quote` by `initial_quoting`;
    /// `size_hint` is the initial capacity of the output buffer.
    pub fn new(reference: &'a [u8], always_quote: bool, dirname: bool, size_hint: usize) -> Self {
        let buffer = Vec::with_capacity(size_hint);
        let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);

        Self {
            reference,
            quotes,
            must_quote,
            in_dollar: false,
            buffer,
        }
    }

    /// Open a dollar section (emitting `'$'`) unless one is already open.
    fn enter_dollar(&mut self) {
        if self.in_dollar {
            return;
        }
        self.in_dollar = true;
        self.buffer.extend_from_slice(b"'$'");
    }

    /// Close the dollar section (emitting `''`) if one is open.
    fn exit_dollar(&mut self) {
        if !self.in_dollar {
            return;
        }
        self.in_dollar = false;
        self.buffer.extend_from_slice(b"''");
    }
}
impl<'a> Quoter for EscapedShellQuoter<'a> {
    /// Append one valid character in escaped shell style, opening or closing
    /// the dollar section as needed.
    fn push_char(&mut self, input: char) {
        let escaped = EscapedChar::new_shell(input, true, self.quotes);

        // Encode through a stack buffer instead of allocating a `String` for
        // every pushed character.
        let mut utf8 = [0u8; 4];
        match escaped.state {
            EscapeState::Char(x) => {
                self.exit_dollar();
                self.buffer
                    .extend_from_slice(x.encode_utf8(&mut utf8).as_bytes());
            }
            EscapeState::ForceQuote(x) => {
                self.exit_dollar();
                self.must_quote = true;
                self.buffer
                    .extend_from_slice(x.encode_utf8(&mut utf8).as_bytes());
            }
            // Single quotes are not put in dollar expressions, but are escaped
            // if the string also contains double quotes. In that case, they
            // must be handled separately. The emitted sequence itself closes
            // whatever section is open, hence the direct flag reset.
            EscapeState::Backslash('\'') => {
                self.must_quote = true;
                self.in_dollar = false;
                self.buffer.extend_from_slice(b"'\\''");
            }
            _ => {
                self.enter_dollar();
                self.must_quote = true;
                for c in escaped {
                    self.buffer
                        .extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
                }
            }
        }
    }

    /// Append invalid bytes inside a dollar section, each one rendered by
    /// `EscapedChar::new_octal`. Empty input is a no-op.
    fn push_invalid(&mut self, input: &[u8]) {
        // Early return on empty inputs, so that no dollar section is opened
        // and quoting is not forced for nothing.
        if input.is_empty() {
            return;
        }

        self.enter_dollar();
        self.must_quote = true;

        let mut utf8 = [0u8; 4];
        for c in input.iter().flat_map(|b| EscapedChar::new_octal(*b)) {
            self.buffer
                .extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
        }
    }

    /// Surround the buffer with quotes if required and return it.
    fn finalize(self: Box<Self>) -> Vec<u8> {
        finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes)
    }
}
/// Work out, from the raw name alone, which quotes to use and whether
/// quoting is already known to be required.
///
/// - a byte from `shell_escaped_char_set(dirname)` forces single quotes;
/// - otherwise a single quote in the name forces double quotes;
/// - otherwise single quotes are selected, and are required only when
///   `always_quote` is set or the name is empty.
fn initial_quoting(input: &[u8], dirname: bool, always_quote: bool) -> (Quotes, bool) {
    let forcing_bytes = shell_escaped_char_set(dirname);

    if input.iter().any(|byte| forcing_bytes.contains(byte)) {
        return (Quotes::Single, true);
    }
    if input.contains(&b'\'') {
        return (Quotes::Double, true);
    }

    (Quotes::Single, always_quote || input.is_empty())
}
/// Report whether `bytes` is non-empty and its first byte appears in
/// `pattern`.
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
    matches!(bytes.first(), Some(first) if pattern.contains(first))
}
/// Return the set of bytes that imply quoting of the word in shell-quoting
/// mode.
fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
    const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";

    if is_dirname {
        // The ':' colon character only induces quoting in the context of ls
        // displaying a directory name before listing its content (e.g. with
        // the recursive flag -R).
        ESCAPED_CHARS
    } else {
        &ESCAPED_CHARS[1..]
    }
}
/// Surround `buffer` with `quotes` when the name requires quoting.
///
/// Quoting is required when `must_quote` is set or when `reference` (the
/// original name) starts with one of `SPECIAL_SHELL_CHARS_START`. With
/// `Quotes::None` the buffer is always returned untouched.
fn finalize_shell_quoter(
    buffer: Vec<u8>,
    reference: &[u8],
    must_quote: bool,
    quotes: Quotes,
) -> Vec<u8> {
    let needs_quotes = must_quote || bytes_start_with(reference, SPECIAL_SHELL_CHARS_START);

    let quote = match quotes {
        Quotes::None => return buffer,
        Quotes::Single => b'\'',
        Quotes::Double => b'"',
    };
    if !needs_quotes {
        return buffer;
    }

    let mut quoted = Vec::with_capacity(buffer.len() + 2);
    quoted.push(quote);
    quoted.extend(buffer);
    quoted.push(quote);
    quoted
}