diff --git a/src/uucore/src/lib/features/quoting_style/c_quoter.rs b/src/uucore/src/lib/features/quoting_style/c_quoter.rs new file mode 100644 index 000000000..47a215719 --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/c_quoter.rs @@ -0,0 +1,57 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use super::{EscapedChar, Quoter, Quotes}; + +pub(super) struct CQuoter { + /// The type of quotes to use. + quotes: Quotes, + + dirname: bool, + + buffer: Vec, +} + +impl CQuoter { + pub fn new(quotes: Quotes, dirname: bool, size_hint: usize) -> Self { + let mut buffer = Vec::with_capacity(size_hint); + match quotes { + Quotes::None => (), + Quotes::Single => buffer.push(b'\''), + Quotes::Double => buffer.push(b'"'), + } + + Self { + quotes, + dirname, + buffer, + } + } +} + +impl Quoter for CQuoter { + fn push_char(&mut self, input: char) { + let escaped: String = EscapedChar::new_c(input, self.quotes, self.dirname) + .hide_control() + .collect(); + self.buffer.extend_from_slice(escaped.as_bytes()); + } + + fn push_invalid(&mut self, input: &[u8]) { + for b in input { + let escaped: String = EscapedChar::new_octal(*b).hide_control().collect(); + self.buffer.extend_from_slice(escaped.as_bytes()); + } + } + + fn finalize(mut self: Box) -> Vec { + match self.quotes { + Quotes::None => (), + Quotes::Single => self.buffer.push(b'\''), + Quotes::Double => self.buffer.push(b'"'), + } + self.buffer + } +} diff --git a/src/uucore/src/lib/features/quoting_style/literal_quoter.rs b/src/uucore/src/lib/features/quoting_style/literal_quoter.rs new file mode 100644 index 000000000..555bbf890 --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/literal_quoter.rs @@ -0,0 +1,31 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use super::{EscapedChar, Quoter}; + +pub(super) struct LiteralQuoter(Vec); + +impl LiteralQuoter { + pub fn new(size_hint: usize) -> Self { + Self(Vec::with_capacity(size_hint)) + } +} + +impl Quoter for LiteralQuoter { + fn push_char(&mut self, input: char) { + let escaped = EscapedChar::new_literal(input) + .hide_control() + .collect::(); + self.0.extend(escaped.as_bytes()); + } + + fn push_invalid(&mut self, input: &[u8]) { + self.0.extend(std::iter::repeat_n(b'?', input.len())); + } + + fn finalize(self: Box) -> Vec { + self.0 + } +} diff --git a/src/uucore/src/lib/features/quoting_style/mod.rs b/src/uucore/src/lib/features/quoting_style/mod.rs index d9dcd078b..baddb6aaf 100644 --- a/src/uucore/src/lib/features/quoting_style/mod.rs +++ b/src/uucore/src/lib/features/quoting_style/mod.rs @@ -9,9 +9,14 @@ use std::char::from_digit; use std::ffi::{OsStr, OsString}; use std::fmt; -// These are characters with special meaning in the shell (e.g. bash). -// The first const contains characters that only have a special meaning when they appear at the beginning of a name. -const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; +use crate::quoting_style::c_quoter::CQuoter; +use crate::quoting_style::literal_quoter::LiteralQuoter; +use crate::quoting_style::shell_quoter::{EscapedShellQuoter, NonEscapedShellQuoter}; + +mod c_quoter; +mod literal_quoter; +mod shell_quoter; + // PR#6559 : Remove `]{}` from special shell chars. const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; @@ -47,6 +52,26 @@ pub enum QuotingStyle { }, } +/// Common interface of quoting mechanisms. +trait Quoter { + /// Push a valid character. + fn push_char(&mut self, input: char); + + /// Push a sequence of valid characters. + fn push_str(&mut self, input: &str) { + for c in input.chars() { + self.push_char(c); + } + } + + /// Push a continuous slice of invalid data wrt the encoding used to + /// decode the stream. + fn push_invalid(&mut self, input: &[u8]); + + /// Apply post-processing on the constructed buffer and return it. + fn finalize(self: Box) -> Vec; +} + /// The type of quotes to use when escaping a name as a C string. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Quotes { @@ -251,211 +276,48 @@ impl Iterator for EscapedChar { } } -/// Check whether `bytes` starts with any byte in `pattern`. -fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool { - !bytes.is_empty() && pattern.contains(&bytes[0]) -} - -fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec, bool) { - let mut must_quote = false; - let mut escaped_str = Vec::with_capacity(name.len()); - let mut utf8_buf = vec![0; 4]; - - for s in name.utf8_chunks() { - for c in s.valid().chars() { - let escaped = { - let ec = EscapedChar::new_shell(c, false, quotes); - if show_control_chars { - ec - } else { - ec.hide_control() - } - }; - - match escaped.state { - EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"), - EscapeState::ForceQuote(x) => { - must_quote = true; - escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes()); - } - _ => { - for c in escaped { - escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes()); - } - } - } - } - - if show_control_chars { - escaped_str.extend_from_slice(s.invalid()); - } else { - escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?'); - } - } - - must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); - (escaped_str, must_quote) -} - -fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec, bool) { - // We need to keep track of whether we are in a dollar expression - // because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n' - let mut in_dollar = false; - let mut must_quote = false; - let mut escaped_str = String::with_capacity(name.len()); - - for s in name.utf8_chunks() { - for c in s.valid().chars() { - let escaped = EscapedChar::new_shell(c, true, quotes); - match escaped.state { - EscapeState::Char(x) => { - if in_dollar { - escaped_str.push_str("''"); - in_dollar = false; - } - escaped_str.push(x); - } - EscapeState::ForceQuote(x) => { - if in_dollar { - escaped_str.push_str("''"); - in_dollar = false; - } - must_quote = true; - escaped_str.push(x); - } - // Single quotes are not put in dollar expressions, but are escaped - // if the string also contains double quotes. In that case, they must - // be handled separately. - EscapeState::Backslash('\'') => { - must_quote = true; - in_dollar = false; - escaped_str.push_str("'\\''"); - } - _ => { - if !in_dollar { - escaped_str.push_str("'$'"); - in_dollar = true; - } - must_quote = true; - for char in escaped { - escaped_str.push(char); - } - } - } - } - if !s.invalid().is_empty() { - if !in_dollar { - escaped_str.push_str("'$'"); - in_dollar = true; - } - must_quote = true; - let escaped_bytes: String = s - .invalid() - .iter() - .flat_map(|b| EscapedChar::new_octal(*b)) - .collect(); - escaped_str.push_str(&escaped_bytes); - } - } - must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); - (escaped_str.into(), must_quote) -} - -/// Return a set of characters that implies quoting of the word in -/// shell-quoting mode. -fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] { - const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r="; - // the ':' colon character only induce quoting in the - // context of ls displaying a directory name before listing its content. - // (e.g. with the recursive flag -R) - let start_index = if is_dirname { 0 } else { 1 }; - &ESCAPED_CHARS[start_index..] -} - /// Escape a name according to the given quoting style. /// /// This inner function provides an additional flag `dirname` which /// is meant for ls' directory name display. fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec { - match style { - QuotingStyle::Literal { show_control } => { - if *show_control { - name.to_owned() - } else { - name.utf8_chunks() - .map(|s| { - let valid: String = s - .valid() - .chars() - .flat_map(|c| EscapedChar::new_literal(c).hide_control()) - .collect(); - let invalid = "?".repeat(s.invalid().len()); - valid + &invalid - }) - .collect::() - .into() - } - } - QuotingStyle::C { quotes } => { - let escaped_str: String = name - .utf8_chunks() - .flat_map(|s| { - let valid = s - .valid() - .chars() - .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname)); - let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b)); - valid.chain(invalid) - }) - .collect::(); + // Early handle Literal with show_control style + if let QuotingStyle::Literal { show_control: true } = style { + return name.to_owned(); + } - match quotes { - Quotes::Single => format!("'{escaped_str}'"), - Quotes::Double => format!("\"{escaped_str}\""), - Quotes::None => escaped_str, - } - .into() - } + let mut quoter: Box = match style { + QuotingStyle::Literal { .. } => Box::new(LiteralQuoter::new(name.len())), + QuotingStyle::C { quotes } => Box::new(CQuoter::new(*quotes, dirname, name.len())), QuotingStyle::Shell { - escape, + escape: true, + always_quote, + .. + } => Box::new(EscapedShellQuoter::new( + name, + *always_quote, + dirname, + name.len(), + )), + QuotingStyle::Shell { + escape: false, always_quote, show_control, - } => { - let (quotes, must_quote) = if name - .iter() - .any(|c| shell_escaped_char_set(dirname).contains(c)) - { - (Quotes::Single, true) - } else if name.contains(&b'\'') { - (Quotes::Double, true) - } else if *always_quote || name.is_empty() { - (Quotes::Single, true) - } else { - (Quotes::Single, false) - }; + } => Box::new(NonEscapedShellQuoter::new( + name, + *show_control, + *always_quote, + dirname, + name.len(), + )), + }; - let (escaped_str, contains_quote_chars) = if *escape { - shell_with_escape(name, quotes) - } else { - shell_without_escape(name, quotes, *show_control) - }; - - if must_quote | contains_quote_chars && quotes != Quotes::None { - let mut quoted_str = Vec::::with_capacity(escaped_str.len() + 2); - let quote = if quotes == Quotes::Single { - b'\'' - } else { - b'"' - }; - quoted_str.push(quote); - quoted_str.extend(escaped_str); - quoted_str.push(quote); - quoted_str - } else { - escaped_str - } - } + for chunk in name.utf8_chunks() { + quoter.push_str(chunk.valid()); + quoter.push_invalid(chunk.invalid()); } + + quoter.finalize() } /// Escape a filename with respect to the given style. diff --git a/src/uucore/src/lib/features/quoting_style/shell_quoter.rs b/src/uucore/src/lib/features/quoting_style/shell_quoter.rs new file mode 100644 index 000000000..d05dda52f --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/shell_quoter.rs @@ -0,0 +1,241 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use super::{EscapeState, EscapedChar, Quoter, Quotes}; + +// These are characters with special meaning in the shell (e.g. bash). The +// first const contains characters that only have a special meaning when they +// appear at the beginning of a name. +const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; + +// Escaped and NonEscaped shell quoting strategies are very different. +// Therefore, we are using separate Quoter structures for each of them. + +pub(super) struct NonEscapedShellQuoter<'a> { + // INIT + /// Original name. + reference: &'a [u8], + + /// The quotes to be used if necessary + quotes: Quotes, + + /// Whether to show control and non-unicode characters, or replace them + /// with `?`. + show_control: bool, + + // INTERNAL STATE + /// Whether the name should be quoted. + must_quote: bool, + + buffer: Vec, +} + +impl<'a> NonEscapedShellQuoter<'a> { + pub fn new( + reference: &'a [u8], + show_control: bool, + always_quote: bool, + dirname: bool, + size_hint: usize, + ) -> Self { + let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote); + Self { + reference, + quotes, + show_control, + must_quote, + buffer: Vec::with_capacity(size_hint), + } + } +} + +impl<'a> Quoter for NonEscapedShellQuoter<'a> { + fn push_char(&mut self, input: char) { + let escaped = EscapedChar::new_shell(input, false, self.quotes); + + let escaped = if self.show_control { + escaped + } else { + escaped.hide_control() + }; + + match escaped.state { + EscapeState::Backslash('\'') => self.buffer.extend(b"'\\''"), + EscapeState::ForceQuote(x) => { + self.must_quote = true; + self.buffer.extend(x.to_string().as_bytes()); + } + _ => { + self.buffer.extend(escaped.collect::().as_bytes()); + } + } + } + + fn push_invalid(&mut self, input: &[u8]) { + if self.show_control { + self.buffer.extend(input); + } else { + self.buffer.extend(std::iter::repeat_n(b'?', input.len())); + } + } + + fn finalize(self: Box) -> Vec { + finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes) + } +} + +// We need to keep track of whether we are in a dollar expression +// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n' +pub(super) struct EscapedShellQuoter<'a> { + // INIT + /// Original name. + reference: &'a [u8], + + /// The quotes to be used if necessary + quotes: Quotes, + + // INTERNAL STATE + /// Whether the name should be quoted. + must_quote: bool, + + /// Whether we are currently in a dollar escaped environment. + in_dollar: bool, + + buffer: Vec, +} + +impl<'a> EscapedShellQuoter<'a> { + pub fn new(reference: &'a [u8], always_quote: bool, dirname: bool, size_hint: usize) -> Self { + let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote); + Self { + reference, + quotes, + must_quote, + in_dollar: false, + buffer: Vec::with_capacity(size_hint), + } + } + + fn enter_dollar(&mut self) { + if !self.in_dollar { + self.buffer.extend(b"'$'"); + self.in_dollar = true; + } + } + + fn exit_dollar(&mut self) { + if self.in_dollar { + self.buffer.extend(b"''"); + self.in_dollar = false; + } + } +} + +impl<'a> Quoter for EscapedShellQuoter<'a> { + fn push_char(&mut self, input: char) { + let escaped = EscapedChar::new_shell(input, true, self.quotes); + match escaped.state { + EscapeState::Char(x) => { + self.exit_dollar(); + self.buffer.extend(x.to_string().as_bytes()); + } + EscapeState::ForceQuote(x) => { + self.exit_dollar(); + self.must_quote = true; + self.buffer.extend(x.to_string().as_bytes()); + } + // Single quotes are not put in dollar expressions, but are escaped + // if the string also contains double quotes. In that case, they + // must be handled separately. + EscapeState::Backslash('\'') => { + self.must_quote = true; + self.in_dollar = false; + self.buffer.extend(b"'\\''"); + } + _ => { + self.enter_dollar(); + self.must_quote = true; + self.buffer.extend(escaped.collect::().as_bytes()); + } + } + } + + fn push_invalid(&mut self, input: &[u8]) { + // Early return on empty inputs. + if input.is_empty() { + return; + } + + self.enter_dollar(); + self.must_quote = true; + self.buffer.extend( + input + .iter() + .flat_map(|b| EscapedChar::new_octal(*b)) + .collect::() + .as_bytes(), + ); + } + + fn finalize(self: Box) -> Vec { + finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes) + } +} + +/// Deduce the initial quoting status from the provided information +fn initial_quoting(input: &[u8], dirname: bool, always_quote: bool) -> (Quotes, bool) { + if input + .iter() + .any(|c| shell_escaped_char_set(dirname).contains(c)) + { + (Quotes::Single, true) + } else if input.contains(&b'\'') { + (Quotes::Double, true) + } else if always_quote || input.is_empty() { + (Quotes::Single, true) + } else { + (Quotes::Single, false) + } +} + +/// Check whether `bytes` starts with any byte in `pattern`. +fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool { + !bytes.is_empty() && pattern.contains(&bytes[0]) +} + +/// Return a set of characters that implies quoting of the word in +/// shell-quoting mode. +fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] { + const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r="; + // the ':' colon character only induce quoting in the + // context of ls displaying a directory name before listing its content. + // (e.g. with the recursive flag -R) + let start_index = if is_dirname { 0 } else { 1 }; + &ESCAPED_CHARS[start_index..] +} + +fn finalize_shell_quoter( + buffer: Vec, + reference: &[u8], + must_quote: bool, + quotes: Quotes, +) -> Vec { + let contains_quote_chars = must_quote || bytes_start_with(reference, SPECIAL_SHELL_CHARS_START); + + if must_quote | contains_quote_chars && quotes != Quotes::None { + let mut quoted = Vec::::with_capacity(buffer.len() + 2); + let quote = if quotes == Quotes::Single { + b'\'' + } else { + b'"' + }; + quoted.push(quote); + quoted.extend(buffer); + quoted.push(quote); + quoted + } else { + buffer + } +}