From bb30eb513e7ebbcd964b9fc6c76aeef326c1fcd6 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Sat, 14 Jun 2025 12:56:10 +0200 Subject: [PATCH] quoting_style: move EscapeChar stuff to its own file --- .../features/quoting_style/escaped_char.rs | 201 ++++++++++++++++++ .../src/lib/features/quoting_style/mod.rs | 197 +---------------- 2 files changed, 204 insertions(+), 194 deletions(-) create mode 100644 src/uucore/src/lib/features/quoting_style/escaped_char.rs diff --git a/src/uucore/src/lib/features/quoting_style/escaped_char.rs b/src/uucore/src/lib/features/quoting_style/escaped_char.rs new file mode 100644 index 000000000..e9a14ca73 --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/escaped_char.rs @@ -0,0 +1,201 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::char::from_digit; + +use super::Quotes; + +// PR#6559 : Remove `]{}` from special shell chars. +const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; + +// This implementation is heavily inspired by the std::char::EscapeDefault implementation +// in the Rust standard library. This custom implementation is needed because the +// characters \a, \b, \e, \f & \v are not recognized by Rust. +pub struct EscapedChar { + pub state: EscapeState, +} + +pub enum EscapeState { + Done, + Char(char), + Backslash(char), + ForceQuote(char), + Octal(EscapeOctal), +} + +/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte. +/// Only supports characters up to 2 bytes long in UTF-8. +pub struct EscapeOctal { + c: [u8; 2], + state: EscapeOctalState, + idx: u8, +} + +enum EscapeOctalState { + Done, + FirstBackslash, + FirstValue, + LastBackslash, + LastValue, +} + +fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 { + (byte >> (idx * 3)) & 0o7 +} + +impl Iterator for EscapeOctal { + type Item = char; + + fn next(&mut self) -> Option { + match self.state { + EscapeOctalState::Done => None, + EscapeOctalState::FirstBackslash => { + self.state = EscapeOctalState::FirstValue; + Some('\\') + } + EscapeOctalState::LastBackslash => { + self.state = EscapeOctalState::LastValue; + Some('\\') + } + EscapeOctalState::FirstValue => { + let octal_digit = byte_to_octal_digit(self.c[0], self.idx); + if self.idx == 0 { + self.state = EscapeOctalState::LastBackslash; + self.idx = 2; + } else { + self.idx -= 1; + } + Some(from_digit(octal_digit.into(), 8).unwrap()) + } + EscapeOctalState::LastValue => { + let octal_digit = byte_to_octal_digit(self.c[1], self.idx); + if self.idx == 0 { + self.state = EscapeOctalState::Done; + } else { + self.idx -= 1; + } + Some(from_digit(octal_digit.into(), 8).unwrap()) + } + } + } +} + +impl EscapeOctal { + fn from_char(c: char) -> Self { + if c.len_utf8() == 1 { + return Self::from_byte(c as u8); + } + + let mut buf = [0; 2]; + let _s = c.encode_utf8(&mut buf); + Self { + c: buf, + idx: 2, + state: EscapeOctalState::FirstBackslash, + } + } + + fn from_byte(b: u8) -> Self { + Self { + c: [0, b], + idx: 2, + state: EscapeOctalState::LastBackslash, + } + } +} + +impl EscapedChar { + pub fn new_literal(c: char) -> Self { + Self { + state: EscapeState::Char(c), + } + } + + pub fn new_octal(b: u8) -> Self { + Self { + state: EscapeState::Octal(EscapeOctal::from_byte(b)), + } + } + + pub fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { + use EscapeState::*; + let init_state = match c { + '\x07' => Backslash('a'), + '\x08' => Backslash('b'), + '\t' => Backslash('t'), + '\n' => Backslash('n'), + '\x0B' => Backslash('v'), + '\x0C' => Backslash('f'), + '\r' => Backslash('r'), + '\\' => Backslash('\\'), + '\'' => match quotes { + Quotes::Single => Backslash('\''), + _ => Char('\''), + }, + '"' => match quotes { + Quotes::Double => Backslash('"'), + _ => Char('"'), + }, + ' ' if !dirname => match quotes { + Quotes::None => Backslash(' '), + _ => Char(' '), + }, + ':' if dirname => Backslash(':'), + _ if c.is_control() => Octal(EscapeOctal::from_char(c)), + _ => Char(c), + }; + Self { state: init_state } + } + + pub fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self { + use EscapeState::*; + let init_state = match c { + _ if !escape && c.is_control() => Char(c), + '\x07' => Backslash('a'), + '\x08' => Backslash('b'), + '\t' => Backslash('t'), + '\n' => Backslash('n'), + '\x0B' => Backslash('v'), + '\x0C' => Backslash('f'), + '\r' => Backslash('r'), + '\'' => match quotes { + Quotes::Single => Backslash('\''), + _ => Char('\''), + }, + _ if c.is_control() => Octal(EscapeOctal::from_char(c)), + _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), + _ => Char(c), + }; + Self { state: init_state } + } + + pub fn hide_control(self) -> Self { + match self.state { + EscapeState::Char(c) if c.is_control() => Self { + state: EscapeState::Char('?'), + }, + _ => self, + } + } +} + +impl Iterator for EscapedChar { + type Item = char; + + fn next(&mut self) -> Option { + match self.state { + EscapeState::Backslash(c) => { + self.state = EscapeState::Char(c); + Some('\\') + } + EscapeState::Char(c) | EscapeState::ForceQuote(c) => { + self.state = EscapeState::Done; + Some(c) + } + EscapeState::Done => None, + EscapeState::Octal(ref mut iter) => iter.next(), + } + } +} diff --git a/src/uucore/src/lib/features/quoting_style/mod.rs b/src/uucore/src/lib/features/quoting_style/mod.rs index baddb6aaf..107bdcf06 100644 --- a/src/uucore/src/lib/features/quoting_style/mod.rs +++ b/src/uucore/src/lib/features/quoting_style/mod.rs @@ -5,7 +5,6 @@ //! Set of functions for escaping names according to different quoting styles. -use std::char::from_digit; use std::ffi::{OsStr, OsString}; use std::fmt; @@ -13,13 +12,13 @@ use crate::quoting_style::c_quoter::CQuoter; use crate::quoting_style::literal_quoter::LiteralQuoter; use crate::quoting_style::shell_quoter::{EscapedShellQuoter, NonEscapedShellQuoter}; +mod escaped_char; +pub use escaped_char::{EscapeState, EscapedChar}; + mod c_quoter; mod literal_quoter; mod shell_quoter; -// PR#6559 : Remove `]{}` from special shell chars. -const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; - /// The quoting style to use when escaping a name. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum QuotingStyle { @@ -86,196 +85,6 @@ pub enum Quotes { // TODO: Locale } -// This implementation is heavily inspired by the std::char::EscapeDefault implementation -// in the Rust standard library. This custom implementation is needed because the -// characters \a, \b, \e, \f & \v are not recognized by Rust. -struct EscapedChar { - state: EscapeState, -} - -enum EscapeState { - Done, - Char(char), - Backslash(char), - ForceQuote(char), - Octal(EscapeOctal), -} - -/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte. -/// Only supports characters up to 2 bytes long in UTF-8. -struct EscapeOctal { - c: [u8; 2], - state: EscapeOctalState, - idx: u8, -} - -enum EscapeOctalState { - Done, - FirstBackslash, - FirstValue, - LastBackslash, - LastValue, -} - -fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 { - (byte >> (idx * 3)) & 0o7 -} - -impl Iterator for EscapeOctal { - type Item = char; - - fn next(&mut self) -> Option { - match self.state { - EscapeOctalState::Done => None, - EscapeOctalState::FirstBackslash => { - self.state = EscapeOctalState::FirstValue; - Some('\\') - } - EscapeOctalState::LastBackslash => { - self.state = EscapeOctalState::LastValue; - Some('\\') - } - EscapeOctalState::FirstValue => { - let octal_digit = byte_to_octal_digit(self.c[0], self.idx); - if self.idx == 0 { - self.state = EscapeOctalState::LastBackslash; - self.idx = 2; - } else { - self.idx -= 1; - } - Some(from_digit(octal_digit.into(), 8).unwrap()) - } - EscapeOctalState::LastValue => { - let octal_digit = byte_to_octal_digit(self.c[1], self.idx); - if self.idx == 0 { - self.state = EscapeOctalState::Done; - } else { - self.idx -= 1; - } - Some(from_digit(octal_digit.into(), 8).unwrap()) - } - } - } -} - -impl EscapeOctal { - fn from_char(c: char) -> Self { - if c.len_utf8() == 1 { - return Self::from_byte(c as u8); - } - - let mut buf = [0; 2]; - let _s = c.encode_utf8(&mut buf); - Self { - c: buf, - idx: 2, - state: EscapeOctalState::FirstBackslash, - } - } - - fn from_byte(b: u8) -> Self { - Self { - c: [0, b], - idx: 2, - state: EscapeOctalState::LastBackslash, - } - } -} - -impl EscapedChar { - fn new_literal(c: char) -> Self { - Self { - state: EscapeState::Char(c), - } - } - - fn new_octal(b: u8) -> Self { - Self { - state: EscapeState::Octal(EscapeOctal::from_byte(b)), - } - } - - fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { - use EscapeState::*; - let init_state = match c { - '\x07' => Backslash('a'), - '\x08' => Backslash('b'), - '\t' => Backslash('t'), - '\n' => Backslash('n'), - '\x0B' => Backslash('v'), - '\x0C' => Backslash('f'), - '\r' => Backslash('r'), - '\\' => Backslash('\\'), - '\'' => match quotes { - Quotes::Single => Backslash('\''), - _ => Char('\''), - }, - '"' => match quotes { - Quotes::Double => Backslash('"'), - _ => Char('"'), - }, - ' ' if !dirname => match quotes { - Quotes::None => Backslash(' '), - _ => Char(' '), - }, - ':' if dirname => Backslash(':'), - _ if c.is_control() => Octal(EscapeOctal::from_char(c)), - _ => Char(c), - }; - Self { state: init_state } - } - - fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self { - use EscapeState::*; - let init_state = match c { - _ if !escape && c.is_control() => Char(c), - '\x07' => Backslash('a'), - '\x08' => Backslash('b'), - '\t' => Backslash('t'), - '\n' => Backslash('n'), - '\x0B' => Backslash('v'), - '\x0C' => Backslash('f'), - '\r' => Backslash('r'), - '\'' => match quotes { - Quotes::Single => Backslash('\''), - _ => Char('\''), - }, - _ if c.is_control() => Octal(EscapeOctal::from_char(c)), - _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), - _ => Char(c), - }; - Self { state: init_state } - } - - fn hide_control(self) -> Self { - match self.state { - EscapeState::Char(c) if c.is_control() => Self { - state: EscapeState::Char('?'), - }, - _ => self, - } - } -} - -impl Iterator for EscapedChar { - type Item = char; - - fn next(&mut self) -> Option { - match self.state { - EscapeState::Backslash(c) => { - self.state = EscapeState::Char(c); - Some('\\') - } - EscapeState::Char(c) | EscapeState::ForceQuote(c) => { - self.state = EscapeState::Done; - Some(c) - } - EscapeState::Done => None, - EscapeState::Octal(ref mut iter) => iter.next(), - } - } -} - /// Escape a name according to the given quoting style. /// /// This inner function provides an additional flag `dirname` which