1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 03:57:44 +00:00

Merge pull request #7316 from jfinkels/echo-parse-escape-only

echo: use uucore::format::parse_escape_only()
This commit is contained in:
Dorian Péron 2025-02-18 00:42:36 +01:00 committed by GitHub
commit ea1562b5fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 7 additions and 236 deletions

View file

@ -18,7 +18,7 @@ path = "src/echo.rs"
[dependencies]
clap = { workspace = true }
uucore = { workspace = true }
uucore = { workspace = true, features = ["format"] }
[[bin]]
name = "echo"

View file

@ -8,10 +8,8 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::env;
use std::ffi::{OsStr, OsString};
use std::io::{self, StdoutLock, Write};
use std::iter::Peekable;
use std::ops::ControlFlow;
use std::slice::Iter;
use uucore::error::{UResult, USimpleError};
use uucore::format::{parse_escape_only, EscapedChar, FormatChar};
use uucore::{format_usage, help_about, help_section, help_usage};
const ABOUT: &str = help_about!("echo.md");
@ -25,236 +23,6 @@ mod options {
pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape";
}
/// The flavour of numeric backslash escape being parsed.
enum BackslashNumberType {
    /// `\NNN` where the first digit is `1` through `7`; the payload is that
    /// first digit as an ASCII byte, supplied by the caller.
    OctalStartingWithNonZero(u8),
    /// `\0NNN`: an octal escape introduced by `\0`.
    OctalStartingWithZero,
    /// `\xHH`: a hexadecimal escape introduced by `\x`.
    Hexadecimal,
}

impl BackslashNumberType {
    /// Returns the numeric base in which the digits of this escape are read.
    fn base(&self) -> Base {
        match *self {
            Self::Hexadecimal => Base::Hexadecimal,
            Self::OctalStartingWithNonZero(_) => Base::Octal,
            Self::OctalStartingWithZero => Base::Octal,
        }
    }
}
/// Numeric base used by a backslash escape sequence.
enum Base {
    /// Base 8, digits `0` through `7`.
    Octal,
    /// Base 16, digits `0` through `9` and `a`/`A` through `f`/`F`.
    Hexadecimal,
}

impl Base {
    /// Converts a single ASCII digit into its numeric value in this base.
    ///
    /// Returns `None` when `digit` is not a valid digit for the base
    /// (for example `b'8'` in octal, or any non-ASCII byte).
    fn ascii_to_number(&self, digit: u8) -> Option<u8> {
        // `char::to_digit` only accepts ASCII digits and letters, so widening
        // the byte to a `char` cannot make a non-ASCII byte look like a digit.
        char::from(digit)
            .to_digit(u32::from(self.radix()))
            // The value is always below the radix (at most 15), so this
            // conversion never fails in practice.
            .and_then(|value| u8::try_from(value).ok())
    }

    /// Maximum number of digits an escape sequence in this base may contain.
    fn maximum_number_of_digits(&self) -> u8 {
        match self {
            Self::Octal => 3,
            Self::Hexadecimal => 2,
        }
    }

    /// The radix (8 or 16) corresponding to this base.
    fn radix(&self) -> u8 {
        match self {
            Self::Octal => 8,
            Self::Hexadecimal => 16,
        }
    }
}
/// Parses the digits of a `\xHH`, `\0NNN`, or `\NNN` escape sequence.
///
/// For `OctalStartingWithNonZero` the first digit comes from the variant's
/// payload; for the other two variants it is peeked from `input`. A byte is
/// consumed from `input` only after it has been recognised as a valid digit.
///
/// Returns `None` (consuming nothing) when there is no first digit to read or
/// the first byte is not a valid digit in the relevant base. Otherwise returns
/// the parsed value, truncated to eight bits.
fn parse_backslash_number(
    input: &mut Peekable<Iter<u8>>,
    backslash_number_type: BackslashNumberType,
) -> Option<u8> {
    let leading_byte = match backslash_number_type {
        // The caller already peeked this digit ("\1" through "\7"), so there
        // is no early exit on this path.
        BackslashNumberType::OctalStartingWithNonZero(byte) => byte,
        // An argument ending in "\0" or "\x" has nothing left to peek; the
        // caller decides what to print in that case.
        BackslashNumberType::OctalStartingWithZero | BackslashNumberType::Hexadecimal => {
            **input.peek()?
        }
    };

    let base = backslash_number_type.base();

    // Bail out without consuming anything if the first byte is not a digit.
    let mut value = base.ascii_to_number(leading_byte)?;
    // The first digit parsed successfully, so step past it.
    input.next();

    let radix = base.radix();

    for _ in 1..base.maximum_number_of_digits() {
        let next_digit = input
            .peek()
            .and_then(|&&byte| base.ascii_to_number(byte));

        if let Some(digit) = next_digit {
            // Step past the byte now that it is known to be a digit.
            input.next();

            // Wrapping arithmetic is required: three octal digits carry nine
            // bits, one more than a `u8` holds.
            //
            // GNU Core Utilities: "if nnn is a nine-bit value, the ninth bit is ignored"
            // https://www.gnu.org/software/coreutils/manual/html_node/echo-invocation.html
            value = value.wrapping_mul(radix).wrapping_add(digit);
        } else {
            break;
        }
    }

    Some(value)
}
/// Writes `input` to `output`, interpreting backslash escape sequences.
///
/// Returns `ControlFlow::Break(())` as soon as a `\c` escape is encountered
/// (nothing after it is written), and `ControlFlow::Continue(())` once the
/// whole input has been written. I/O errors from `output` are propagated.
fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlow<()>> {
    let mut bytes = input.iter().peekable();

    while let Some(&byte) = bytes.next() {
        // Anything other than a backslash is copied through verbatim.
        if byte != b'\\' {
            output.write_all(&[byte])?;
            continue;
        }

        // `\NNN` octal syntax. A leading '0' is deliberately excluded here
        // because `\0NNN` is handled further down.
        if let Some(&&leading_digit @ b'1'..=b'7') = bytes.peek() {
            let kind = BackslashNumberType::OctalStartingWithNonZero(leading_digit);
            // Anything starting with "\1" through "\7" always parses, so the
            // unwrap cannot fail.
            let value = parse_backslash_number(&mut bytes, kind).unwrap();
            output.write_all(&[value])?;
            continue;
        }

        // A backslash at the very end of the input is printed literally.
        let Some(&escape) = bytes.next() else {
            output.write_all(br"\")?;
            continue;
        };

        match escape {
            b'\\' => output.write_all(br"\")?,
            b'a' => output.write_all(b"\x07")?,
            b'b' => output.write_all(b"\x08")?,
            // `\c` suppresses all further output.
            b'c' => return Ok(ControlFlow::Break(())),
            b'e' => output.write_all(b"\x1B")?,
            b'f' => output.write_all(b"\x0C")?,
            b'n' => output.write_all(b"\n")?,
            b'r' => output.write_all(b"\r")?,
            b't' => output.write_all(b"\t")?,
            b'v' => output.write_all(b"\x0B")?,
            b'x' => match parse_backslash_number(&mut bytes, BackslashNumberType::Hexadecimal) {
                Some(value) => output.write_all(&[value])?,
                // "\x" not followed by a hexadecimal digit is kept literally.
                None => output.write_all(br"\x")?,
            },
            b'0' => {
                // "\0" not followed by an octal digit yields a NUL byte (0x00).
                let value =
                    parse_backslash_number(&mut bytes, BackslashNumberType::OctalStartingWithZero)
                        .unwrap_or(0);
                output.write_all(&[value])?
            }
            // Unknown escape: the backslash and the following byte are
            // emitted unchanged.
            other => output.write_all(&[b'\\', other])?,
        }
    }

    Ok(ControlFlow::Continue(()))
}
// A workaround because clap interprets the first '--' as a marker that a value
// follows. In order to use '--' as a value, we have to inject an additional '--'
fn handle_double_hyphens(args: impl uucore::Args) -> impl uucore::Args {
@ -367,8 +135,11 @@ fn execute(
}
if escaped {
if print_escaped(bytes, stdout_lock)?.is_break() {
return Ok(());
for item in parse_escape_only(bytes) {
match item {
EscapedChar::End => return Ok(()),
c => c.write(&mut *stdout_lock)?,
};
}
} else {
stdout_lock.write_all(bytes)?;