diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index cd9467714..947887210 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -6,6 +6,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use std::io::{self, Write}; use std::iter::Peekable; +use std::ops::ControlFlow; use std::str::Chars; use uucore::error::{FromIo, UResult}; use uucore::{format_usage, help_about, help_section, help_usage}; @@ -21,73 +22,98 @@ mod options { pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape"; } -fn parse_code( - input: &mut Peekable<Chars>, - base: u32, - max_digits: u32, - bits_per_digit: u32, -) -> Option<char> { - let mut ret = 0x8000_0000; - for _ in 0..max_digits { - match input.peek().and_then(|c| c.to_digit(base)) { - Some(n) => ret = (ret << bits_per_digit) | n, - None => break, - } - input.next(); - } - std::char::from_u32(ret) +#[repr(u8)] +#[derive(Clone, Copy)] +enum Base { + Oct = 8, + Hex = 16, } -fn print_escaped(input: &str, mut output: impl Write) -> io::Result<bool> { - let mut should_stop = false; +impl Base { + fn max_digits(&self) -> u8 { + match self { + Self::Oct => 3, + Self::Hex => 2, + } + } +} - let mut buffer = ['\\'; 2]; +/// Parse the numeric part of the `\xHH` and `\0NNN` escape sequences +fn parse_code(input: &mut Peekable<Chars>, base: Base) -> Option<char> { + // All arithmetic on `ret` needs to be wrapping, because octal input can + // take 3 digits, which is 9 bits, and therefore more than what fits in a + // `u8`. GNU just seems to wrap these values. + // Note that making `ret` a `u32` and using `char::from_u32` instead would + // yield incorrect results because it would interpret values larger than + // `u8::MAX` as unicode. + let mut ret = input.peek().and_then(|c| c.to_digit(base as u32))? as u8; - // TODO `cargo +nightly clippy` complains that `.peek()` is never - // called on `iter`. However, `peek()` is called inside the - // `parse_code()` function that borrows `iter`.
+ // We can safely ignore the None case because we just peeked it. + let _ = input.next(); + + for _ in 1..base.max_digits() { + match input.peek().and_then(|c| c.to_digit(base as u32)) { + Some(n) => ret = ret.wrapping_mul(base as u8).wrapping_add(n as u8), + None => break, + } + // We can safely ignore the None case because we just peeked it. + let _ = input.next(); + } + + Some(ret.into()) +} + +fn print_escaped(input: &str, mut output: impl Write) -> io::Result<ControlFlow<()>> { let mut iter = input.chars().peekable(); - while let Some(mut c) = iter.next() { - let mut start = 1; + while let Some(c) = iter.next() { + if c != '\\' { + write!(output, "{c}")?; + continue; + } - if c == '\\' { - if let Some(next) = iter.next() { - c = match next { - '\\' => '\\', - 'a' => '\x07', - 'b' => '\x08', - 'c' => { - should_stop = true; - break; - } - 'e' => '\x1b', - 'f' => '\x0c', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'v' => '\x0b', - 'x' => parse_code(&mut iter, 16, 2, 4).unwrap_or_else(|| { - start = 0; - next - }), - '0' => parse_code(&mut iter, 8, 3, 3).unwrap_or('\0'), - _ => { - start = 0; - next - } - }; + // This is for the \NNN syntax for octal sequences. + // Note that '0' is intentionally omitted because that + // would be the \0NNN syntax. + if let Some('1'..='8') = iter.peek() { + if let Some(parsed) = parse_code(&mut iter, Base::Oct) { + write!(output, "{parsed}")?; + continue; + } } - buffer[1] = c; - - // because printing char slices is apparently not available in the standard library - for ch in &buffer[start..]
{ - write!(output, "{ch}")?; + if let Some(next) = iter.next() { + let unescaped = match next { + '\\' => '\\', + 'a' => '\x07', + 'b' => '\x08', + 'c' => return Ok(ControlFlow::Break(())), + 'e' => '\x1b', + 'f' => '\x0c', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'v' => '\x0b', + 'x' => { + if let Some(c) = parse_code(&mut iter, Base::Hex) { + c + } else { + write!(output, "\\")?; + 'x' + } + } + '0' => parse_code(&mut iter, Base::Oct).unwrap_or('\0'), + c => { + write!(output, "\\")?; + c + } + }; + write!(output, "{unescaped}")?; + } else { + write!(output, "\\")?; } } - Ok(should_stop) + Ok(ControlFlow::Continue(())) } #[uucore::main] @@ -148,8 +174,7 @@ fn execute(no_newline: bool, escaped: bool, free: &[String]) -> io::Result<()> { write!(output, " ")?; } if escaped { - let should_stop = print_escaped(input, &mut output)?; - if should_stop { + if print_escaped(input, &mut output)?.is_break() { break; } } else { diff --git a/tests/by-util/test_echo.rs b/tests/by-util/test_echo.rs index 3a8e7f86b..dce5a4c95 100644 --- a/tests/by-util/test_echo.rs +++ b/tests/by-util/test_echo.rs @@ -236,3 +236,47 @@ fn test_hyphen_values_between() { .success() .stdout_is("dumdum dum dum dum -e dum\n"); } + +#[test] +fn wrapping_octal() { + // Some odd behavior of GNU. Values of \0400 and greater do not fit in the + // u8 that we write to stdout. 
So we test that it wraps: + // + // We give it this input: + // \0501 = 1_0100_0001 (yes, **9** bits) + // This should be wrapped into: + // \0101 = 'A' = 0100_0001, + // because we only write a single character + new_ucmd!() + .arg("-e") + .arg("\\0501") + .succeeds() + .stdout_is("A\n"); +} + +#[test] +fn old_octal_syntax() { + new_ucmd!() + .arg("-e") + .arg("\\1foo") + .succeeds() + .stdout_is("\x01foo\n"); + + new_ucmd!() + .arg("-e") + .arg("\\43foo") + .succeeds() + .stdout_is("#foo\n"); + + new_ucmd!() + .arg("-e") + .arg("\\101 foo") + .succeeds() + .stdout_is("A foo\n"); + + new_ucmd!() + .arg("-e") + .arg("\\1011") + .succeeds() + .stdout_is("A1\n"); +}