1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

echo: fix wrapping behavior of octal sequences

This commit is contained in:
Terts Diepraam 2023-08-28 22:51:45 +02:00
parent bd82d678ef
commit 4623575a66
2 changed files with 71 additions and 55 deletions

View file

@ -21,73 +21,72 @@ mod options {
pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape"; pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape";
} }
fn parse_code( /// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences
input: &mut Peekable<Chars>, fn parse_code(input: &mut Peekable<Chars>, base: u8, max_digits: u32) -> Option<char> {
base: u32, // All arithmetic on `ret` needs to be wrapping, because octal input can
max_digits: u32, // take 3 digits, which is 9 bits, and therefore more than what fits in a
bits_per_digit: u32, // `u8`. GNU just seems to wrap these values.
) -> Option<char> { // Note that if we instead make `ret` a `u32` and use `char::from_u32`, it will
let mut ret = 0x8000_0000; // yield incorrect results because it will interpret values larger than
for _ in 0..max_digits { // `u8::MAX` as unicode.
match input.peek().and_then(|c| c.to_digit(base)) { let mut ret = input.peek().and_then(|c| c.to_digit(base as u32))? as u8;
Some(n) => ret = (ret << bits_per_digit) | n,
// We can safely ignore the None case because we just peeked it.
let _ = input.next();
for _ in 1..max_digits {
match input.peek().and_then(|c| c.to_digit(base as u32)) {
Some(n) => ret = ret.wrapping_mul(base).wrapping_add(n as u8),
None => break, None => break,
} }
input.next(); // We can safely ignore the None case because we just peeked it.
let _ = input.next();
} }
std::char::from_u32(ret)
Some(ret.into())
} }
fn print_escaped(input: &str, mut output: impl Write) -> io::Result<bool> { fn print_escaped(input: &str, mut output: impl Write) -> io::Result<bool> {
let mut should_stop = false;
let mut buffer = ['\\'; 2];
// TODO `cargo +nightly clippy` complains that `.peek()` is never
// called on `iter`. However, `peek()` is called inside the
// `parse_code()` function that borrows `iter`.
let mut iter = input.chars().peekable(); let mut iter = input.chars().peekable();
while let Some(mut c) = iter.next() { while let Some(c) = iter.next() {
let mut start = 1; if c != '\\' {
write!(output, "{c}")?;
continue;
}
if c == '\\' {
if let Some(next) = iter.next() { if let Some(next) = iter.next() {
c = match next { let unescaped = match next {
'\\' => '\\', '\\' => '\\',
'a' => '\x07', 'a' => '\x07',
'b' => '\x08', 'b' => '\x08',
'c' => { 'c' => return Ok(true),
should_stop = true;
break;
}
'e' => '\x1b', 'e' => '\x1b',
'f' => '\x0c', 'f' => '\x0c',
'n' => '\n', 'n' => '\n',
'r' => '\r', 'r' => '\r',
't' => '\t', 't' => '\t',
'v' => '\x0b', 'v' => '\x0b',
'x' => parse_code(&mut iter, 16, 2, 4).unwrap_or_else(|| { 'x' => {
start = 0; if let Some(c) = parse_code(&mut iter, 16, 2) {
next c
}), } else {
'0' => parse_code(&mut iter, 8, 3, 3).unwrap_or('\0'), write!(output, "\\")?;
_ => { 'x'
start = 0; }
next }
'0' => parse_code(&mut iter, 8, 3).unwrap_or('\0'),
c => {
write!(output, "\\")?;
c
} }
}; };
write!(output, "{unescaped}")?;
} else {
write!(output, "\\")?;
} }
} }
buffer[1] = c; Ok(false)
// because printing char slices is apparently not available in the standard library
for ch in &buffer[start..] {
write!(output, "{ch}")?;
}
}
Ok(should_stop)
} }
#[uucore::main] #[uucore::main]

View file

@ -236,3 +236,20 @@ fn test_hyphen_values_between() {
.success() .success()
.stdout_is("dumdum dum dum dum -e dum\n"); .stdout_is("dumdum dum dum dum -e dum\n");
} }
#[test]
fn wrapping_octal() {
// GNU compatibility check for `echo -e` octal escapes.
//
// An octal escape may carry three digits after the leading `\0`, i.e. up to
// 9 bits, but only a single byte is written to stdout. Values of `\0400`
// (256) and greater therefore do not fit in a `u8`, so we test that they
// wrap around modulo 256.
//
// Input:    \0501 = 0o501 = 321 = 0b1_0100_0001 (yes, **9** bits)
// Expected: \0101 = 0o101 =  65 = 0b0100_0001 = 'A' (the 9th bit is dropped)
//
// The trailing "\n" in the expected output is the newline `echo` appends,
// since `-n` is not passed.
new_ucmd!()
.arg("-e")
.arg("\\0501")
.succeeds()
.stdout_is("A\n");
}