mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
echo: fix wrapping behavior of octal sequences
This commit is contained in:
parent
bd82d678ef
commit
4623575a66
2 changed files with 71 additions and 55 deletions
|
@ -21,73 +21,72 @@ mod options {
|
||||||
pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape";
|
pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape";
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_code(
|
/// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences
|
||||||
input: &mut Peekable<Chars>,
|
fn parse_code(input: &mut Peekable<Chars>, base: u8, max_digits: u32) -> Option<char> {
|
||||||
base: u32,
|
// All arithmetic on `ret` needs to be wrapping, because octal input can
|
||||||
max_digits: u32,
|
// take 3 digits, which is 9 bits, and therefore more than what fits in a
|
||||||
bits_per_digit: u32,
|
// `u8`. GNU just seems to wrap these values.
|
||||||
) -> Option<char> {
|
// Note that if we instead make `ret` a `u32` and use `char::from_u32`, it will
|
||||||
let mut ret = 0x8000_0000;
|
// yield incorrect results because it will interpret values larger than
|
||||||
for _ in 0..max_digits {
|
// `u8::MAX` as unicode.
|
||||||
match input.peek().and_then(|c| c.to_digit(base)) {
|
let mut ret = input.peek().and_then(|c| c.to_digit(base as u32))? as u8;
|
||||||
Some(n) => ret = (ret << bits_per_digit) | n,
|
|
||||||
|
// We can safely ignore the None case because we just peeked it.
|
||||||
|
let _ = input.next();
|
||||||
|
|
||||||
|
for _ in 1..max_digits {
|
||||||
|
match input.peek().and_then(|c| c.to_digit(base as u32)) {
|
||||||
|
Some(n) => ret = ret.wrapping_mul(base).wrapping_add(n as u8),
|
||||||
None => break,
|
None => break,
|
||||||
}
|
}
|
||||||
input.next();
|
// We can safely ignore the None case because we just peeked it.
|
||||||
|
let _ = input.next();
|
||||||
}
|
}
|
||||||
std::char::from_u32(ret)
|
|
||||||
|
Some(ret.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_escaped(input: &str, mut output: impl Write) -> io::Result<bool> {
|
fn print_escaped(input: &str, mut output: impl Write) -> io::Result<bool> {
|
||||||
let mut should_stop = false;
|
|
||||||
|
|
||||||
let mut buffer = ['\\'; 2];
|
|
||||||
|
|
||||||
// TODO `cargo +nightly clippy` complains that `.peek()` is never
|
|
||||||
// called on `iter`. However, `peek()` is called inside the
|
|
||||||
// `parse_code()` function that borrows `iter`.
|
|
||||||
let mut iter = input.chars().peekable();
|
let mut iter = input.chars().peekable();
|
||||||
while let Some(mut c) = iter.next() {
|
while let Some(c) = iter.next() {
|
||||||
let mut start = 1;
|
if c != '\\' {
|
||||||
|
write!(output, "{c}")?;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if c == '\\' {
|
|
||||||
if let Some(next) = iter.next() {
|
if let Some(next) = iter.next() {
|
||||||
c = match next {
|
let unescaped = match next {
|
||||||
'\\' => '\\',
|
'\\' => '\\',
|
||||||
'a' => '\x07',
|
'a' => '\x07',
|
||||||
'b' => '\x08',
|
'b' => '\x08',
|
||||||
'c' => {
|
'c' => return Ok(true),
|
||||||
should_stop = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
'e' => '\x1b',
|
'e' => '\x1b',
|
||||||
'f' => '\x0c',
|
'f' => '\x0c',
|
||||||
'n' => '\n',
|
'n' => '\n',
|
||||||
'r' => '\r',
|
'r' => '\r',
|
||||||
't' => '\t',
|
't' => '\t',
|
||||||
'v' => '\x0b',
|
'v' => '\x0b',
|
||||||
'x' => parse_code(&mut iter, 16, 2, 4).unwrap_or_else(|| {
|
'x' => {
|
||||||
start = 0;
|
if let Some(c) = parse_code(&mut iter, 16, 2) {
|
||||||
next
|
c
|
||||||
}),
|
} else {
|
||||||
'0' => parse_code(&mut iter, 8, 3, 3).unwrap_or('\0'),
|
write!(output, "\\")?;
|
||||||
_ => {
|
'x'
|
||||||
start = 0;
|
}
|
||||||
next
|
}
|
||||||
|
'0' => parse_code(&mut iter, 8, 3).unwrap_or('\0'),
|
||||||
|
c => {
|
||||||
|
write!(output, "\\")?;
|
||||||
|
c
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
write!(output, "{unescaped}")?;
|
||||||
|
} else {
|
||||||
|
write!(output, "\\")?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer[1] = c;
|
Ok(false)
|
||||||
|
|
||||||
// because printing char slices is apparently not available in the standard library
|
|
||||||
for ch in &buffer[start..] {
|
|
||||||
write!(output, "{ch}")?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(should_stop)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[uucore::main]
|
#[uucore::main]
|
||||||
|
|
|
@ -236,3 +236,20 @@ fn test_hyphen_values_between() {
|
||||||
.success()
|
.success()
|
||||||
.stdout_is("dumdum dum dum dum -e dum\n");
|
.stdout_is("dumdum dum dum dum -e dum\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn wrapping_octal() {
|
||||||
|
// Some odd behavior of GNU. Values of \0400 and greater do not fit in the
|
||||||
|
// u8 that we write to stdout. So we test that it wraps:
|
||||||
|
//
|
||||||
|
// We give it this input:
|
||||||
|
// \o501 = 1_0100_0001 (yes, **9** bits)
|
||||||
|
// This should be wrapped into:
|
||||||
|
// \o101 = 'A' = 0100_0001,
|
||||||
|
// because we only write a single character
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-e")
|
||||||
|
.arg("\\0501")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_is("A\n");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue