1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

tr: process octal escape sequences

closes #1817
This commit is contained in:
Daniel Rocco 2021-03-16 09:42:06 -04:00
parent 618d4a4fa5
commit fbb9c50050
2 changed files with 105 additions and 12 deletions

View file

@ -14,17 +14,46 @@ use std::cmp::min;
use std::iter::Peekable;
use std::ops::RangeInclusive;
/// Parse a backslash escape sequence to the corresponding character. Assumes
/// the string starts from the character _after_ the `\` and is not empty.
///
/// Returns a tuple containing the character and the number of characters
/// consumed from the input. The alphabetic escape sequences consume 1
/// character; octal escape sequences consume 1 to 3 octal digits.
#[inline]
fn unescape_char(c: char) -> char {
match c {
'a' => 0x07u8 as char,
'b' => 0x08u8 as char,
'f' => 0x0cu8 as char,
'v' => 0x0bu8 as char,
'n' => '\n',
'r' => '\r',
't' => '\t',
_ => c,
/// Parse a backslash escape sequence into the character it denotes.
///
/// `s` must begin with the character immediately _after_ the `\` and must not
/// be empty.
///
/// Returns the decoded character together with the number of **bytes** of `s`
/// that the sequence occupies. The count is a byte length (not a character
/// count) because the caller adds it to the one-byte `\` and mixes it with
/// `len_utf8()` offsets on its non-escape path:
///
/// * an octal escape consumes 1 to 3 octal digits, one byte each;
/// * any other character, whether a recognized escape letter or not,
///   consumes its own UTF-8 encoded length.
///
/// # Panics
///
/// Panics if `s` is empty.
fn parse_sequence(s: &str) -> (char, usize) {
    const BITS_PER_OCTAL_DIGIT: u32 = 3;
    const MAX_OCTAL_DIGITS: usize = 3;

    let first = s.chars().next().expect("invalid escape: empty string");

    if first.is_digit(8) {
        // Accumulate the leading run of octal digits, capped at three.
        let (value, consumed) = s
            .chars()
            .take(MAX_OCTAL_DIGITS)
            .map(|ch| ch.to_digit(8))
            .take_while(Option::is_some)
            .flatten()
            .fold((0u32, 0usize), |(acc, count), digit| {
                ((acc << BITS_PER_OCTAL_DIGIT) | digit, count + 1)
            });
        // Three octal digits encode at most 0o777, which is always a valid
        // Unicode scalar value, so this conversion cannot fail.
        (from_u32(value).expect("invalid octal escape"), consumed)
    } else {
        let unescaped = match first {
            'a' => 0x07u8 as char,
            'b' => 0x08u8 as char,
            'f' => 0x0cu8 as char,
            'v' => 0x0bu8 as char,
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            // Unrecognized escapes stand for the character itself.
            other => other,
        };
        // Report the byte width of the character that was read so a
        // multi-byte character after `\` is skipped in full.
        (unescaped, first.len_utf8())
    }
}
@ -52,8 +81,9 @@ impl<'a> Iterator for Unescape<'a> {
'\\' if self.string.len() > 1 => {
// yes---it's \ and it's not the last char in a string
// we know that \ is 1 byte long so we can index into the string safely
let c = self.string[1..].chars().next().unwrap();
(Some(unescape_char(c)), 1 + c.len_utf8())
let (c, consumed) = parse_sequence(&self.string[1..]);
(Some(c), 1 + consumed)
}
c => (Some(c), c.len_utf8()), // not an escape char
};

View file

@ -134,3 +134,66 @@ fn missing_required_second_arg_fails() {
assert!(!result.success);
assert!(result.stderr.contains("missing operand after"));
}
#[test]
fn test_interpret_backslash_escapes() {
    // Every letter of SET1 is translated to the control character written
    // as the same-letter backslash escape in SET2.
    let sets = ["abfnrtv", r"\a\b\f\n\r\t\v"];
    let input = "abfnrtv";
    let expected = "\u{7}\u{8}\u{c}\n\r\t\u{b}";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_interpret_unrecognized_backslash_escape_as_character() {
    // A backslash before a character with no special meaning yields that
    // character, so the translation maps each input character to itself.
    let sets = ["qcz+=~-", r"\q\c\z\+\=\~\-"];
    let input = "qcz+=~-";
    let expected = "qcz+=~-";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_interpret_single_octal_escape() {
    // Octal 015 is the carriage return.
    let sets = ["X", r"\015"];
    let input = "X";
    let expected = "\r";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_interpret_one_and_two_digit_octal_escape() {
    // `\0` is NUL, `\11` is a tab, and `\77` is `?`.
    let sets = ["XYZ", r"\0\11\77"];
    let input = "XYZ";
    let expected = "\0\t?";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_octal_escape_is_at_most_three_digits() {
    // Only `015` belongs to the escape; the trailing `6` is a literal.
    let sets = ["XY", r"\0156"];
    let input = "XY";
    let expected = "\r6";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_non_octal_digit_ends_escape() {
    // `8` and `9` are not octal digits, so SET2 reads as
    // NUL, `8`, tab, `9`, `5`, `6`; only the first four are used here.
    let sets = ["rust", r"\08\11956"];
    let input = "rust";
    let expected = "\08\t9";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_interpret_backslash_at_eol_literally() {
    // A lone trailing backslash has nothing to escape and stands for itself.
    let sets = ["X", r"\"];
    let input = "X";
    let expected = "\\";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}