From fbb9c50050af8f8e0c89327150d8176193873d1e Mon Sep 17 00:00:00 2001
From: Daniel Rocco
Date: Tue, 16 Mar 2021 09:42:06 -0400
Subject: [PATCH] tr: process octal escape sequences

closes #1817
---
 src/uu/tr/src/expand.rs  | 54 ++++++++++++++++++++++++++--------
 tests/by-util/test_tr.rs | 63 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/src/uu/tr/src/expand.rs b/src/uu/tr/src/expand.rs
index 3291d57ae..e71cf262c 100644
--- a/src/uu/tr/src/expand.rs
+++ b/src/uu/tr/src/expand.rs
@@ -14,17 +14,46 @@ use std::cmp::min;
 use std::iter::Peekable;
 use std::ops::RangeInclusive;
 
+/// Parse a backslash escape sequence to the corresponding character. Assumes
+/// the string starts from the character _after_ the `\` and is not empty.
+///
+/// Returns a tuple containing the character and the number of bytes consumed
+/// from the input: the UTF-8 length of a non-octal escaped character, or 1 to
+/// 3 for an octal escape (one byte per octal digit).
 #[inline]
-fn unescape_char(c: char) -> char {
-    match c {
-        'a' => 0x07u8 as char,
-        'b' => 0x08u8 as char,
-        'f' => 0x0cu8 as char,
-        'v' => 0x0bu8 as char,
-        'n' => '\n',
-        'r' => '\r',
-        't' => '\t',
-        _ => c,
+fn parse_sequence(s: &str) -> (char, usize) {
+    let c = s.chars().next().expect("invalid escape: empty string");
+
+    if ('0'..='7').contains(&c) {
+        let mut v = c.to_digit(8).unwrap();
+        let mut consumed = 1;
+        let bits_per_digit = 3;
+
+        for c in s.chars().skip(1).take(2) {
+            match c.to_digit(8) {
+                Some(c) => {
+                    v = (v << bits_per_digit) | c;
+                    consumed += 1;
+                }
+                None => break,
+            }
+        }
+
+        (from_u32(v).expect("invalid octal escape"), consumed)
+    } else {
+        (
+            match c {
+                'a' => 0x07u8 as char,
+                'b' => 0x08u8 as char,
+                'f' => 0x0cu8 as char,
+                'v' => 0x0bu8 as char,
+                'n' => '\n',
+                'r' => '\r',
+                't' => '\t',
+                c => c,
+            },
+            c.len_utf8(),
+        )
     }
 }
 
@@ -52,8 +81,9 @@ impl<'a> Iterator for Unescape<'a> {
             '\\' if self.string.len() > 1 => {
                 // yes---it's \ and it's not the last char in a string
                 // we know that \ is 1 byte long so we can index into the string safely
-                let c = self.string[1..].chars().next().unwrap();
-                (Some(unescape_char(c)), 1 + c.len_utf8())
+                let (c, consumed) = parse_sequence(&self.string[1..]);
+
+                (Some(c), 1 + consumed)
             }
             c => (Some(c), c.len_utf8()), // not an escape char
         };
diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs
index b32d98d29..a1500bcf6 100644
--- a/tests/by-util/test_tr.rs
+++ b/tests/by-util/test_tr.rs
@@ -134,3 +134,66 @@ fn missing_required_second_arg_fails() {
     assert!(!result.success);
     assert!(result.stderr.contains("missing operand after"));
 }
+
+#[test]
+fn test_interpret_backslash_escapes() {
+    new_ucmd!()
+        .args(&["abfnrtv", r"\a\b\f\n\r\t\v"])
+        .pipe_in("abfnrtv")
+        .succeeds()
+        .stdout_is("\u{7}\u{8}\u{c}\n\r\t\u{b}");
+}
+
+#[test]
+fn test_interpret_unrecognized_backslash_escape_as_character() {
+    new_ucmd!()
+        .args(&["qcz+=~-", r"\q\c\z\+\=\~\-"])
+        .pipe_in("qcz+=~-")
+        .succeeds()
+        .stdout_is("qcz+=~-");
+}
+
+#[test]
+fn test_interpret_single_octal_escape() {
+    new_ucmd!()
+        .args(&["X", r"\015"])
+        .pipe_in("X")
+        .succeeds()
+        .stdout_is("\r");
+}
+
+#[test]
+fn test_interpret_one_and_two_digit_octal_escape() {
+    new_ucmd!()
+        .args(&["XYZ", r"\0\11\77"])
+        .pipe_in("XYZ")
+        .succeeds()
+        .stdout_is("\0\t?");
+}
+
+#[test]
+fn test_octal_escape_is_at_most_three_digits() {
+    new_ucmd!()
+        .args(&["XY", r"\0156"])
+        .pipe_in("XY")
+        .succeeds()
+        .stdout_is("\r6");
+}
+
+#[test]
+fn test_non_octal_digit_ends_escape() {
+    new_ucmd!()
+        .args(&["rust", r"\08\11956"])
+        .pipe_in("rust")
+        .succeeds()
+        .stdout_is("\08\t9");
+}
+
+#[test]
+fn test_interpret_backslash_at_eol_literally() {
+    new_ucmd!()
+        .args(&["X", r"\"])
+        .pipe_in("X")
+        .succeeds()
+        .stdout_is("\\");
+}