tr: process octal escape sequences

closes #1817
2025-09-13 18:47:58 +00:00 · 2021-03-16 09:42:06 -04:00 · 2021-03-16 09:42:06 -04:00 · fbb9c50050
commit fbb9c50050
parent 618d4a4fa5
2 changed files with 105 additions and 12 deletions
--- a/src/uu/tr/src/expand.rs
+++ b/src/uu/tr/src/expand.rs
@ -14,17 +14,46 @@ use std::cmp::min;
 use std::iter::Peekable;
 use std::ops::RangeInclusive;

+/// Parse a backslash escape sequence to the corresponding character. Assumes
+/// the string starts from the character _after_ the `\` and is not empty.
+///
+/// Returns the character and the number of *bytes* consumed from the input:
+/// 1 to 3 for an octal escape (ASCII digits), otherwise the UTF-8 length of
+/// the escaped character, so the caller always advances to a char boundary.
 #[inline]
-fn unescape_char(c: char) -> char {
-    match c {
-        'a' => 0x07u8 as char,
-        'b' => 0x08u8 as char,
-        'f' => 0x0cu8 as char,
-        'v' => 0x0bu8 as char,
-        'n' => '\n',
-        'r' => '\r',
-        't' => '\t',
-        _ => c,
+fn parse_sequence(s: &str) -> (char, usize) {
+    let c = s.chars().next().expect("invalid escape: empty string");
+
+    if '0' <= c && c <= '7' {
+        let mut v = c.to_digit(8).unwrap();
+        let mut consumed = 1;
+        let bits_per_digit = 3;
+
+        for c in s.chars().skip(1).take(2) {
+            match c.to_digit(8) {
+                Some(c) => {
+                    v = (v << bits_per_digit) | c;
+                    consumed += 1;
+                }
+                None => break,
+            }
+        }
+
+        (from_u32(v).expect("invalid octal escape"), consumed)
+    } else {
+        (
+            match c {
+                'a' => 0x07u8 as char,
+                'b' => 0x08u8 as char,
+                'f' => 0x0cu8 as char,
+                'v' => 0x0bu8 as char,
+                'n' => '\n',
+                'r' => '\r',
+                't' => '\t',
+                c => c,
+            },
+            c.len_utf8(),
+        )
    }
 }

@ -52,8 +81,9 @@ impl<'a> Iterator for Unescape<'a> {
            '\\' if self.string.len() > 1 => {
                // yes---it's \ and it's not the last char in a string
                // we know that \ is 1 byte long so we can index into the string safely
-                let c = self.string[1..].chars().next().unwrap();
-                (Some(unescape_char(c)), 1 + c.len_utf8())
+                let (c, consumed) = parse_sequence(&self.string[1..]);
+
+                (Some(c), 1 + consumed)
            }
            c => (Some(c), c.len_utf8()), // not an escape char
        };
--- a/tests/by-util/test_tr.rs
+++ b/tests/by-util/test_tr.rs
@ -134,3 +134,66 @@ fn missing_required_second_arg_fails() {
    assert!(!result.success);
    assert!(result.stderr.contains("missing operand after"));
 }
+
+#[test]
+fn test_interpret_backslash_escapes() {
+    new_ucmd!()
+        .args(&["abfnrtv", r"\a\b\f\n\r\t\v"])
+        .pipe_in("abfnrtv")
+        .succeeds()
+        .stdout_is("\u{7}\u{8}\u{c}\n\r\t\u{b}");
+}
+
+#[test]
+fn test_interpret_unrecognized_backslash_escape_as_character() {
+    new_ucmd!()
+        .args(&["qcz+=~-", r"\q\c\z\+\=\~\-"])
+        .pipe_in("qcz+=~-")
+        .succeeds()
+        .stdout_is("qcz+=~-");
+}
+
+#[test]
+fn test_interpret_single_octal_escape() {
+    new_ucmd!()
+        .args(&["X", r"\015"])
+        .pipe_in("X")
+        .succeeds()
+        .stdout_is("\r");
+}
+
+#[test]
+fn test_interpret_one_and_two_digit_octal_escape() {
+    new_ucmd!()
+        .args(&["XYZ", r"\0\11\77"])
+        .pipe_in("XYZ")
+        .succeeds()
+        .stdout_is("\0\t?");
+}
+
+#[test]
+fn test_octal_escape_is_at_most_three_digits() {
+    new_ucmd!()
+        .args(&["XY", r"\0156"])
+        .pipe_in("XY")
+        .succeeds()
+        .stdout_is("\r6");
+}
+
+#[test]
+fn test_non_octal_digit_ends_escape() {
+    new_ucmd!()
+        .args(&["rust", r"\08\11956"])
+        .pipe_in("rust")
+        .succeeds()
+        .stdout_is("\08\t9");
+}
+
+#[test]
+fn test_interpret_backslash_at_eol_literally() {
+    new_ucmd!()
+        .args(&["X", r"\"])
+        .pipe_in("X")
+        .succeeds()
+        .stdout_is("\\");
+}