mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
parent
618d4a4fa5
commit
fbb9c50050
2 changed files with 105 additions and 12 deletions
|
@ -14,17 +14,46 @@ use std::cmp::min;
|
|||
use std::iter::Peekable;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
/// Parse a backslash escape sequence to the corresponding character. Assumes
|
||||
/// the string starts from the character _after_ the `\` and is not empty.
|
||||
///
|
||||
/// Returns a tuple containing the character and the number of characters
|
||||
/// consumed from the input. The alphabetic escape sequences consume 1
|
||||
/// character; octal escape sequences consume 1 to 3 octal digits.
|
||||
#[inline]
|
||||
fn unescape_char(c: char) -> char {
|
||||
match c {
|
||||
'a' => 0x07u8 as char,
|
||||
'b' => 0x08u8 as char,
|
||||
'f' => 0x0cu8 as char,
|
||||
'v' => 0x0bu8 as char,
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
_ => c,
|
||||
/// Parse a backslash escape sequence to the corresponding character.
///
/// `s` must begin at the character _after_ the `\` and must not be empty.
///
/// Returns the decoded character together with the number of **bytes** of `s`
/// that the sequence occupies. Octal escapes consume 1 to 3 octal digits (one
/// byte each); every other escape consumes exactly one character, whose UTF-8
/// encoding may be longer than one byte. Reporting bytes rather than
/// characters keeps the result usable as a string-slice offset even for input
/// such as `\é`.
///
/// # Panics
///
/// Panics if `s` is empty.
fn parse_sequence(s: &str) -> (char, usize) {
    let mut rest = s.chars();
    let c = rest.next().expect("invalid escape: empty string");

    if let Some(first) = c.to_digit(8) {
        // Each octal digit contributes three bits to the value.
        const BITS_PER_DIGIT: u32 = 3;

        let mut value = first;
        let mut consumed = 1;

        // Up to two further digits; the first non-octal character ends the
        // escape and is left for the caller to handle.
        for digit in rest.take(2).map(|d| d.to_digit(8)) {
            match digit {
                Some(d) => {
                    value = (value << BITS_PER_DIGIT) | d;
                    consumed += 1;
                }
                None => break,
            }
        }

        // Three octal digits give at most 0o777, which is always a valid
        // Unicode scalar value, so this conversion is not expected to fail.
        (from_u32(value).expect("invalid octal escape"), consumed)
    } else {
        let unescaped = match c {
            'a' => 0x07u8 as char,
            'b' => 0x08u8 as char,
            'f' => 0x0cu8 as char,
            'v' => 0x0bu8 as char,
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            // Unrecognized escapes stand for the character itself.
            other => other,
        };

        // Report the byte width of the escaped character, not a flat 1, so a
        // multi-byte character is skipped in full.
        (unescaped, c.len_utf8())
    }
}
|
||||
|
||||
|
@ -52,8 +81,9 @@ impl<'a> Iterator for Unescape<'a> {
|
|||
'\\' if self.string.len() > 1 => {
|
||||
// yes---it's \ and it's not the last char in a string
|
||||
// we know that \ is 1 byte long so we can index into the string safely
|
||||
let c = self.string[1..].chars().next().unwrap();
|
||||
(Some(unescape_char(c)), 1 + c.len_utf8())
|
||||
let (c, consumed) = parse_sequence(&self.string[1..]);
|
||||
|
||||
(Some(c), 1 + consumed)
|
||||
}
|
||||
c => (Some(c), c.len_utf8()), // not an escape char
|
||||
};
|
||||
|
|
|
@ -134,3 +134,66 @@ fn missing_required_second_arg_fails() {
|
|||
assert!(!result.success);
|
||||
assert!(result.stderr.contains("missing operand after"));
|
||||
}
|
||||
|
||||
/// Each alphabetic escape (`\a \b \f \n \r \t \v`) in the second set must be
/// translated to its control character.
#[test]
fn test_interpret_backslash_escapes() {
    let sets = ["abfnrtv", r"\a\b\f\n\r\t\v"];
    let input = "abfnrtv";
    let expected = "\u{7}\u{8}\u{c}\n\r\t\u{b}";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
||||
/// A backslash followed by a character that is not a known escape must stand
/// for that character itself.
#[test]
fn test_interpret_unrecognized_backslash_escape_as_character() {
    let sets = ["qcz+=~-", r"\q\c\z\+\=\~\-"];
    let input = "qcz+=~-";
    let expected = "qcz+=~-";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
||||
/// A three-digit octal escape (`\015`) must be read as one character, here a
/// carriage return.
#[test]
fn test_interpret_single_octal_escape() {
    let sets = ["X", r"\015"];
    let input = "X";
    let expected = "\r";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
||||
/// Octal escapes with one digit (`\0`) and two digits (`\11`, `\77`) must be
/// accepted, mapping `XYZ` to NUL, tab and `?`.
#[test]
fn test_interpret_one_and_two_digit_octal_escape() {
    let sets = ["XYZ", r"\0\11\77"];
    let input = "XYZ";
    let expected = "\0\t?";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
||||
/// Only the first three digits belong to an octal escape: `\0156` is `\015`
/// (carriage return) followed by a literal `6`.
#[test]
fn test_octal_escape_is_at_most_three_digits() {
    let sets = ["XY", r"\0156"];
    let input = "XY";
    let expected = "\r6";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
||||
/// A digit outside `0`-`7` ends an octal escape early: `\08` is NUL then `8`,
/// and `\119` is tab then `9`.
#[test]
fn test_non_octal_digit_ends_escape() {
    let sets = ["rust", r"\08\11956"];
    let input = "rust";
    let expected = "\08\t9";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
||||
/// A lone backslash at the end of a set has nothing to escape and must be
/// taken as a literal backslash.
#[test]
fn test_interpret_backslash_at_eol_literally() {
    let sets = ["X", r"\"];
    let input = "X";
    let expected = "\\";

    new_ucmd!()
        .args(&sets)
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue