mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
parent
618d4a4fa5
commit
fbb9c50050
2 changed files with 105 additions and 12 deletions
|
@ -14,17 +14,46 @@ use std::cmp::min;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
use std::ops::RangeInclusive;
|
use std::ops::RangeInclusive;
|
||||||
|
|
||||||
|
/// Parse a backslash escape sequence to the corresponding character. Assumes
/// the string starts from the character _after_ the `\` and is not empty.
///
/// Returns a tuple containing the character and the number of bytes consumed
/// from the input, suitable for use as an offset into `s`. An octal escape
/// consumes 1 to 3 octal digits (one byte each); any other escape consumes a
/// single character, which may be several bytes long when it is not ASCII.
///
/// # Panics
///
/// Panics if `s` is empty.
#[inline]
fn parse_sequence(s: &str) -> (char, usize) {
    let c = s.chars().next().expect("invalid escape: empty string");

    if let Some(first_digit) = c.to_digit(8) {
        // Octal escape: fold up to two further octal digits into the value.
        let bits_per_digit = 3;
        let mut v = first_digit;
        // Octal digits are ASCII, so one digit is exactly one byte.
        let mut consumed = 1;

        for next in s.chars().skip(1).take(2) {
            match next.to_digit(8) {
                Some(digit) => {
                    v = (v << bits_per_digit) | digit;
                    consumed += 1;
                }
                // The first non-octal character ends the escape.
                None => break,
            }
        }

        // Three octal digits encode at most 0o777, which is always a valid
        // Unicode scalar value, so this conversion is not expected to fail.
        (from_u32(v).expect("invalid octal escape"), consumed)
    } else {
        let unescaped = match c {
            'a' => 0x07u8 as char,
            'b' => 0x08u8 as char,
            'f' => 0x0cu8 as char,
            'v' => 0x0bu8 as char,
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            // Unrecognized escapes stand for the character itself.
            other => other,
        };

        // Report the byte length rather than a character count: the result
        // is added to a byte offset when advancing through the string, and a
        // count of 1 would land inside a multi-byte character.
        (unescaped, c.len_utf8())
    }
}
|
||||||
|
|
||||||
|
@ -52,8 +81,9 @@ impl<'a> Iterator for Unescape<'a> {
|
||||||
'\\' if self.string.len() > 1 => {
|
'\\' if self.string.len() > 1 => {
|
||||||
// yes---it's \ and it's not the last char in a string
|
// yes---it's \ and it's not the last char in a string
|
||||||
// we know that \ is 1 byte long so we can index into the string safely
|
// we know that \ is 1 byte long so we can index into the string safely
|
||||||
let c = self.string[1..].chars().next().unwrap();
|
let (c, consumed) = parse_sequence(&self.string[1..]);
|
||||||
(Some(unescape_char(c)), 1 + c.len_utf8())
|
|
||||||
|
(Some(c), 1 + consumed)
|
||||||
}
|
}
|
||||||
c => (Some(c), c.len_utf8()), // not an escape char
|
c => (Some(c), c.len_utf8()), // not an escape char
|
||||||
};
|
};
|
||||||
|
|
|
@ -134,3 +134,66 @@ fn missing_required_second_arg_fails() {
|
||||||
assert!(!result.success);
|
assert!(!result.success);
|
||||||
assert!(result.stderr.contains("missing operand after"));
|
assert!(result.stderr.contains("missing operand after"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Expects each alphabetic escape (`\a \b \f \n \r \t \v`) in the second
/// argument to be emitted as the matching control character.
#[test]
fn test_interpret_backslash_escapes() {
    let set1 = "abfnrtv";
    let set2 = r"\a\b\f\n\r\t\v";
    let stdin = "abfnrtv";
    let expected = "\u{7}\u{8}\u{c}\n\r\t\u{b}";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
||||||
|
/// Expects a backslash followed by a character with no special meaning to
/// stand for that character itself, leaving the input unchanged here.
#[test]
fn test_interpret_unrecognized_backslash_escape_as_character() {
    let set1 = "qcz+=~-";
    let set2 = r"\q\c\z\+\=\~\-";
    let stdin = "qcz+=~-";
    let expected = "qcz+=~-";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
||||||
|
/// Expects the three-digit octal escape `\015` to produce a carriage return.
#[test]
fn test_interpret_single_octal_escape() {
    let set1 = "X";
    let set2 = r"\015";
    let stdin = "X";
    let expected = "\r";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
||||||
|
/// Expects octal escapes shorter than three digits (`\0`, `\11`, `\77`) to
/// be accepted and to produce NUL, tab and `?` respectively.
#[test]
fn test_interpret_one_and_two_digit_octal_escape() {
    let set1 = "XYZ";
    let set2 = r"\0\11\77";
    let stdin = "XYZ";
    let expected = "\0\t?";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
||||||
|
/// Expects an octal escape to stop after three digits: `\0156` yields a
/// carriage return followed by a literal `6`.
#[test]
fn test_octal_escape_is_at_most_three_digits() {
    let set1 = "XY";
    let set2 = r"\0156";
    let stdin = "XY";
    let expected = "\r6";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
||||||
|
/// Expects the digits `8` and `9` to terminate an octal escape and be kept
/// as literal characters: `\08\11956` yields NUL, `8`, tab, `9`.
#[test]
fn test_non_octal_digit_ends_escape() {
    let set1 = "rust";
    let set2 = r"\08\11956";
    let stdin = "rust";
    let expected = "\08\t9";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
||||||
|
/// Expects a lone trailing backslash in the second argument to be treated
/// as a literal backslash character.
#[test]
fn test_interpret_backslash_at_eol_literally() {
    let set1 = "X";
    let set2 = r"\";
    let stdin = "X";
    let expected = "\\";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(stdin)
        .succeeds()
        .stdout_is(expected);
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue