mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 12:37:49 +00:00
quoting_style: fix multi-byte control characters
This commit is contained in:
parent
355103134b
commit
2331600f4c
1 changed files with 99 additions and 28 deletions
|
@ -76,17 +76,24 @@ enum EscapeState {
|
||||||
Octal(EscapeOctal),
|
Octal(EscapeOctal),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Byte we need to present as escaped octal, in the form of `\nnn`
|
/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
|
||||||
|
/// Only supports characters up to 2 bytes long in UTF-8.
|
||||||
struct EscapeOctal {
|
struct EscapeOctal {
|
||||||
c: u8,
|
c: [u8; 2],
|
||||||
state: EscapeOctalState,
|
state: EscapeOctalState,
|
||||||
idx: usize,
|
idx: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum EscapeOctalState {
|
enum EscapeOctalState {
|
||||||
Done,
|
Done,
|
||||||
Backslash,
|
FirstBackslash,
|
||||||
Value,
|
FirstValue,
|
||||||
|
LastBackslash,
|
||||||
|
LastValue,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
|
||||||
|
(byte >> (idx * 3)) & 0o7
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for EscapeOctal {
|
impl Iterator for EscapeOctal {
|
||||||
|
@ -95,12 +102,26 @@ impl Iterator for EscapeOctal {
|
||||||
fn next(&mut self) -> Option<char> {
|
fn next(&mut self) -> Option<char> {
|
||||||
match self.state {
|
match self.state {
|
||||||
EscapeOctalState::Done => None,
|
EscapeOctalState::Done => None,
|
||||||
EscapeOctalState::Backslash => {
|
EscapeOctalState::FirstBackslash => {
|
||||||
self.state = EscapeOctalState::Value;
|
self.state = EscapeOctalState::FirstValue;
|
||||||
Some('\\')
|
Some('\\')
|
||||||
}
|
}
|
||||||
EscapeOctalState::Value => {
|
EscapeOctalState::LastBackslash => {
|
||||||
let octal_digit = ((self.c) >> (self.idx * 3)) & 0o7;
|
self.state = EscapeOctalState::LastValue;
|
||||||
|
Some('\\')
|
||||||
|
}
|
||||||
|
EscapeOctalState::FirstValue => {
|
||||||
|
let octal_digit = byte_to_octal_digit(self.c[0], self.idx);
|
||||||
|
if self.idx == 0 {
|
||||||
|
self.state = EscapeOctalState::LastBackslash;
|
||||||
|
self.idx = 2;
|
||||||
|
} else {
|
||||||
|
self.idx -= 1;
|
||||||
|
}
|
||||||
|
Some(from_digit(octal_digit.into(), 8).unwrap())
|
||||||
|
}
|
||||||
|
EscapeOctalState::LastValue => {
|
||||||
|
let octal_digit = byte_to_octal_digit(self.c[1], self.idx);
|
||||||
if self.idx == 0 {
|
if self.idx == 0 {
|
||||||
self.state = EscapeOctalState::Done;
|
self.state = EscapeOctalState::Done;
|
||||||
} else {
|
} else {
|
||||||
|
@ -113,11 +134,25 @@ impl Iterator for EscapeOctal {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EscapeOctal {
|
impl EscapeOctal {
|
||||||
fn from(c: u8) -> Self {
|
fn from_char(c: char) -> Self {
|
||||||
|
if c.len_utf8() == 1 {
|
||||||
|
return Self::from_byte(c as u8);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut buf = [0; 2];
|
||||||
|
let _s = c.encode_utf8(&mut buf);
|
||||||
Self {
|
Self {
|
||||||
c,
|
c: buf,
|
||||||
idx: 2,
|
idx: 2,
|
||||||
state: EscapeOctalState::Backslash,
|
state: EscapeOctalState::FirstBackslash,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_byte(b: u8) -> Self {
|
||||||
|
Self {
|
||||||
|
c: [0, b],
|
||||||
|
idx: 2,
|
||||||
|
state: EscapeOctalState::LastBackslash,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -131,7 +166,7 @@ impl EscapedChar {
|
||||||
|
|
||||||
fn new_octal(b: u8) -> Self {
|
fn new_octal(b: u8) -> Self {
|
||||||
Self {
|
Self {
|
||||||
state: EscapeState::Octal(EscapeOctal::from(b)),
|
state: EscapeState::Octal(EscapeOctal::from_byte(b)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -159,7 +194,7 @@ impl EscapedChar {
|
||||||
_ => Char(' '),
|
_ => Char(' '),
|
||||||
},
|
},
|
||||||
':' if dirname => Backslash(':'),
|
':' if dirname => Backslash(':'),
|
||||||
_ if c.is_ascii_control() => Octal(EscapeOctal::from(c as u8)),
|
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
|
||||||
_ => Char(c),
|
_ => Char(c),
|
||||||
};
|
};
|
||||||
Self { state: init_state }
|
Self { state: init_state }
|
||||||
|
@ -176,11 +211,11 @@ impl EscapedChar {
|
||||||
'\x0B' => Backslash('v'),
|
'\x0B' => Backslash('v'),
|
||||||
'\x0C' => Backslash('f'),
|
'\x0C' => Backslash('f'),
|
||||||
'\r' => Backslash('r'),
|
'\r' => Backslash('r'),
|
||||||
'\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c as u8)),
|
|
||||||
'\'' => match quotes {
|
'\'' => match quotes {
|
||||||
Quotes::Single => Backslash('\''),
|
Quotes::Single => Backslash('\''),
|
||||||
_ => Char('\''),
|
_ => Char('\''),
|
||||||
},
|
},
|
||||||
|
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
|
||||||
_ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
|
_ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
|
||||||
_ => Char(c),
|
_ => Char(c),
|
||||||
};
|
};
|
||||||
|
@ -564,10 +599,10 @@ mod tests {
|
||||||
("\"one_two\"", "c"),
|
("\"one_two\"", "c"),
|
||||||
("one_two", "shell"),
|
("one_two", "shell"),
|
||||||
("one_two", "shell-show"),
|
("one_two", "shell-show"),
|
||||||
("\'one_two\'", "shell-always"),
|
("'one_two'", "shell-always"),
|
||||||
("\'one_two\'", "shell-always-show"),
|
("'one_two'", "shell-always-show"),
|
||||||
("one_two", "shell-escape"),
|
("one_two", "shell-escape"),
|
||||||
("\'one_two\'", "shell-escape-always"),
|
("'one_two'", "shell-escape-always"),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -581,12 +616,12 @@ mod tests {
|
||||||
("one two", "literal-show"),
|
("one two", "literal-show"),
|
||||||
("one\\ two", "escape"),
|
("one\\ two", "escape"),
|
||||||
("\"one two\"", "c"),
|
("\"one two\"", "c"),
|
||||||
("\'one two\'", "shell"),
|
("'one two'", "shell"),
|
||||||
("\'one two\'", "shell-show"),
|
("'one two'", "shell-show"),
|
||||||
("\'one two\'", "shell-always"),
|
("'one two'", "shell-always"),
|
||||||
("\'one two\'", "shell-always-show"),
|
("'one two'", "shell-always-show"),
|
||||||
("\'one two\'", "shell-escape"),
|
("'one two'", "shell-escape"),
|
||||||
("\'one two\'", "shell-escape-always"),
|
("'one two'", "shell-escape-always"),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -628,7 +663,7 @@ mod tests {
|
||||||
|
|
||||||
// One single quote
|
// One single quote
|
||||||
check_names(
|
check_names(
|
||||||
"one\'two",
|
"one'two",
|
||||||
&[
|
&[
|
||||||
("one'two", "literal"),
|
("one'two", "literal"),
|
||||||
("one'two", "literal-show"),
|
("one'two", "literal-show"),
|
||||||
|
@ -714,7 +749,7 @@ mod tests {
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
// The first 16 control characters. NUL is also included, even though it is of
|
// The first 16 ASCII control characters. NUL is also included, even though it is of
|
||||||
// no importance for file names.
|
// no importance for file names.
|
||||||
check_names(
|
check_names(
|
||||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
|
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
|
||||||
|
@ -753,7 +788,7 @@ mod tests {
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
// The last 16 control characters.
|
// The last 16 ASCII control characters.
|
||||||
check_names(
|
check_names(
|
||||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
|
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
|
||||||
&[
|
&[
|
||||||
|
@ -807,6 +842,42 @@ mod tests {
|
||||||
("''$'\\177'", "shell-escape-always"),
|
("''$'\\177'", "shell-escape-always"),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// The first 16 Unicode control characters.
|
||||||
|
let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap();
|
||||||
|
check_names(
|
||||||
|
test_str,
|
||||||
|
&[
|
||||||
|
("????????????????", "literal"),
|
||||||
|
(test_str, "literal-show"),
|
||||||
|
("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"),
|
||||||
|
("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"),
|
||||||
|
("????????????????", "shell"),
|
||||||
|
(test_str, "shell-show"),
|
||||||
|
("'????????????????'", "shell-always"),
|
||||||
|
(&format!("'{}'", test_str), "shell-always-show"),
|
||||||
|
("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"),
|
||||||
|
("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
// The last 16 Unicode control characters.
|
||||||
|
let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap();
|
||||||
|
check_names(
|
||||||
|
test_str,
|
||||||
|
&[
|
||||||
|
("????????????????", "literal"),
|
||||||
|
(test_str, "literal-show"),
|
||||||
|
("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"),
|
||||||
|
("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"),
|
||||||
|
("????????????????", "shell"),
|
||||||
|
(test_str, "shell-show"),
|
||||||
|
("'????????????????'", "shell-always"),
|
||||||
|
(&format!("'{}'", test_str), "shell-always-show"),
|
||||||
|
("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"),
|
||||||
|
("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"),
|
||||||
|
],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1065,7 +1136,7 @@ mod tests {
|
||||||
("one\\\\two", "escape"),
|
("one\\\\two", "escape"),
|
||||||
("\"one\\\\two\"", "c"),
|
("\"one\\\\two\"", "c"),
|
||||||
("'one\\two'", "shell"),
|
("'one\\two'", "shell"),
|
||||||
("\'one\\two\'", "shell-always"),
|
("'one\\two'", "shell-always"),
|
||||||
("'one\\two'", "shell-escape"),
|
("'one\\two'", "shell-escape"),
|
||||||
("'one\\two'", "shell-escape-always"),
|
("'one\\two'", "shell-escape-always"),
|
||||||
],
|
],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue