1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

quoting_style: patch tests for encoding

This commit is contained in:
Dorian Peron 2025-06-14 16:59:31 +02:00
parent deeaec3d4a
commit f17940b4e5

View file

@ -265,18 +265,18 @@ mod tests {
} }
} }
fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> { fn check_names_inner<T>(encoding: UEncoding, name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
map.iter() map.iter()
.map(|(_, style)| escape_name_inner(name, &get_style(style), false, UEncoding::Utf8)) .map(|(_, style)| escape_name_inner(name, &get_style(style), false, encoding))
.collect() .collect()
} }
fn check_names(name: &str, map: &[(&str, &str)]) { fn check_names_encoding(encoding: UEncoding, name: &str, map: &[(&str, &str)]) {
assert_eq!( assert_eq!(
map.iter() map.iter()
.map(|(correct, _)| *correct) .map(|(correct, _)| *correct)
.collect::<Vec<&str>>(), .collect::<Vec<&str>>(),
check_names_inner(name.as_bytes(), map) check_names_inner(encoding, name.as_bytes(), map)
.iter() .iter()
.map(|bytes| std::str::from_utf8(bytes) .map(|bytes| std::str::from_utf8(bytes)
.expect("valid str goes in, valid str comes out")) .expect("valid str goes in, valid str comes out"))
@ -284,18 +284,28 @@ mod tests {
); );
} }
fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) { fn check_names_both(name: &str, map: &[(&str, &str)]) {
check_names_encoding(UEncoding::Utf8, name, map);
check_names_encoding(UEncoding::Ascii, name, map);
}
fn check_names_encoding_raw(encoding: UEncoding, name: &[u8], map: &[(&[u8], &str)]) {
assert_eq!( assert_eq!(
map.iter() map.iter()
.map(|(correct, _)| *correct) .map(|(correct, _)| *correct)
.collect::<Vec<&[u8]>>(), .collect::<Vec<&[u8]>>(),
check_names_inner(name, map) check_names_inner(encoding, name, map)
); );
} }
fn check_names_raw_both(name: &[u8], map: &[(&[u8], &str)]) {
check_names_encoding_raw(UEncoding::Utf8, name, map);
check_names_encoding_raw(UEncoding::Ascii, name, map);
}
#[test] #[test]
fn test_simple_names() { fn test_simple_names() {
check_names( check_names_both(
"one_two", "one_two",
&[ &[
("one_two", "literal"), ("one_two", "literal"),
@ -314,7 +324,7 @@ mod tests {
#[test] #[test]
fn test_empty_string() { fn test_empty_string() {
check_names( check_names_both(
"", "",
&[ &[
("", "literal"), ("", "literal"),
@ -333,7 +343,7 @@ mod tests {
#[test] #[test]
fn test_spaces() { fn test_spaces() {
check_names( check_names_both(
"one two", "one two",
&[ &[
("one two", "literal"), ("one two", "literal"),
@ -349,7 +359,7 @@ mod tests {
], ],
); );
check_names( check_names_both(
" one", " one",
&[ &[
(" one", "literal"), (" one", "literal"),
@ -369,7 +379,7 @@ mod tests {
#[test] #[test]
fn test_quotes() { fn test_quotes() {
// One double quote // One double quote
check_names( check_names_both(
"one\"two", "one\"two",
&[ &[
("one\"two", "literal"), ("one\"two", "literal"),
@ -386,7 +396,7 @@ mod tests {
); );
// One single quote // One single quote
check_names( check_names_both(
"one'two", "one'two",
&[ &[
("one'two", "literal"), ("one'two", "literal"),
@ -403,7 +413,7 @@ mod tests {
); );
// One single quote and one double quote // One single quote and one double quote
check_names( check_names_both(
"one'two\"three", "one'two\"three",
&[ &[
("one'two\"three", "literal"), ("one'two\"three", "literal"),
@ -420,7 +430,7 @@ mod tests {
); );
// Consecutive quotes // Consecutive quotes
check_names( check_names_both(
"one''two\"\"three", "one''two\"\"three",
&[ &[
("one''two\"\"three", "literal"), ("one''two\"\"three", "literal"),
@ -440,7 +450,7 @@ mod tests {
#[test] #[test]
fn test_control_chars() { fn test_control_chars() {
// A simple newline // A simple newline
check_names( check_names_both(
"one\ntwo", "one\ntwo",
&[ &[
("one?two", "literal"), ("one?two", "literal"),
@ -457,7 +467,7 @@ mod tests {
); );
// A control character followed by a special shell character // A control character followed by a special shell character
check_names( check_names_both(
"one\n&two", "one\n&two",
&[ &[
("one?&two", "literal"), ("one?&two", "literal"),
@ -475,7 +485,7 @@ mod tests {
// The first 16 ASCII control characters. NUL is also included, even though it is of // The first 16 ASCII control characters. NUL is also included, even though it is of
// no importance for file names. // no importance for file names.
check_names( check_names_both(
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
&[ &[
("????????????????", "literal"), ("????????????????", "literal"),
@ -513,7 +523,7 @@ mod tests {
); );
// The last 16 ASCII control characters. // The last 16 ASCII control characters.
check_names( check_names_both(
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
&[ &[
("????????????????", "literal"), ("????????????????", "literal"),
@ -551,7 +561,7 @@ mod tests {
); );
// DEL // DEL
check_names( check_names_both(
"\x7F", "\x7F",
&[ &[
("?", "literal"), ("?", "literal"),
@ -569,10 +579,9 @@ mod tests {
// The first 16 Unicode control characters. // The first 16 Unicode control characters.
let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap(); let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap();
check_names( check_names_both(
test_str, test_str,
&[ &[
("????????????????", "literal"),
(test_str, "literal-show"), (test_str, "literal-show"),
( (
"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217",
@ -582,9 +591,7 @@ mod tests {
"\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"",
"c", "c",
), ),
("????????????????", "shell"),
(test_str, "shell-show"), (test_str, "shell-show"),
("'????????????????'", "shell-always"),
(&format!("'{test_str}'"), "shell-always-show"), (&format!("'{test_str}'"), "shell-always-show"),
( (
"''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'",
@ -596,13 +603,31 @@ mod tests {
), ),
], ],
); );
// Different expected output for UTF-8 and ASCII in these cases.
// The last 16 Unicode control characters. check_names_encoding(
let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap(); UEncoding::Utf8,
check_names(
test_str, test_str,
&[ &[
("????????????????", "literal"), ("????????????????", "literal"),
("????????????????", "shell"),
("'????????????????'", "shell-always"),
],
);
check_names_encoding(
UEncoding::Ascii,
test_str,
&[
("????????????????????????????????", "literal"),
("????????????????????????????????", "shell"),
("'????????????????????????????????'", "shell-always"),
],
);
// The last 16 Unicode control characters.
let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap();
check_names_both(
test_str,
&[
(test_str, "literal-show"), (test_str, "literal-show"),
( (
"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237",
@ -612,9 +637,7 @@ mod tests {
"\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"",
"c", "c",
), ),
("????????????????", "shell"),
(test_str, "shell-show"), (test_str, "shell-show"),
("'????????????????'", "shell-always"),
(&format!("'{test_str}'"), "shell-always-show"), (&format!("'{test_str}'"), "shell-always-show"),
( (
"''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'",
@ -626,6 +649,25 @@ mod tests {
), ),
], ],
); );
// Different expected output for UTF-8 and ASCII in these cases.
check_names_encoding(
UEncoding::Utf8,
test_str,
&[
("????????????????", "literal"),
("????????????????", "shell"),
("'????????????????'", "shell-always"),
],
);
check_names_encoding(
UEncoding::Ascii,
test_str,
&[
("????????????????????????????????", "literal"),
("????????????????????????????????", "shell"),
("'????????????????????????????????'", "shell-always"),
],
);
} }
#[test] #[test]
@ -638,7 +680,7 @@ mod tests {
let invalid = b'\xC0'; let invalid = b'\xC0';
// a single byte value invalid outside of additional context in UTF-8 // a single byte value invalid outside of additional context in UTF-8
check_names_raw( check_names_raw_both(
&[continuation], &[continuation],
&[ &[
(b"?", "literal"), (b"?", "literal"),
@ -656,24 +698,45 @@ mod tests {
// ...but the byte becomes valid with appropriate context // ...but the byte becomes valid with appropriate context
// (this is just the § character in UTF-8, written as bytes) // (this is just the § character in UTF-8, written as bytes)
check_names_raw( let input = &[first2byte, continuation];
&[first2byte, continuation], check_names_raw_both(
input,
&[
(b"\xC2\xA7", "literal-show"),
(b"\xC2\xA7", "shell-show"),
(b"'\xC2\xA7'", "shell-always-show"),
],
);
// Different expected output for UTF-8 and ASCII in these cases.
check_names_encoding_raw(
UEncoding::Utf8,
input,
&[ &[
(b"\xC2\xA7", "literal"), (b"\xC2\xA7", "literal"),
(b"\xC2\xA7", "literal-show"),
(b"\xC2\xA7", "escape"), (b"\xC2\xA7", "escape"),
(b"\"\xC2\xA7\"", "c"), (b"\"\xC2\xA7\"", "c"),
(b"\xC2\xA7", "shell"), (b"\xC2\xA7", "shell"),
(b"\xC2\xA7", "shell-show"),
(b"'\xC2\xA7'", "shell-always"), (b"'\xC2\xA7'", "shell-always"),
(b"'\xC2\xA7'", "shell-always-show"),
(b"\xC2\xA7", "shell-escape"), (b"\xC2\xA7", "shell-escape"),
(b"'\xC2\xA7'", "shell-escape-always"), (b"'\xC2\xA7'", "shell-escape-always"),
], ],
); );
check_names_encoding_raw(
UEncoding::Ascii,
input,
&[
(b"??", "literal"),
(b"\\302\\247", "escape"),
(b"\"\\302\\247\"", "c"),
(b"??", "shell"),
(b"'??'", "shell-always"),
(b"''$'\\302\\247'", "shell-escape"),
(b"''$'\\302\\247'", "shell-escape-always"),
],
);
// mixed with valid characters // mixed with valid characters
check_names_raw( check_names_raw_both(
&[continuation, ascii], &[continuation, ascii],
&[ &[
(b"?_", "literal"), (b"?_", "literal"),
@ -688,7 +751,7 @@ mod tests {
(b"''$'\\247''_'", "shell-escape-always"), (b"''$'\\247''_'", "shell-escape-always"),
], ],
); );
check_names_raw( check_names_raw_both(
&[ascii, continuation], &[ascii, continuation],
&[ &[
(b"_?", "literal"), (b"_?", "literal"),
@ -703,7 +766,7 @@ mod tests {
(b"'_'$'\\247'", "shell-escape-always"), (b"'_'$'\\247'", "shell-escape-always"),
], ],
); );
check_names_raw( check_names_raw_both(
&[ascii, continuation, ascii], &[ascii, continuation, ascii],
&[ &[
(b"_?_", "literal"), (b"_?_", "literal"),
@ -718,7 +781,7 @@ mod tests {
(b"'_'$'\\247''_'", "shell-escape-always"), (b"'_'$'\\247''_'", "shell-escape-always"),
], ],
); );
check_names_raw( check_names_raw_both(
&[continuation, ascii, continuation], &[continuation, ascii, continuation],
&[ &[
(b"?_?", "literal"), (b"?_?", "literal"),
@ -735,7 +798,7 @@ mod tests {
); );
// contiguous invalid bytes // contiguous invalid bytes
check_names_raw( check_names_raw_both(
&[ &[
ascii, ascii,
invalid, invalid,
@ -789,7 +852,7 @@ mod tests {
); );
// invalid multi-byte sequences that start valid // invalid multi-byte sequences that start valid
check_names_raw( check_names_raw_both(
&[first2byte, ascii], &[first2byte, ascii],
&[ &[
(b"?_", "literal"), (b"?_", "literal"),
@ -804,11 +867,15 @@ mod tests {
(b"''$'\\302''_'", "shell-escape-always"), (b"''$'\\302''_'", "shell-escape-always"),
], ],
); );
check_names_raw(
&[first2byte, first2byte, continuation], let input = &[first2byte, first2byte, continuation];
check_names_raw_both(input, &[(b"\xC2\xC2\xA7", "literal-show")]);
// Different expected output for UTF-8 and ASCII in these cases.
check_names_encoding_raw(
UEncoding::Utf8,
input,
&[ &[
(b"?\xC2\xA7", "literal"), (b"?\xC2\xA7", "literal"),
(b"\xC2\xC2\xA7", "literal-show"),
(b"\\302\xC2\xA7", "escape"), (b"\\302\xC2\xA7", "escape"),
(b"\"\\302\xC2\xA7\"", "c"), (b"\"\\302\xC2\xA7\"", "c"),
(b"?\xC2\xA7", "shell"), (b"?\xC2\xA7", "shell"),
@ -819,7 +886,23 @@ mod tests {
(b"''$'\\302''\xC2\xA7'", "shell-escape-always"), (b"''$'\\302''\xC2\xA7'", "shell-escape-always"),
], ],
); );
check_names_raw( check_names_encoding_raw(
UEncoding::Ascii,
input,
&[
(b"???", "literal"),
(b"\\302\\302\\247", "escape"),
(b"\"\\302\\302\\247\"", "c"),
(b"???", "shell"),
(b"\xC2\xC2\xA7", "shell-show"),
(b"'???'", "shell-always"),
(b"'\xC2\xC2\xA7'", "shell-always-show"),
(b"''$'\\302\\302\\247'", "shell-escape"),
(b"''$'\\302\\302\\247'", "shell-escape-always"),
],
);
check_names_raw_both(
&[first3byte, continuation, ascii], &[first3byte, continuation, ascii],
&[ &[
(b"??_", "literal"), (b"??_", "literal"),
@ -834,7 +917,7 @@ mod tests {
(b"''$'\\340\\247''_'", "shell-escape-always"), (b"''$'\\340\\247''_'", "shell-escape-always"),
], ],
); );
check_names_raw( check_names_raw_both(
&[first4byte, continuation, continuation, ascii], &[first4byte, continuation, continuation, ascii],
&[ &[
(b"???_", "literal"), (b"???_", "literal"),
@ -856,7 +939,7 @@ mod tests {
// A question mark must force quotes in shell and shell-always, unless // A question mark must force quotes in shell and shell-always, unless
// it is in place of a control character (that case is already covered // it is in place of a control character (that case is already covered
// in other tests) // in other tests)
check_names( check_names_both(
"one?two", "one?two",
&[ &[
("one?two", "literal"), ("one?two", "literal"),
@ -876,7 +959,7 @@ mod tests {
#[test] #[test]
fn test_backslash() { fn test_backslash() {
// Escaped in C-style, but not in Shell-style escaping // Escaped in C-style, but not in Shell-style escaping
check_names( check_names_both(
"one\\two", "one\\two",
&[ &[
("one\\two", "literal"), ("one\\two", "literal"),
@ -893,32 +976,32 @@ mod tests {
#[test] #[test]
fn test_tilde_and_hash() { fn test_tilde_and_hash() {
check_names("~", &[("'~'", "shell"), ("'~'", "shell-escape")]); check_names_both("~", &[("'~'", "shell"), ("'~'", "shell-escape")]);
check_names( check_names_both(
"~name", "~name",
&[("'~name'", "shell"), ("'~name'", "shell-escape")], &[("'~name'", "shell"), ("'~name'", "shell-escape")],
); );
check_names( check_names_both(
"some~name", "some~name",
&[("some~name", "shell"), ("some~name", "shell-escape")], &[("some~name", "shell"), ("some~name", "shell-escape")],
); );
check_names("name~", &[("name~", "shell"), ("name~", "shell-escape")]); check_names_both("name~", &[("name~", "shell"), ("name~", "shell-escape")]);
check_names("#", &[("'#'", "shell"), ("'#'", "shell-escape")]); check_names_both("#", &[("'#'", "shell"), ("'#'", "shell-escape")]);
check_names( check_names_both(
"#name", "#name",
&[("'#name'", "shell"), ("'#name'", "shell-escape")], &[("'#name'", "shell"), ("'#name'", "shell-escape")],
); );
check_names( check_names_both(
"some#name", "some#name",
&[("some#name", "shell"), ("some#name", "shell-escape")], &[("some#name", "shell"), ("some#name", "shell-escape")],
); );
check_names("name#", &[("name#", "shell"), ("name#", "shell-escape")]); check_names_both("name#", &[("name#", "shell"), ("name#", "shell-escape")]);
} }
#[test] #[test]
fn test_special_chars_in_double_quotes() { fn test_special_chars_in_double_quotes() {
check_names( check_names_both(
"can'$t", "can'$t",
&[ &[
("'can'\\''$t'", "shell"), ("'can'\\''$t'", "shell"),
@ -928,7 +1011,7 @@ mod tests {
], ],
); );
check_names( check_names_both(
"can'`t", "can'`t",
&[ &[
("'can'\\''`t'", "shell"), ("'can'\\''`t'", "shell"),
@ -938,7 +1021,7 @@ mod tests {
], ],
); );
check_names( check_names_both(
"can'\\t", "can'\\t",
&[ &[
("'can'\\''\\t'", "shell"), ("'can'\\''\\t'", "shell"),