1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

echo: handle multibyte escape sequences (#6803)

* echo: handle multibyte escape sequences

Bug was reported, with root cause analysis, by kkew3
Added tests were derived from test cases provided by kkew3
See https://github.com/uutils/coreutils/issues/6741

* Use concrete type

* Fix MSRV issue

* Fix non-UTF-8 argument handling

* Fix MSRV issue

* Fix Clippy violation

* Fix compiler warning

* Address PR comments

* Add MSRV TODO comments

* echo: use stdout_only_bytes instead of stdout_is_bytes

---------

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>
This commit is contained in:
Andrew Liebenow 2024-10-22 04:03:08 -05:00 committed by GitHub
parent 99fa11ac5c
commit 66f11c4ce4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 375 additions and 80 deletions

View file

@ -303,3 +303,88 @@ fn partial_version_argument() {
fn partial_help_argument() {
    // A truncated `--help` is expected to be echoed back as ordinary text.
    let truncated_flag = "--he";
    new_ucmd!()
        .arg(truncated_flag)
        .succeeds()
        .stdout_is("--he\n");
}
#[test]
fn multibyte_escape_unicode() {
    // spell-checker:disable-next-line
    // Cases contributed by kkew3, see
    // https://github.com/uutils/coreutils/issues/6741
    //
    // The bytes F0 9F 98 82 are the UTF-8 encoding of U+1F602
    // ("Face with Tears of Joy", "😂").

    // All four bytes given as consecutive hex escapes yield the character.
    let whole_char = r"\xf0\x9f\x98\x82";
    new_ucmd!()
        .arg("-e")
        .arg(whole_char)
        .succeeds()
        .stdout_only("\u{1F602}\n");

    // The same sequence placed between the escapes for `A` (0x41) and `B` (0x42).
    let surrounded_char = r"\x41\xf0\x9f\x98\x82\x42";
    new_ucmd!()
        .arg("-e")
        .arg(surrounded_char)
        .succeeds()
        .stdout_only("A\u{1F602}B\n");

    // An `A` right after the lead byte splits the sequence, so the expected
    // output is no longer valid UTF-8 and is compared as raw bytes.
    let split_char = r"\xf0\x41\x9f\x98\x82";
    new_ucmd!()
        .arg("-e")
        .arg(split_char)
        .succeeds()
        .stdout_only_bytes(b"\xF0A\x9F\x98\x82\n");

    // `\c` right after the lead byte: the expected output ends at 0xF0 and
    // carries no trailing newline.
    let cut_short = r"\x41\xf0\c\x9f\x98\x82";
    new_ucmd!()
        .arg("-e")
        .arg(cut_short)
        .succeeds()
        .stdout_only_bytes(b"A\xF0");
}
#[test]
fn non_utf_8_hex_round_trip() {
    // 0xFF never appears in valid UTF-8, so the output is checked byte for byte:
    // the escaped byte followed by the trailing newline.
    let expected: &[u8] = &[0xFF, b'\n'];
    new_ucmd!()
        .arg("-e")
        .arg(r"\xFF")
        .succeeds()
        .stdout_only_bytes(expected);
}
#[test]
fn nine_bit_octal() {
    // Octal 777 needs nine bits; both spellings of the escape are expected to
    // produce the single byte 0xFF followed by a newline.
    const EXPECTED: &[u8] = b"\xFF\n";

    for octal_escape in [r"\0777", r"\777"] {
        new_ucmd!()
            .arg("-e")
            .arg(octal_escape)
            .succeeds()
            .stdout_only_bytes(EXPECTED);
    }
}
#[test]
#[cfg(target_family = "unix")]
fn non_utf_8() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // ISO-8859-1 encoded text (not valid UTF-8). With `-n` the argument is
    // expected to come back out unchanged, byte for byte.
    // spell-checker:disable
    const LATIN1_TEXT: &[u8] =
        b"Swer an rehte g\xFCete wendet s\xEEn gem\xFCete, dem volget s\xE6lde und \xEAre.";
    // spell-checker:enable

    new_ucmd!()
        .arg("-n")
        .arg(OsStr::from_bytes(LATIN1_TEXT))
        .succeeds()
        .stdout_only_bytes(LATIN1_TEXT);
}
#[test]
fn slash_eight_off_by_one() {
    // `8` is not an octal digit, so `\8` is expected to be printed literally.
    // NOTE(review): presumably guards against an off-by-one in the accepted
    // octal digit range; confirm against the escape parser.
    let not_an_octal_escape = r"\8";
    new_ucmd!()
        .arg("-e")
        .arg("-n")
        .arg(not_an_octal_escape)
        .succeeds()
        .stdout_only(r"\8");
}