From ce4342d12e630c08c5a0db7da05a1b84f862882f Mon Sep 17 00:00:00 2001 From: Chirag Jadwani Date: Mon, 15 Mar 2021 14:08:14 +0530 Subject: [PATCH] uniq: Fix panic on invalid utf-8 input --- src/uu/uniq/src/uniq.rs | 10 +++++++--- tests/by-util/test_uniq.rs | 9 +++++++++ tests/fixtures/uniq/not-utf8-sequence.txt | 2 ++ 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/uniq/not-utf8-sequence.txt diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs index afbda2829..98260bb8f 100644 --- a/src/uu/uniq/src/uniq.rs +++ b/src/uu/uniq/src/uniq.rs @@ -10,7 +10,7 @@ extern crate uucore; use clap::{App, Arg, ArgMatches}; use std::fs::File; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Result, Write}; use std::path::Path; use std::str::FromStr; @@ -61,8 +61,7 @@ impl Uniq { let delimiters = &self.delimiters; let line_terminator = self.get_line_terminator(); - for io_line in reader.split(line_terminator) { - let line = String::from_utf8(crash_if_err!(1, io_line)).unwrap(); + for line in reader.split(line_terminator).map(get_line_string) { if !lines.is_empty() && self.cmp_keys(&lines[0], &line) { let print_delimiter = delimiters == &Delimiters::Prepend || (delimiters == &Delimiters::Separate && first_line_printed); @@ -199,6 +198,11 @@ impl Uniq { } } +fn get_line_string(io_line: Result>) -> String { + let line_bytes = crash_if_err!(1, io_line); + crash_if_err!(1, String::from_utf8(line_bytes)) +} + fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> Option { matches.value_of(opt_name).map(|arg_str| { let opt_val: Option = arg_str.parse().ok(); diff --git a/tests/by-util/test_uniq.rs b/tests/by-util/test_uniq.rs index 53d57b49c..22e67540e 100644 --- a/tests/by-util/test_uniq.rs +++ b/tests/by-util/test_uniq.rs @@ -138,3 +138,12 @@ fn test_stdin_zero_terminated() { .run() .stdout_is_fixture("sorted-zero-terminated.expected"); } + +#[test] +fn test_invalid_utf8() { + new_ucmd!() + .arg("not-utf8-sequence.txt") + .run() + .failure() + .stderr_only("uniq: error: invalid utf-8 sequence of 1 bytes from index 0"); +} diff --git a/tests/fixtures/uniq/not-utf8-sequence.txt b/tests/fixtures/uniq/not-utf8-sequence.txt new file mode 100644 index 000000000..78c146ffd --- /dev/null +++ b/tests/fixtures/uniq/not-utf8-sequence.txt @@ -0,0 +1,2 @@ +Next line contains two bytes - 0xCC and 0xCD - which are not a valid utf-8 sequence +ÌÍ \ No newline at end of file