1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

uniq: Fix panic on invalid utf-8 input

This commit is contained in:
Chirag Jadwani 2021-03-15 14:08:14 +05:30
parent 4574b2b58d
commit ce4342d12e
3 changed files with 18 additions and 3 deletions

View file

@ -10,7 +10,7 @@ extern crate uucore;
use clap::{App, Arg, ArgMatches}; use clap::{App, Arg, ArgMatches};
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Result, Write};
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
@ -61,8 +61,7 @@ impl Uniq {
let delimiters = &self.delimiters; let delimiters = &self.delimiters;
let line_terminator = self.get_line_terminator(); let line_terminator = self.get_line_terminator();
for io_line in reader.split(line_terminator) { for line in reader.split(line_terminator).map(get_line_string) {
let line = String::from_utf8(crash_if_err!(1, io_line)).unwrap();
if !lines.is_empty() && self.cmp_keys(&lines[0], &line) { if !lines.is_empty() && self.cmp_keys(&lines[0], &line) {
let print_delimiter = delimiters == &Delimiters::Prepend let print_delimiter = delimiters == &Delimiters::Prepend
|| (delimiters == &Delimiters::Separate && first_line_printed); || (delimiters == &Delimiters::Separate && first_line_printed);
@ -199,6 +198,11 @@ impl Uniq {
} }
} }
/// Turns one raw line read from the input into an owned `String`.
///
/// `io_line` is the outcome of reading a single delimited chunk of bytes.
/// Both failure modes — the read itself failing, and the bytes not being
/// valid UTF-8 — are handed to `crash_if_err!` with the value `1`
/// (presumably the process exit code; confirm against the macro definition),
/// so this function only returns when a valid UTF-8 line was obtained.
fn get_line_string(io_line: Result<Vec<u8>>) -> String {
    // First unwrap the I/O result to get at the raw bytes of the line.
    let raw = crash_if_err!(1, io_line);
    // Then validate the bytes as UTF-8, reporting invalid sequences through
    // the same error path instead of panicking.
    let decoded = String::from_utf8(raw);
    crash_if_err!(1, decoded)
}
fn opt_parsed<T: FromStr>(opt_name: &str, matches: &ArgMatches) -> Option<T> { fn opt_parsed<T: FromStr>(opt_name: &str, matches: &ArgMatches) -> Option<T> {
matches.value_of(opt_name).map(|arg_str| { matches.value_of(opt_name).map(|arg_str| {
let opt_val: Option<T> = arg_str.parse().ok(); let opt_val: Option<T> = arg_str.parse().ok();

View file

@ -138,3 +138,12 @@ fn test_stdin_zero_terminated() {
.run() .run()
.stdout_is_fixture("sorted-zero-terminated.expected"); .stdout_is_fixture("sorted-zero-terminated.expected");
} }
// Regression test for input that is not valid UTF-8.
// Runs the command on the `not-utf8-sequence.txt` fixture and expects the
// run to fail, with the UTF-8 decoding error below as the stderr output
// (`stderr_only` presumably also asserts that stdout is empty — confirm
// against the test helper).
#[test]
fn test_invalid_utf8() {
new_ucmd!()
.arg("not-utf8-sequence.txt")
.run()
.failure()
// The message reports the offending byte position within the line that
// failed to decode ("index 0"), not an offset within the whole file.
.stderr_only("uniq: error: invalid utf-8 sequence of 1 bytes from index 0");
}

View file

@ -0,0 +1,2 @@
Next line contains two bytes - 0xCC and 0xCD - which are not a valid utf-8 sequence
ÌÍ