From 83f96ec29d6e063f0777bbc26f45d6ccf4eb5f1a Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 24 Jan 2022 21:18:59 -0500 Subject: [PATCH] tail: don't error when following non-UTF-8 data Fix a bug where `tail -f` would terminate with an error due to failing to parse a UTF-8 string from a sequence of bytes read from the followed file. This commit replaces the call to `BufRead::read_line()` with a call to `BufRead::read_until()` so that any sequence of bytes regardless of encoding can be read. Fixes #1050. --- src/uu/tail/src/tail.rs | 10 +++++++--- tests/by-util/test_tail.rs | 28 ++++++++++++++++++++++++++++ tests/common/util.rs | 9 ++++++++- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index cf1f5c3c5..67b1741e6 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -345,6 +345,7 @@ pub fn uu_app<'a>() -> App<'a> { ) } +/// Continually check for new data in the given readers, writing any to stdout. fn follow(readers: &mut [(T, &String)], settings: &Settings) -> UResult<()> { if readers.is_empty() || !settings.follow { return Ok(()); @@ -353,6 +354,7 @@ fn follow(readers: &mut [(T, &String)], settings: &Settings) -> URes let mut last = readers.len() - 1; let mut read_some = false; let mut process = platform::ProcessChecker::new(settings.pid); + let mut stdout = stdout(); loop { sleep(Duration::new(0, settings.sleep_msec * 1000)); @@ -363,8 +365,8 @@ fn follow(readers: &mut [(T, &String)], settings: &Settings) -> URes for (i, (reader, filename)) in readers.iter_mut().enumerate() { // Print all new content since the last pass loop { - let mut datum = String::new(); - match reader.read_line(&mut datum) { + let mut datum = vec![]; + match reader.read_until(b'\n', &mut datum) { Ok(0) => break, Ok(_) => { read_some = true; @@ -372,7 +374,9 @@ fn follow(readers: &mut [(T, &String)], settings: &Settings) -> URes println!("\n==> {} <==", filename); last = i; } - print!("{}", datum); + stdout + .write_all(&datum) + .map_err_context(|| String::from("write error"))?; } Err(err) => return Err(USimpleError::new(1, err.to_string())), } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index f4d932e79..40a229d3a 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -77,6 +77,34 @@ fn test_follow() { child.kill().unwrap(); } +/// Test for following when bytes are written that are not valid UTF-8. +#[test] +fn test_follow_non_utf8_bytes() { + // Tail the test file and start following it. + let (at, mut ucmd) = at_and_ucmd!(); + let mut child = ucmd.arg("-f").arg(FOOBAR_TXT).run_no_wait(); + let expected = at.read("foobar_single_default.expected"); + assert_eq!(read_size(&mut child, expected.len()), expected); + + // Now append some bytes that are not valid UTF-8. + // + // The binary integer "10000000" is *not* a valid UTF-8 encoding + // of a character: https://en.wikipedia.org/wiki/UTF-8#Encoding + // + // We also write the newline character because our implementation + // of `tail` is attempting to read a line of input, so the + // presence of a newline character will force the `follow()` + // function to conclude reading input bytes and start writing them + // to output. The newline character is not fundamental to this + // test, it is just a requirement of the current implementation. + let expected = [0b10000000, b'\n']; + at.append_bytes(FOOBAR_TXT, &expected); + let actual = read_size_bytes(&mut child, expected.len()); + assert_eq!(actual, expected.to_vec()); + + child.kill().unwrap(); +} + #[test] fn test_follow_multiple() { let (at, mut ucmd) = at_and_ucmd!(); diff --git a/tests/common/util.rs b/tests/common/util.rs index 5df0b753f..79fe4d5d7 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -1117,6 +1117,13 @@ impl UCommand { /// Wrapper for `child.stdout.read_exact()`. /// Careful, this blocks indefinitely if `size` bytes is never reached. pub fn read_size(child: &mut Child, size: usize) -> String { + String::from_utf8(read_size_bytes(child, size)).unwrap() +} + +/// Read the specified number of bytes from the stdout of the child process. +/// +/// Careful, this blocks indefinitely if `size` bytes is never reached. +pub fn read_size_bytes(child: &mut Child, size: usize) -> Vec { let mut output = Vec::new(); output.resize(size, 0); sleep(Duration::from_secs(1)); @@ -1126,7 +1133,7 @@ pub fn read_size(child: &mut Child, size: usize) -> String { .unwrap() .read_exact(output.as_mut_slice()) .unwrap(); - String::from_utf8(output).unwrap() + output } pub fn vec_of_size(n: usize) -> Vec {