mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
tail: don't error when following non-UTF-8 data
Fix a bug where `tail -f` would terminate with an error due to failing to parse a UTF-8 string from a sequence of bytes read from the followed file. This commit replaces the call to `BufRead::read_line()` with a call to `BufRead::read_until()` so that any sequence of bytes regardless of encoding can be read. Fixes #1050.
This commit is contained in:
parent
d2fe245192
commit
83f96ec29d
3 changed files with 43 additions and 4 deletions
|
@ -345,6 +345,7 @@ pub fn uu_app<'a>() -> App<'a> {
|
|||
)
|
||||
}
|
||||
|
||||
/// Continually check for new data in the given readers, writing any to stdout.
|
||||
fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) -> UResult<()> {
|
||||
if readers.is_empty() || !settings.follow {
|
||||
return Ok(());
|
||||
|
@ -353,6 +354,7 @@ fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) -> URes
|
|||
let mut last = readers.len() - 1;
|
||||
let mut read_some = false;
|
||||
let mut process = platform::ProcessChecker::new(settings.pid);
|
||||
let mut stdout = stdout();
|
||||
|
||||
loop {
|
||||
sleep(Duration::new(0, settings.sleep_msec * 1000));
|
||||
|
@ -363,8 +365,8 @@ fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) -> URes
|
|||
for (i, (reader, filename)) in readers.iter_mut().enumerate() {
|
||||
// Print all new content since the last pass
|
||||
loop {
|
||||
let mut datum = String::new();
|
||||
match reader.read_line(&mut datum) {
|
||||
let mut datum = vec![];
|
||||
match reader.read_until(b'\n', &mut datum) {
|
||||
Ok(0) => break,
|
||||
Ok(_) => {
|
||||
read_some = true;
|
||||
|
@ -372,7 +374,9 @@ fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) -> URes
|
|||
println!("\n==> {} <==", filename);
|
||||
last = i;
|
||||
}
|
||||
print!("{}", datum);
|
||||
stdout
|
||||
.write_all(&datum)
|
||||
.map_err_context(|| String::from("write error"))?;
|
||||
}
|
||||
Err(err) => return Err(USimpleError::new(1, err.to_string())),
|
||||
}
|
||||
|
|
|
@ -77,6 +77,34 @@ fn test_follow() {
|
|||
child.kill().unwrap();
|
||||
}
|
||||
|
||||
/// Test for following when bytes are written that are not valid UTF-8.
///
/// Starts `tail -f` on the test file, checks the initial output, then
/// appends a non-UTF-8 byte plus a newline and asserts that exactly those
/// bytes appear on the child's stdout.
|
||||
#[test]
|
||||
fn test_follow_non_utf8_bytes() {
|
||||
// Tail the test file and start following it.
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
let mut child = ucmd.arg("-f").arg(FOOBAR_TXT).run_no_wait();
|
||||
// Consume the initial output and compare it with the expected fixture
// before appending anything new.
let expected = at.read("foobar_single_default.expected");
|
||||
assert_eq!(read_size(&mut child, expected.len()), expected);
|
||||
|
||||
// Now append some bytes that are not valid UTF-8.
|
||||
//
|
||||
// The lone byte 0b10000000 (0x80) is *not* a valid UTF-8 encoding
|
||||
// of a character: https://en.wikipedia.org/wiki/UTF-8#Encoding
|
||||
//
|
||||
// We also write the newline character because our implementation
|
||||
// of `tail` is attempting to read a line of input, so the
|
||||
// presence of a newline character will force the `follow()`
|
||||
// function to conclude reading input bytes and start writing them
|
||||
// to output. The newline character is not fundamental to this
|
||||
// test, it is just a requirement of the current implementation.
|
||||
let expected = [0b10000000, b'\n'];
|
||||
at.append_bytes(FOOBAR_TXT, &expected);
|
||||
// Read raw bytes here: `read_size()` would panic on the invalid UTF-8.
let actual = read_size_bytes(&mut child, expected.len());
|
||||
assert_eq!(actual, expected.to_vec());
|
||||
|
||||
// The child was started with `-f`, so stop it explicitly.
child.kill().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_follow_multiple() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
|
|
|
@ -1117,6 +1117,13 @@ impl UCommand {
|
|||
/// Read `size` bytes from the child's stdout via `read_size_bytes()` and
/// convert them to a `String`.
|
||||
/// Careful, this blocks indefinitely if `size` bytes are never reached.
/// Panics (via `unwrap()`) if the bytes read are not valid UTF-8.
|
||||
pub fn read_size(child: &mut Child, size: usize) -> String {
|
||||
String::from_utf8(read_size_bytes(child, size)).unwrap()
|
||||
}
|
||||
|
||||
/// Read the specified number of bytes from the stdout of the child process.
|
||||
///
|
||||
/// Careful, this blocks indefinitely if `size` bytes are never reached.
|
||||
pub fn read_size_bytes(child: &mut Child, size: usize) -> Vec<u8> {
|
||||
let mut output = Vec::new();
|
||||
output.resize(size, 0);
|
||||
sleep(Duration::from_secs(1));
|
||||
|
@ -1126,7 +1133,7 @@ pub fn read_size(child: &mut Child, size: usize) -> String {
|
|||
.unwrap()
|
||||
.read_exact(output.as_mut_slice())
|
||||
.unwrap();
|
||||
String::from_utf8(output).unwrap()
|
||||
output
|
||||
}
|
||||
|
||||
pub fn vec_of_size(n: usize) -> Vec<u8> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue