mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-05 07:27:46 +00:00
tail: Performace improvements
Improve performance of `tail` utility. Tail now uses performance-optimized memchr APIs when searching through a file for delimiters.
This commit is contained in:
parent
3971bb3b0c
commit
b264457c41
1 changed files with 48 additions and 37 deletions
|
@ -3,7 +3,8 @@
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle Signum
|
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch
|
||||||
|
// spell-checker:ignore (ToDO) Uncategorized filehandle Signum memrchr
|
||||||
// spell-checker:ignore (libs) kqueue
|
// spell-checker:ignore (libs) kqueue
|
||||||
// spell-checker:ignore (acronyms)
|
// spell-checker:ignore (acronyms)
|
||||||
// spell-checker:ignore (env/flags)
|
// spell-checker:ignore (env/flags)
|
||||||
|
@ -24,11 +25,12 @@ pub use args::uu_app;
|
||||||
use args::{FilterMode, Settings, Signum, parse_args};
|
use args::{FilterMode, Settings, Signum, parse_args};
|
||||||
use chunks::ReverseChunks;
|
use chunks::ReverseChunks;
|
||||||
use follow::Observer;
|
use follow::Observer;
|
||||||
|
use memchr::{memchr_iter, memrchr_iter};
|
||||||
use paths::{FileExtTail, HeaderPrinter, Input, InputKind, MetadataExtTail};
|
use paths::{FileExtTail, HeaderPrinter, Input, InputKind, MetadataExtTail};
|
||||||
use same_file::Handle;
|
use same_file::Handle;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write, stdin, stdout};
|
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write, stdin, stdout};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, get_exit_code, set_exit_code};
|
use uucore::error::{FromIo, UResult, USimpleError, get_exit_code, set_exit_code};
|
||||||
|
@ -285,34 +287,42 @@ fn tail_stdin(
|
||||||
/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
|
/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
|
||||||
/// assert_eq!(i, 2);
|
/// assert_eq!(i, 2);
|
||||||
/// ```
|
/// ```
|
||||||
fn forwards_thru_file<R>(
|
fn forwards_thru_file(
|
||||||
reader: &mut R,
|
reader: &mut impl Read,
|
||||||
num_delimiters: u64,
|
num_delimiters: u64,
|
||||||
delimiter: u8,
|
delimiter: u8,
|
||||||
) -> std::io::Result<usize>
|
) -> std::io::Result<usize> {
|
||||||
where
|
// If num_delimiters == 0, always return 0.
|
||||||
R: Read,
|
if num_delimiters == 0 {
|
||||||
{
|
return Ok(0);
|
||||||
let mut reader = BufReader::new(reader);
|
}
|
||||||
|
// Use a 32K buffer.
|
||||||
let mut buf = vec![];
|
let mut buf = [0; 32 * 1024];
|
||||||
let mut total = 0;
|
let mut total = 0;
|
||||||
for _ in 0..num_delimiters {
|
let mut count = 0;
|
||||||
match reader.read_until(delimiter, &mut buf) {
|
// Iterate through the input, using `count` to record the number of times `delimiter`
|
||||||
Ok(0) => {
|
// is seen. Once we find `num_delimiters` instances, return the offset of the byte
|
||||||
return Ok(total);
|
// immediately following that delimiter.
|
||||||
}
|
loop {
|
||||||
|
match reader.read(&mut buf) {
|
||||||
|
// Ok(0) => EoF before we found `num_delimiters` instance of `delimiter`.
|
||||||
|
// Return the total number of bytes read in that case.
|
||||||
|
Ok(0) => return Ok(total),
|
||||||
Ok(n) => {
|
Ok(n) => {
|
||||||
|
// Use memchr_iter since it greatly improves search performance.
|
||||||
|
for offset in memchr_iter(delimiter, &buf[..n]) {
|
||||||
|
count += 1;
|
||||||
|
if count == num_delimiters {
|
||||||
|
// Return offset of the byte after the `delimiter` instance.
|
||||||
|
return Ok(total + offset + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
total += n;
|
total += n;
|
||||||
buf.clear();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
return Err(e);
|
|
||||||
}
|
}
|
||||||
|
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
|
||||||
|
Err(e) => return Err(e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(total)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over bytes in the file, in reverse, until we find the
|
/// Iterate over bytes in the file, in reverse, until we find the
|
||||||
|
@ -322,35 +332,36 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) {
|
||||||
// This variable counts the number of delimiters found in the file
|
// This variable counts the number of delimiters found in the file
|
||||||
// so far (reading from the end of the file toward the beginning).
|
// so far (reading from the end of the file toward the beginning).
|
||||||
let mut counter = 0;
|
let mut counter = 0;
|
||||||
|
let mut first_slice = true;
|
||||||
for (block_idx, slice) in ReverseChunks::new(file).enumerate() {
|
for slice in ReverseChunks::new(file) {
|
||||||
// Iterate over each byte in the slice in reverse order.
|
// Iterate over each byte in the slice in reverse order.
|
||||||
let mut iter = slice.iter().enumerate().rev();
|
let mut iter = memrchr_iter(delimiter, &slice);
|
||||||
|
|
||||||
// Ignore a trailing newline in the last block, if there is one.
|
// Ignore a trailing newline in the last block, if there is one.
|
||||||
if block_idx == 0 {
|
if first_slice {
|
||||||
if let Some(c) = slice.last() {
|
if let Some(c) = slice.last() {
|
||||||
if *c == delimiter {
|
if *c == delimiter {
|
||||||
iter.next();
|
iter.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
first_slice = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For each byte, increment the count of the number of
|
// For each byte, increment the count of the number of
|
||||||
// delimiters found. If we have found more than the specified
|
// delimiters found. If we have found more than the specified
|
||||||
// number of delimiters, terminate the search and seek to the
|
// number of delimiters, terminate the search and seek to the
|
||||||
// appropriate location in the file.
|
// appropriate location in the file.
|
||||||
for (i, ch) in iter {
|
for i in iter {
|
||||||
if *ch == delimiter {
|
counter += 1;
|
||||||
counter += 1;
|
if counter >= num_delimiters {
|
||||||
if counter >= num_delimiters {
|
// We should never over-count - assert that.
|
||||||
// After each iteration of the outer loop, the
|
assert_eq!(counter, num_delimiters);
|
||||||
// cursor in the file is at the *beginning* of the
|
// After each iteration of the outer loop, the
|
||||||
// block, so seeking forward by `i + 1` bytes puts
|
// cursor in the file is at the *beginning* of the
|
||||||
// us right after the found delimiter.
|
// block, so seeking forward by `i + 1` bytes puts
|
||||||
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
|
// us right after the found delimiter.
|
||||||
return;
|
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
|
||||||
}
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue