1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-09-14 19:16:17 +00:00

Merge pull request #2206 from jfinkels/tail-reverse-block-iterator

tail: refactor code into ReverseChunks iterator
This commit is contained in:
Sylvestre Ledru 2021-05-13 09:07:02 +02:00 committed by GitHub
commit 204b051711
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 122 additions and 54 deletions

83
src/uu/tail/src/chunks.rs Normal file
View file

@ -0,0 +1,83 @@
//! Iterating over a file by chunks, starting at the end of the file.
//!
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
//! bytes from the file.
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
/// When reading files in reverse in `bounded_tail`, this is the size of each
/// block read at a time.
pub const BLOCK_SIZE: u64 = 1 << 16;

/// An iterator over a file in non-overlapping chunks from the end of the file.
///
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`], except possibly
/// the final chunk yielded (the one closest to the beginning of the file),
/// which holds the leftover bytes and may therefore be smaller. Each call to
/// [`Iterator::next`] seeks backwards through the given file and leaves the
/// file cursor at the start of the chunk that was just returned.
pub struct ReverseChunks<'a> {
    /// The file to iterate over, by blocks, from the end to the beginning.
    file: &'a File,

    /// The total number of bytes in the file.
    size: u64,

    /// The total number of blocks to read.
    max_blocks_to_read: usize,

    /// The index of the next block to read.
    block_idx: usize,
}

impl<'a> ReverseChunks<'a> {
    /// Create an iterator over the chunks of `file`, starting from its end.
    ///
    /// The file cursor is moved to the end of the file as a side effect.
    ///
    /// # Panics
    ///
    /// Panics if seeking to the end of `file` fails.
    pub fn new(file: &'a mut File) -> ReverseChunks<'a> {
        let size = file.seek(SeekFrom::End(0)).unwrap();

        // Ceiling division in integer arithmetic: one block per full
        // `BLOCK_SIZE` bytes, plus one more if there is a partial block.
        let full_blocks = size / BLOCK_SIZE;
        let has_partial_block = size % BLOCK_SIZE != 0;
        let max_blocks_to_read = (full_blocks + u64::from(has_partial_block)) as usize;

        ReverseChunks {
            file,
            size,
            max_blocks_to_read,
            block_idx: 0,
        }
    }
}

impl<'a> Iterator for ReverseChunks<'a> {
    type Item = Vec<u8>;

    /// Read and return the next chunk, moving towards the start of the file.
    ///
    /// Returns `None` once every block has been yielded.
    ///
    /// # Panics
    ///
    /// Panics if seeking or reading the file fails, or if the cursor does
    /// not end up at the start of the chunk after reading it.
    fn next(&mut self) -> Option<Self::Item> {
        // If there are no more chunks to read, terminate the iterator.
        if self.block_idx >= self.max_blocks_to_read {
            return None;
        }

        // The chunk size is `BLOCK_SIZE` for all but the last chunk
        // (that is, the chunk closest to the beginning of the file),
        // which contains the remainder of the bytes. When the file size
        // is an exact multiple of `BLOCK_SIZE` the remainder is zero and
        // the last chunk is a full block, not an empty one.
        let remainder = self.size % BLOCK_SIZE;
        let is_last_block = self.block_idx == self.max_blocks_to_read - 1;
        let block_size = if is_last_block && remainder != 0 {
            remainder
        } else {
            BLOCK_SIZE
        };
        let step_back = -(block_size as i64);

        // Seek backwards by the next chunk, read the full chunk into
        // `buf`, and then seek back to the start of the chunk again.
        let mut buf = vec![0u8; block_size as usize];
        let pos = self.file.seek(SeekFrom::Current(step_back)).unwrap();
        self.file.read_exact(&mut buf).unwrap();
        let pos2 = self.file.seek(SeekFrom::Current(step_back)).unwrap();
        assert_eq!(pos, pos2);

        self.block_idx += 1;
        Some(buf)
    }
}

View file

@ -15,8 +15,11 @@ extern crate clap;
#[macro_use]
extern crate uucore;
mod chunks;
mod platform;
mod ringbuffer;
use chunks::ReverseChunks;
use chunks::BLOCK_SIZE;
use ringbuffer::RingBuffer;
use clap::{App, Arg};
@ -241,7 +244,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
}
let mut file = File::open(&path).unwrap();
if is_seekable(&mut file) {
bounded_tail(&file, &settings);
bounded_tail(&mut file, &settings);
if settings.follow {
let reader = BufReader::new(file);
readers.push(reader);
@ -355,10 +358,6 @@ pub fn parse_size(mut size_slice: &str) -> Result<u64, ParseSizeErr> {
}
}
/// When reading files in reverse in `bounded_tail`, this is the size of each
/// block read at a time.
const BLOCK_SIZE: u64 = 1 << 16;
fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
assert!(settings.follow);
let mut last = readers.len() - 1;
@ -396,48 +395,42 @@ fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings:
}
}
/// Iterate over bytes in the file, in reverse, until `should_stop` returns
/// true. The `file` is left seek'd to the position just after the byte that
/// `should_stop` returned true for.
fn backwards_thru_file<F>(
mut file: &File,
size: u64,
buf: &mut Vec<u8>,
delimiter: u8,
should_stop: &mut F,
) where
F: FnMut(u8) -> bool,
{
assert!(buf.len() >= BLOCK_SIZE as usize);
/// Iterate over bytes in the file, in reverse, until we find the
/// `num_delimiters`-th instance of `delimiter`. The `file` is left seek'd to
/// the position just after that delimiter.
fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) {
// This variable counts the number of delimiters found in the file
// so far (reading from the end of the file toward the beginning).
let mut counter = 0;
let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
for (block_idx, slice) in ReverseChunks::new(file).enumerate() {
// Iterate over each byte in the slice in reverse order.
let mut iter = slice.iter().enumerate().rev();
for block_idx in 0..max_blocks_to_read {
let block_size = if block_idx == max_blocks_to_read - 1 {
size % BLOCK_SIZE
} else {
BLOCK_SIZE
};
// Seek backwards by the next block, read the full block into
// `buf`, and then seek back to the start of the block again.
let pos = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
file.read_exact(&mut buf[0..(block_size as usize)]).unwrap();
let pos2 = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
assert_eq!(pos, pos2);
// Iterate backwards through the bytes, calling `should_stop` on each
// one.
let slice = &buf[0..(block_size as usize)];
for (i, ch) in slice.iter().enumerate().rev() {
// Ignore one trailing newline.
if block_idx == 0 && i as u64 == block_size - 1 && *ch == delimiter {
continue;
// Ignore a trailing newline in the last block, if there is one.
if block_idx == 0 {
if let Some(c) = slice.last() {
if *c == delimiter {
iter.next();
}
}
}
if should_stop(*ch) {
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
return;
// For each byte that is a delimiter, increment the count of
// delimiters found. Once we have found the specified number of
// delimiters, terminate the search and seek to the appropriate
// location in the file.
for (i, ch) in iter {
if *ch == delimiter {
counter += 1;
if counter >= num_delimiters {
// After each iteration of the outer loop, the
// cursor in the file is at the *beginning* of the
// block, so seeking forward by `i + 1` bytes puts
// us right after the found delimiter.
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
return;
}
}
}
}
@ -448,21 +441,13 @@ fn backwards_thru_file<F>(
/// end of the file, and then read the file "backwards" in blocks of size
/// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
/// being a nice performance win for very large files.
fn bounded_tail(mut file: &File, settings: &Settings) {
let size = file.seek(SeekFrom::End(0)).unwrap();
fn bounded_tail(file: &mut File, settings: &Settings) {
let mut buf = vec![0; BLOCK_SIZE as usize];
// Find the position in the file to start printing from.
match settings.mode {
FilterMode::Lines(mut count, delimiter) => {
backwards_thru_file(&file, size, &mut buf, delimiter, &mut |byte| {
if byte == delimiter {
count -= 1;
count == 0
} else {
false
}
});
FilterMode::Lines(count, delimiter) => {
backwards_thru_file(file, count as usize, delimiter);
}
FilterMode::Bytes(count) => {
file.seek(SeekFrom::End(-(count as i64))).unwrap();