Mirror of https://github.com/RGBCube/uutils-coreutils, synced 2025-09-14 19:16:17 +00:00
Merge pull request #2206 from jfinkels/tail-reverse-block-iterator
tail: refactor code into ReverseChunks iterator
Commit 204b051711
2 changed files with 122 additions and 54 deletions
src/uu/tail/src/chunks.rs (new file, 83 lines)
@@ -0,0 +1,83 @@
//! Iterating over a file by chunks, starting at the end of the file.
//!
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
//! bytes from the file.
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};

/// When reading files in reverse in `bounded_tail`, this is the size of each
/// block read at a time.
pub const BLOCK_SIZE: u64 = 1 << 16;

/// An iterator over a file in non-overlapping chunks from the end of the file.
///
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
/// possibly the last chunk, which might be smaller). Each call to
/// [`next`] will seek backwards through the given file.
pub struct ReverseChunks<'a> {
    /// The file to iterate over, by blocks, from the end to the beginning.
    file: &'a File,

    /// The total number of bytes in the file.
    size: u64,

    /// The total number of blocks to read.
    max_blocks_to_read: usize,

    /// The index of the next block to read.
    block_idx: usize,
}

impl<'a> ReverseChunks<'a> {
    pub fn new(file: &'a mut File) -> ReverseChunks<'a> {
        let size = file.seek(SeekFrom::End(0)).unwrap();
        let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
        let block_idx = 0;
        ReverseChunks {
            file,
            size,
            max_blocks_to_read,
            block_idx,
        }
    }
}

impl<'a> Iterator for ReverseChunks<'a> {
    type Item = Vec<u8>;

    fn next(&mut self) -> Option<Self::Item> {
        // If there are no more chunks to read, terminate the iterator.
        if self.block_idx >= self.max_blocks_to_read {
            return None;
        }

        // The chunk size is `BLOCK_SIZE` for all but the last chunk
        // (that is, the chunk closest to the beginning of the file),
        // which contains the remainder of the bytes.
        let block_size = if self.block_idx == self.max_blocks_to_read - 1 {
            self.size % BLOCK_SIZE
        } else {
            BLOCK_SIZE
        };

        // Seek backwards by the next chunk, read the full chunk into
        // `buf`, and then seek back to the start of the chunk again.
        let mut buf = vec![0; BLOCK_SIZE as usize];
        let pos = self
            .file
            .seek(SeekFrom::Current(-(block_size as i64)))
            .unwrap();
        self.file
            .read_exact(&mut buf[0..(block_size as usize)])
            .unwrap();
        let pos2 = self
            .file
            .seek(SeekFrom::Current(-(block_size as i64)))
            .unwrap();
        assert_eq!(pos, pos2);

        self.block_idx += 1;

        Some(buf[0..(block_size as usize)].to_vec())
    }
}
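For orientation, here is a minimal usage sketch of the new iterator. It is not part of the commit; it assumes it is compiled inside the tail crate (where `mod chunks;` makes `ReverseChunks` visible), and the temp-file path and function name are purely illustrative.

use std::fs::File;
use std::io::Write;

use crate::chunks::ReverseChunks;

fn print_chunk_sizes() -> std::io::Result<()> {
    // Write a small file, then walk it backwards.
    let path = std::env::temp_dir().join("reverse_chunks_demo.txt");
    File::create(&path)?.write_all(b"first\nsecond\nthird\n")?;

    // The chunk at the *end* of the file is yielded first; a file smaller
    // than BLOCK_SIZE produces exactly one (short) chunk.
    let mut file = File::open(&path)?;
    for (i, chunk) in ReverseChunks::new(&mut file).enumerate() {
        println!("chunk {}: {} bytes", i, chunk.len());
    }
    Ok(())
}

For the 19-byte file above this prints a single line, "chunk 0: 19 bytes", since the whole file fits in one BLOCK_SIZE chunk.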
src/uu/tail/src/tail.rs

@@ -15,8 +15,11 @@ extern crate clap;
 #[macro_use]
 extern crate uucore;
 
+mod chunks;
 mod platform;
 mod ringbuffer;
+use chunks::ReverseChunks;
+use chunks::BLOCK_SIZE;
 use ringbuffer::RingBuffer;
 
 use clap::{App, Arg};
@@ -241,7 +244,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
         }
         let mut file = File::open(&path).unwrap();
         if is_seekable(&mut file) {
-            bounded_tail(&file, &settings);
+            bounded_tail(&mut file, &settings);
             if settings.follow {
                 let reader = BufReader::new(file);
                 readers.push(reader);
@@ -355,10 +358,6 @@ pub fn parse_size(mut size_slice: &str) -> Result<u64, ParseSizeErr> {
     }
 }
 
-/// When reading files in reverse in `bounded_tail`, this is the size of each
-/// block read at a time.
-const BLOCK_SIZE: u64 = 1 << 16;
-
 fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
     assert!(settings.follow);
     let mut last = readers.len() - 1;
@@ -396,48 +395,42 @@ fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
     }
 }
 
-/// Iterate over bytes in the file, in reverse, until `should_stop` returns
-/// true. The `file` is left seek'd to the position just after the byte that
-/// `should_stop` returned true for.
-fn backwards_thru_file<F>(
-    mut file: &File,
-    size: u64,
-    buf: &mut Vec<u8>,
-    delimiter: u8,
-    should_stop: &mut F,
-) where
-    F: FnMut(u8) -> bool,
-{
-    assert!(buf.len() >= BLOCK_SIZE as usize);
+/// Iterate over bytes in the file, in reverse, until we find the
+/// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the
+/// position just after that delimiter.
+fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) {
+    // This variable counts the number of delimiters found in the file
+    // so far (reading from the end of the file toward the beginning).
+    let mut counter = 0;
 
-    let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
+    for (block_idx, slice) in ReverseChunks::new(file).enumerate() {
+        // Iterate over each byte in the slice in reverse order.
+        let mut iter = slice.iter().enumerate().rev();
 
-    for block_idx in 0..max_blocks_to_read {
-        let block_size = if block_idx == max_blocks_to_read - 1 {
-            size % BLOCK_SIZE
-        } else {
-            BLOCK_SIZE
-        };
-
-        // Seek backwards by the next block, read the full block into
-        // `buf`, and then seek back to the start of the block again.
-        let pos = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
-        file.read_exact(&mut buf[0..(block_size as usize)]).unwrap();
-        let pos2 = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
-        assert_eq!(pos, pos2);
-
-        // Iterate backwards through the bytes, calling `should_stop` on each
-        // one.
-        let slice = &buf[0..(block_size as usize)];
-        for (i, ch) in slice.iter().enumerate().rev() {
-            // Ignore one trailing newline.
-            if block_idx == 0 && i as u64 == block_size - 1 && *ch == delimiter {
-                continue;
+        // Ignore a trailing newline in the last block, if there is one.
+        if block_idx == 0 {
+            if let Some(c) = slice.last() {
+                if *c == delimiter {
+                    iter.next();
+                }
             }
+        }
 
-            if should_stop(*ch) {
-                file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
-                return;
+        // For each byte, increment the count of the number of
+        // delimiters found. If we have found more than the specified
+        // number of delimiters, terminate the search and seek to the
+        // appropriate location in the file.
+        for (i, ch) in iter {
+            if *ch == delimiter {
+                counter += 1;
+                if counter >= num_delimiters {
+                    // After each iteration of the outer loop, the
+                    // cursor in the file is at the *beginning* of the
+                    // block, so seeking forward by `i + 1` bytes puts
+                    // us right after the found delimiter.
+                    file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
+                    return;
+                }
             }
         }
     }
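The index arithmetic in the new inner loop is easiest to see on a concrete buffer. The standalone sketch below mirrors the same counting on an in-memory slice; it is an illustration only, not code from the commit, and the function name is hypothetical.

/// Return the byte offset just past the `num_delimiters`-th delimiter,
/// counting backwards from the end of `data` and skipping one trailing
/// delimiter, mirroring the counting logic of the new `backwards_thru_file`.
fn offset_after_nth_delimiter_from_end(data: &[u8], num_delimiters: usize, delimiter: u8) -> usize {
    let mut counter = 0;
    let mut iter = data.iter().enumerate().rev();

    // Skip one trailing delimiter, if present.
    if data.last() == Some(&delimiter) {
        iter.next();
    }

    for (i, ch) in iter {
        if *ch == delimiter {
            counter += 1;
            if counter >= num_delimiters {
                // With the cursor at the start of the buffer, `i + 1` is the
                // position just past the delimiter that was found.
                return i + 1;
            }
        }
    }
    0
}

For data = b"first\nsecond\nthird\n" and num_delimiters = 2 (a `tail -n 2` style query), the trailing newline is skipped, the newline after "first" is the second one found while scanning backwards, and the returned offset (6) points at the start of "second".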
@@ -448,21 +441,13 @@ fn backwards_thru_file<F>(
 /// end of the file, and then read the file "backwards" in blocks of size
 /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
 /// being a nice performance win for very large files.
-fn bounded_tail(mut file: &File, settings: &Settings) {
-    let size = file.seek(SeekFrom::End(0)).unwrap();
+fn bounded_tail(file: &mut File, settings: &Settings) {
     let mut buf = vec![0; BLOCK_SIZE as usize];
 
     // Find the position in the file to start printing from.
     match settings.mode {
-        FilterMode::Lines(mut count, delimiter) => {
-            backwards_thru_file(&file, size, &mut buf, delimiter, &mut |byte| {
-                if byte == delimiter {
-                    count -= 1;
-                    count == 0
-                } else {
-                    false
-                }
-            });
+        FilterMode::Lines(count, delimiter) => {
+            backwards_thru_file(file, count as usize, delimiter);
         }
         FilterMode::Bytes(count) => {
            file.seek(SeekFrom::End(-(count as i64))).unwrap();
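The `FilterMode::Bytes` arm reduces to a single seek relative to the end of the file; the rest of `bounded_tail` (not shown in this hunk) then prints from the resulting position. A small self-contained sketch of that idea, with a hypothetical helper name not taken from the commit:

use std::fs::File;
use std::io::{self, Seek, SeekFrom};

/// Print the last `count` bytes of the file at `path` to stdout.
/// (Assumes the file is at least `count` bytes long.)
fn tail_bytes(path: &str, count: u64) -> io::Result<()> {
    let mut file = File::open(path)?;
    // Position the cursor `count` bytes before the end of the file...
    file.seek(SeekFrom::End(-(count as i64)))?;
    // ...and copy everything from there to stdout.
    io::copy(&mut file, &mut io::stdout())?;
    Ok(())
}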
|
Loading…
Add table
Add a link
Reference in a new issue