mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-09-15 03:26:18 +00:00
tail: refactor code into ReverseChunks iterator
Refactor code from the `backwards_thru_file()` function into a new `ReverseChunks` iterator, and use that iterator to simplify the implementation of the `backwards_thru_file()` function. The `ReverseChunks` iterator yields `Vec<u8>` objects, each of which contains a copy of a block of bytes from a given file.
This commit is contained in:
parent
3114fd77be
commit
2e621759b2
2 changed files with 120 additions and 52 deletions
83
src/uu/tail/src/chunks.rs
Normal file
83
src/uu/tail/src/chunks.rs
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
//! Iterating over a file by chunks, starting at the end of the file.
|
||||||
|
//!
|
||||||
|
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
|
||||||
|
//! bytes from the file.
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{Read, Seek, SeekFrom};
|
||||||
|
|
||||||
|
/// When reading files in reverse in `bounded_tail`, this is the size of each
/// block read at a time.
///
/// 1 << 16 = 64 KiB per read, a common buffer size balancing syscall
/// count against memory use.
pub const BLOCK_SIZE: u64 = 1 << 16;
|
||||||
|
|
||||||
|
/// An iterator over a file in non-overlapping chunks from the end of the file.
///
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
/// possibly the last chunk, which might be smaller). Each call to
/// [`next`] will seek backwards through the given file.
pub struct ReverseChunks<'a> {
    /// The file to iterate over, by blocks, from the end to the beginning.
    ///
    /// Reads and seeks go through this shared reference; `File`
    /// implements `Read` and `Seek` for `&File` as well.
    file: &'a File,

    /// The total number of bytes in the file.
    size: u64,

    /// The total number of blocks to read.
    max_blocks_to_read: usize,

    /// The index of the next block to read.
    block_idx: usize,
}
|
||||||
|
|
||||||
|
impl<'a> ReverseChunks<'a> {
|
||||||
|
pub fn new(file: &'a mut File) -> ReverseChunks<'a> {
|
||||||
|
let size = file.seek(SeekFrom::End(0)).unwrap();
|
||||||
|
let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
|
||||||
|
let block_idx = 0;
|
||||||
|
ReverseChunks {
|
||||||
|
file,
|
||||||
|
size,
|
||||||
|
max_blocks_to_read,
|
||||||
|
block_idx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for ReverseChunks<'a> {
|
||||||
|
type Item = Vec<u8>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
// If there are no more chunks to read, terminate the iterator.
|
||||||
|
if self.block_idx >= self.max_blocks_to_read {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The chunk size is `BLOCK_SIZE` for all but the last chunk
|
||||||
|
// (that is, the chunk closest to the beginning of the file),
|
||||||
|
// which contains the remainder of the bytes.
|
||||||
|
let block_size = if self.block_idx == self.max_blocks_to_read - 1 {
|
||||||
|
self.size % BLOCK_SIZE
|
||||||
|
} else {
|
||||||
|
BLOCK_SIZE
|
||||||
|
};
|
||||||
|
|
||||||
|
// Seek backwards by the next chunk, read the full chunk into
|
||||||
|
// `buf`, and then seek back to the start of the chunk again.
|
||||||
|
let mut buf = vec![0; BLOCK_SIZE as usize];
|
||||||
|
let pos = self
|
||||||
|
.file
|
||||||
|
.seek(SeekFrom::Current(-(block_size as i64)))
|
||||||
|
.unwrap();
|
||||||
|
self.file
|
||||||
|
.read_exact(&mut buf[0..(block_size as usize)])
|
||||||
|
.unwrap();
|
||||||
|
let pos2 = self
|
||||||
|
.file
|
||||||
|
.seek(SeekFrom::Current(-(block_size as i64)))
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(pos, pos2);
|
||||||
|
|
||||||
|
self.block_idx += 1;
|
||||||
|
|
||||||
|
Some(buf[0..(block_size as usize)].to_vec())
|
||||||
|
}
|
||||||
|
}
|
|
@ -15,8 +15,11 @@ extern crate clap;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
|
|
||||||
|
mod chunks;
|
||||||
mod platform;
|
mod platform;
|
||||||
mod ringbuffer;
|
mod ringbuffer;
|
||||||
|
use chunks::ReverseChunks;
|
||||||
|
use chunks::BLOCK_SIZE;
|
||||||
use ringbuffer::RingBuffer;
|
use ringbuffer::RingBuffer;
|
||||||
|
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
|
@ -355,10 +358,6 @@ pub fn parse_size(mut size_slice: &str) -> Result<u64, ParseSizeErr> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// When reading files in reverse in `bounded_tail`, this is the size of each
|
|
||||||
/// block read at a time.
|
|
||||||
const BLOCK_SIZE: u64 = 1 << 16;
|
|
||||||
|
|
||||||
fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
|
fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
|
||||||
assert!(settings.follow);
|
assert!(settings.follow);
|
||||||
let mut last = readers.len() - 1;
|
let mut last = readers.len() - 1;
|
||||||
|
@ -396,51 +395,45 @@ fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over bytes in the file, in reverse, until `should_stop` returns
|
/// Iterate over bytes in the file, in reverse, until we find the
|
||||||
/// true. The `file` is left seek'd to the position just after the byte that
|
/// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the
|
||||||
/// `should_stop` returned true for.
|
/// position just after that delimiter.
|
||||||
fn backwards_thru_file<F>(
|
fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) {
|
||||||
file: &mut File,
|
// This variable counts the number of delimiters found in the file
|
||||||
size: u64,
|
// so far (reading from the end of the file toward the beginning).
|
||||||
buf: &mut Vec<u8>,
|
let mut counter = 0;
|
||||||
delimiter: u8,
|
|
||||||
should_stop: &mut F,
|
|
||||||
) where
|
|
||||||
F: FnMut(u8) -> bool,
|
|
||||||
{
|
|
||||||
assert!(buf.len() >= BLOCK_SIZE as usize);
|
|
||||||
|
|
||||||
let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
|
for (block_idx, slice) in ReverseChunks::new(file).enumerate() {
|
||||||
|
// Iterate over each byte in the slice in reverse order.
|
||||||
|
let mut iter = slice.iter().enumerate().rev();
|
||||||
|
|
||||||
for block_idx in 0..max_blocks_to_read {
|
// Ignore a trailing newline in the last block, if there is one.
|
||||||
let block_size = if block_idx == max_blocks_to_read - 1 {
|
if block_idx == 0 {
|
||||||
size % BLOCK_SIZE
|
if let Some(c) = slice.last() {
|
||||||
} else {
|
if *c == delimiter {
|
||||||
BLOCK_SIZE
|
iter.next();
|
||||||
};
|
}
|
||||||
|
}
|
||||||
// Seek backwards by the next block, read the full block into
|
|
||||||
// `buf`, and then seek back to the start of the block again.
|
|
||||||
let pos = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
|
|
||||||
file.read_exact(&mut buf[0..(block_size as usize)]).unwrap();
|
|
||||||
let pos2 = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
|
|
||||||
assert_eq!(pos, pos2);
|
|
||||||
|
|
||||||
// Iterate backwards through the bytes, calling `should_stop` on each
|
|
||||||
// one.
|
|
||||||
let slice = &buf[0..(block_size as usize)];
|
|
||||||
for (i, ch) in slice.iter().enumerate().rev() {
|
|
||||||
// Ignore one trailing newline.
|
|
||||||
if block_idx == 0 && i as u64 == block_size - 1 && *ch == delimiter {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if should_stop(*ch) {
|
// For each byte, increment the count of the number of
|
||||||
|
// delimiters found. If we have found more than the specified
|
||||||
|
// number of delimiters, terminate the search and seek to the
|
||||||
|
// appropriate location in the file.
|
||||||
|
for (i, ch) in iter {
|
||||||
|
if *ch == delimiter {
|
||||||
|
counter += 1;
|
||||||
|
if counter >= num_delimiters {
|
||||||
|
// After each iteration of the outer loop, the
|
||||||
|
// cursor in the file is at the *beginning* of the
|
||||||
|
// block, so seeking forward by `i + 1` bytes puts
|
||||||
|
// us right after the found delimiter.
|
||||||
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
|
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// When tail'ing a file, we do not need to read the whole file from start to
|
/// When tail'ing a file, we do not need to read the whole file from start to
|
||||||
|
@ -449,20 +442,12 @@ fn backwards_thru_file<F>(
|
||||||
/// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
|
/// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
|
||||||
/// being a nice performance win for very large files.
|
/// being a nice performance win for very large files.
|
||||||
fn bounded_tail(file: &mut File, settings: &Settings) {
|
fn bounded_tail(file: &mut File, settings: &Settings) {
|
||||||
let size = file.seek(SeekFrom::End(0)).unwrap();
|
|
||||||
let mut buf = vec![0; BLOCK_SIZE as usize];
|
let mut buf = vec![0; BLOCK_SIZE as usize];
|
||||||
|
|
||||||
// Find the position in the file to start printing from.
|
// Find the position in the file to start printing from.
|
||||||
match settings.mode {
|
match settings.mode {
|
||||||
FilterMode::Lines(mut count, delimiter) => {
|
FilterMode::Lines(count, delimiter) => {
|
||||||
backwards_thru_file(file, size, &mut buf, delimiter, &mut |byte| {
|
backwards_thru_file(file, count as usize, delimiter);
|
||||||
if byte == delimiter {
|
|
||||||
count -= 1;
|
|
||||||
count == 0
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
FilterMode::Bytes(count) => {
|
FilterMode::Bytes(count) => {
|
||||||
file.seek(SeekFrom::End(-(count as i64))).unwrap();
|
file.seek(SeekFrom::End(-(count as i64))).unwrap();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue