mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-09-15 03:26:18 +00:00
tail: refactor code into ReverseChunks iterator
Refactor code from the `backwards_thru_file()` function into a new `ReverseChunks` iterator, and use that iterator to simplify the implementation of the `backwards_thru_file()` function. The `ReverseChunks` iterator yields `Vec<u8>` objects, each of which contains a copy of a block of bytes from a given file.
This commit is contained in:
parent
3114fd77be
commit
2e621759b2
2 changed files with 120 additions and 52 deletions
83
src/uu/tail/src/chunks.rs
Normal file
83
src/uu/tail/src/chunks.rs
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
//! Iterating over a file by chunks, starting at the end of the file.
|
||||||
|
//!
|
||||||
|
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
|
||||||
|
//! bytes from the file.
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{Read, Seek, SeekFrom};
|
||||||
|
|
||||||
|
/// When reading files in reverse in `bounded_tail`, this is the size of each
/// block read at a time.
///
/// 1 << 16 = 64 KiB per read, a common buffer size balancing syscall
/// count against memory use.
pub const BLOCK_SIZE: u64 = 1 << 16;
|
||||||
|
|
||||||
|
/// An iterator over a file in non-overlapping chunks from the end of the file.
///
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
/// possibly the last chunk, which might be smaller). Each call to
/// [`next`] will seek backwards through the given file.
pub struct ReverseChunks<'a> {
    /// The file to iterate over, by blocks, from the end to the beginning.
    ///
    /// Reads and seeks go through this shared reference; `File`
    /// implements `Read` and `Seek` for `&File` as well.
    file: &'a File,

    /// The total number of bytes in the file.
    size: u64,

    /// The total number of blocks to read.
    max_blocks_to_read: usize,

    /// The index of the next block to read.
    block_idx: usize,
}
|
||||||
|
|
||||||
|
impl<'a> ReverseChunks<'a> {
|
||||||
|
pub fn new(file: &'a mut File) -> ReverseChunks<'a> {
|
||||||
|
let size = file.seek(SeekFrom::End(0)).unwrap();
|
||||||
|
let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
|
||||||
|
let block_idx = 0;
|
||||||
|
ReverseChunks {
|
||||||
|
file,
|
||||||
|
size,
|
||||||
|
max_blocks_to_read,
|
||||||
|
block_idx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for ReverseChunks<'a> {
|
||||||
|
type Item = Vec<u8>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
// If there are no more chunks to read, terminate the iterator.
|
||||||
|
if self.block_idx >= self.max_blocks_to_read {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The chunk size is `BLOCK_SIZE` for all but the last chunk
|
||||||
|
// (that is, the chunk closest to the beginning of the file),
|
||||||
|
// which contains the remainder of the bytes.
|
||||||
|
let block_size = if self.block_idx == self.max_blocks_to_read - 1 {
|
||||||
|
self.size % BLOCK_SIZE
|
||||||
|
} else {
|
||||||
|
BLOCK_SIZE
|
||||||
|
};
|
||||||
|
|
||||||
|
// Seek backwards by the next chunk, read the full chunk into
|
||||||
|
// `buf`, and then seek back to the start of the chunk again.
|
||||||
|
let mut buf = vec![0; BLOCK_SIZE as usize];
|
||||||
|
let pos = self
|
||||||
|
.file
|
||||||
|
.seek(SeekFrom::Current(-(block_size as i64)))
|
||||||
|
.unwrap();
|
||||||
|
self.file
|
||||||
|
.read_exact(&mut buf[0..(block_size as usize)])
|
||||||
|
.unwrap();
|
||||||
|
let pos2 = self
|
||||||
|
.file
|
||||||
|
.seek(SeekFrom::Current(-(block_size as i64)))
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(pos, pos2);
|
||||||
|
|
||||||
|
self.block_idx += 1;
|
||||||
|
|
||||||
|
Some(buf[0..(block_size as usize)].to_vec())
|
||||||
|
}
|
||||||
|
}
|
|
@ -15,8 +15,11 @@ extern crate clap;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
|
|
||||||
|
mod chunks;
|
||||||
mod platform;
|
mod platform;
|
||||||
mod ringbuffer;
|
mod ringbuffer;
|
||||||
|
use chunks::ReverseChunks;
|
||||||
|
use chunks::BLOCK_SIZE;
|
||||||
use ringbuffer::RingBuffer;
|
use ringbuffer::RingBuffer;
|
||||||
|
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
|
@ -355,10 +358,6 @@ pub fn parse_size(mut size_slice: &str) -> Result<u64, ParseSizeErr> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// When reading files in reverse in `bounded_tail`, this is the size of each
|
|
||||||
/// block read at a time.
|
|
||||||
const BLOCK_SIZE: u64 = 1 << 16;
|
|
||||||
|
|
||||||
fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
|
fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
|
||||||
assert!(settings.follow);
|
assert!(settings.follow);
|
||||||
let mut last = readers.len() - 1;
|
let mut last = readers.len() - 1;
|
||||||
|
@ -396,51 +395,45 @@ fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over bytes in the file, in reverse, until `should_stop` returns
|
/// Iterate over bytes in the file, in reverse, until we find the
|
||||||
/// true. The `file` is left seek'd to the position just after the byte that
|
/// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the
|
||||||
/// `should_stop` returned true for.
|
/// position just after that delimiter.
|
||||||
fn backwards_thru_file<F>(
|
fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) {
|
||||||
file: &mut File,
|
// This variable counts the number of delimiters found in the file
|
||||||
size: u64,
|
// so far (reading from the end of the file toward the beginning).
|
||||||
buf: &mut Vec<u8>,
|
let mut counter = 0;
|
||||||
delimiter: u8,
|
|
||||||
should_stop: &mut F,
|
|
||||||
) where
|
|
||||||
F: FnMut(u8) -> bool,
|
|
||||||
{
|
|
||||||
assert!(buf.len() >= BLOCK_SIZE as usize);
|
|
||||||
|
|
||||||
let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize;
|
for (block_idx, slice) in ReverseChunks::new(file).enumerate() {
|
||||||
|
// Iterate over each byte in the slice in reverse order.
|
||||||
|
let mut iter = slice.iter().enumerate().rev();
|
||||||
|
|
||||||
for block_idx in 0..max_blocks_to_read {
|
// Ignore a trailing newline in the last block, if there is one.
|
||||||
let block_size = if block_idx == max_blocks_to_read - 1 {
|
if block_idx == 0 {
|
||||||
size % BLOCK_SIZE
|
if let Some(c) = slice.last() {
|
||||||
} else {
|
if *c == delimiter {
|
||||||
BLOCK_SIZE
|
iter.next();
|
||||||
};
|
}
|
||||||
|
}
|
||||||
// Seek backwards by the next block, read the full block into
|
|
||||||
// `buf`, and then seek back to the start of the block again.
|
|
||||||
let pos = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
|
|
||||||
file.read_exact(&mut buf[0..(block_size as usize)]).unwrap();
|
|
||||||
let pos2 = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap();
|
|
||||||
assert_eq!(pos, pos2);
|
|
||||||
|
|
||||||
// Iterate backwards through the bytes, calling `should_stop` on each
|
|
||||||
// one.
|
|
||||||
let slice = &buf[0..(block_size as usize)];
|
|
||||||
for (i, ch) in slice.iter().enumerate().rev() {
|
|
||||||
// Ignore one trailing newline.
|
|
||||||
if block_idx == 0 && i as u64 == block_size - 1 && *ch == delimiter {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if should_stop(*ch) {
|
// For each byte, increment the count of the number of
|
||||||
|
// delimiters found. If we have found more than the specified
|
||||||
|
// number of delimiters, terminate the search and seek to the
|
||||||
|
// appropriate location in the file.
|
||||||
|
for (i, ch) in iter {
|
||||||
|
if *ch == delimiter {
|
||||||
|
counter += 1;
|
||||||
|
if counter >= num_delimiters {
|
||||||
|
// After each iteration of the outer loop, the
|
||||||
|
// cursor in the file is at the *beginning* of the
|
||||||
|
// block, so seeking forward by `i + 1` bytes puts
|
||||||
|
// us right after the found delimiter.
|
||||||
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
|
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// When tail'ing a file, we do not need to read the whole file from start to
|
/// When tail'ing a file, we do not need to read the whole file from start to
|
||||||
|
@ -449,20 +442,12 @@ fn backwards_thru_file<F>(
|
||||||
/// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
|
/// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
|
||||||
/// being a nice performance win for very large files.
|
/// being a nice performance win for very large files.
|
||||||
fn bounded_tail(file: &mut File, settings: &Settings) {
|
fn bounded_tail(file: &mut File, settings: &Settings) {
|
||||||
let size = file.seek(SeekFrom::End(0)).unwrap();
|
|
||||||
let mut buf = vec![0; BLOCK_SIZE as usize];
|
let mut buf = vec![0; BLOCK_SIZE as usize];
|
||||||
|
|
||||||
// Find the position in the file to start printing from.
|
// Find the position in the file to start printing from.
|
||||||
match settings.mode {
|
match settings.mode {
|
||||||
FilterMode::Lines(mut count, delimiter) => {
|
FilterMode::Lines(count, delimiter) => {
|
||||||
backwards_thru_file(file, size, &mut buf, delimiter, &mut |byte| {
|
backwards_thru_file(file, count as usize, delimiter);
|
||||||
if byte == delimiter {
|
|
||||||
count -= 1;
|
|
||||||
count == 0
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
FilterMode::Bytes(count) => {
|
FilterMode::Bytes(count) => {
|
||||||
file.seek(SeekFrom::End(-(count as i64))).unwrap();
|
file.seek(SeekFrom::End(-(count as i64))).unwrap();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue