tail: improve performance of piped stdin

Rewrite handling of stdin when it is piped and read input in chunks. Fixes https://github.com/uutils/coreutils/issues/3842
2025-09-15 03:26:18 +00:00 · 2022-09-09 13:50:59 +02:00 · 2022-09-09 13:50:59 +02:00 · 2658f8ae5b
commit 2658f8ae5b
parent b39f5239e7
7 changed files with 1704 additions and 83 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2871,6 +2871,7 @@ version = "0.0.15"
 dependencies = [
 "clap",
 "libc",
 "memchr",
 "nix",
 "notify",
 "same-file",
--- a/src/uu/tail/Cargo.toml
+++ b/src/uu/tail/Cargo.toml
@ -18,6 +18,7 @@ path = "src/tail.rs"
 [dependencies]
 clap = { version = "3.2", features = ["wrap_help", "cargo"] }
 libc = "0.2.132"
 memchr = "2.5.0"
 notify = { version = "=5.0.0-pre.16", features=["macos_kqueue"]}
 uucore = { version=">=0.0.15", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
 same-file = "1.0.6"
--- a/src/uu/tail/src/chunks.rs
+++ b/src/uu/tail/src/chunks.rs
@ -1,14 +1,29 @@
-//! Iterating over a file by chunks, starting at the end of the file.
+//  * This file is part of the uutils coreutils package.
 //  *
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.
 //! Iterating over a file by chunks, either starting at the end of the file with [`ReverseChunks`]
 //! or at the end of piped stdin with [`LinesChunk`] or [`BytesChunk`].
 //!
-//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
+//! Use [`ReverseChunks::new`] to create a new iterator over chunks of bytes from the file.
-//! bytes from the file.
+// spell-checker:ignore (ToDO) filehandle BUFSIZ
 use std::collections::VecDeque;
 use std::fs::File;
-use std::io::{Read, Seek, SeekFrom};
+use std::io::{BufReader, Read, Seek, SeekFrom, Write};
 use uucore::error::UResult;
 /// When reading files in reverse in `bounded_tail`, this is the size of each
 /// block read at a time.
 pub const BLOCK_SIZE: u64 = 1 << 16;
 /// The size in bytes of the backing buffer of a [`LinesChunk`] or [`BytesChunk`].
 ///
 /// The value mirrors the `BUFSIZ` constant known from `stdio.h` and the `libc` crate, which exists
 /// to make stream IO efficient. `stdio.h` defines `BUFSIZ` differently on each platform, whereas
 /// the `libc` crate fixes it to 8192 on all platforms. Since `libc` settled on 8192 as a
 /// reasonable default, the same value is used here.
 pub const BUFFER_SIZE: usize = 8192;
 /// An iterator over a file in non-overlapping chunks from the end of the file.
 ///
 /// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
@ -86,3 +101,598 @@ impl<'a> Iterator for ReverseChunks<'a> {
        Some(buf[0..(block_size as usize)].to_vec())
    }
 }
 /// The type of the backing buffer of [`BytesChunk`] and [`LinesChunk`]: a fixed-size byte array
 /// which can hold at most [`BUFFER_SIZE`] elements.
 type ChunkBuffer = [u8; BUFFER_SIZE];
 /// A [`BytesChunk`] stores a bounded number of bytes (at most [`BUFFER_SIZE`]) in a fixed-size
 /// buffer.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct BytesChunk {
    /// The [`ChunkBuffer`], an array storing the bytes, for example filled by
    /// [`BytesChunk::fill`].
    buffer: ChunkBuffer,
    /// The number of bytes this chunk currently holds. This is not equal to `buffer.len()`, since
    /// a [`BytesChunk`] may store fewer bytes than the internal buffer can hold. In addition, a
    /// [`BytesChunk`] may be reused, which makes it necessary to track the number of stored bytes.
    /// A `usize` is sufficient here, since the maximum number of bytes is [`BUFFER_SIZE`], which
    /// is itself a `usize`.
    bytes: usize,
 }
 impl BytesChunk {
    /// Creates an empty chunk: the buffer is zeroed and the number of stored bytes is 0.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {
            buffer: [0; BUFFER_SIZE],
            bytes: 0,
        }
    }
    /// Create a new chunk from an existing chunk. The new chunk's buffer will be copied from the
    /// old chunk's buffer, copying the slice `[offset..old_chunk.bytes]` into the new chunk's
    /// buffer but starting at 0 instead of offset. If the offset is larger than or equal to
    /// `chunk.bytes` then a new empty `BytesChunk` is returned.
    ///
    /// # Arguments
    ///
    /// * `chunk`: The chunk to create a new `BytesChunk` chunk from
    /// * `offset`: Start to copy the old chunk's buffer from this position. An offset of
    ///             `chunk.bytes` or more yields an empty chunk.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = BytesChunk::new();
    /// chunk.buffer[1] = 1;
    /// chunk.bytes = 2;
    /// let new_chunk = BytesChunk::from_chunk(&chunk, 0);
    /// assert_eq!(2, new_chunk.get_buffer().len());
    /// assert_eq!(&[0, 1], new_chunk.get_buffer());
    ///
    /// let new_chunk = BytesChunk::from_chunk(&chunk, 1);
    /// assert_eq!(1, new_chunk.get_buffer().len());
    /// assert_eq!(&[1], new_chunk.get_buffer());
    /// ```
    fn from_chunk(chunk: &Self, offset: usize) -> Self {
        // Nothing is left to copy, and slicing with `offset > chunk.bytes` would panic.
        if offset >= chunk.bytes {
            return Self::new();
        }
        let mut buffer: ChunkBuffer = [0; BUFFER_SIZE];
        let slice = chunk.get_buffer_with(offset);
        buffer[..slice.len()].copy_from_slice(slice);
        Self {
            buffer,
            bytes: chunk.bytes - offset,
        }
    }
    /// Receive the internal buffer safely: the returned slice contains exactly `self.bytes`
    /// bytes and never exposes the unused tail of the backing array.
    ///
    /// returns: a slice containing the bytes of the internal buffer from `[0..self.bytes]`
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = BytesChunk::new();
    /// chunk.bytes = 1;
    /// assert_eq!(&[0], chunk.get_buffer());
    /// ```
    pub fn get_buffer(&self) -> &[u8] {
        &self.buffer[..self.bytes]
    }
    /// Like [`BytesChunk::get_buffer`], but returning a slice from `[offset..self.bytes]`.
    ///
    /// returns: a slice containing the bytes of the internal buffer from `[offset..self.bytes]`
    ///
    /// # Panics
    ///
    /// Panics if `offset` is larger than `self.bytes`, because the slice start would then lie
    /// behind the slice end.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = BytesChunk::new();
    /// chunk.bytes = 2;
    /// assert_eq!(&[0], chunk.get_buffer_with(1));
    /// ```
    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
        &self.buffer[offset..self.bytes]
    }
    /// Returns true if this chunk stores at least one byte.
    pub fn has_data(&self) -> bool {
        self.bytes > 0
    }
    /// Fills `self.buffer` with at most [`BUFFER_SIZE`] bytes using a single successful read,
    /// draining the reader by that number of bytes. A read may return fewer bytes than the buffer
    /// can hold, so the chunk is not necessarily full afterwards. If EOF is reached (so 0 bytes
    /// are read), then `Ok(None)` is returned, or else `Ok(Some(bytes))` where `bytes` is the
    /// number of bytes read from the source.
    ///
    /// # Errors
    ///
    /// Returns the IO error of the underlying reader. Reads failing with
    /// [`std::io::ErrorKind::Interrupted`] are not treated as errors but retried.
    pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
        let num_bytes = loop {
            match filehandle.read(&mut self.buffer) {
                Ok(num_bytes) => break num_bytes,
                // An interrupted read (for example by a signal) is non-fatal and must be retried
                // instead of aborting the whole operation.
                Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(error) => return Err(error.into()),
            }
        };
        self.bytes = num_bytes;
        if num_bytes == 0 {
            return Ok(None);
        }
        Ok(Some(self.bytes))
    }
 }
 /// An abstraction layer on top of [`BytesChunk`], mainly to simplify filling only the needed
 /// number of chunks. See also [`Self::fill`].
 pub struct BytesChunkBuffer {
    /// The number of bytes to print
    num_print: u64,
    /// The current number of bytes summed over all stored chunks in [`Self::chunks`]. Use u64 here
    /// to support files > 4GB on 32-bit systems. Note, this differs from `BytesChunk::bytes` which
    /// is a usize. The choice of u64 is based on `tail::FilterMode::Bytes`.
    bytes: u64,
    /// The queue of stored [`BytesChunk`]s, oldest chunk first
    chunks: VecDeque<Box<BytesChunk>>,
 }
 impl BytesChunkBuffer {
    /// Creates a new, empty [`BytesChunkBuffer`].
    ///
    /// # Arguments
    ///
    /// * `num_print`: The number of bytes to print
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use crate::chunks::BytesChunkBuffer;
    ///
    /// let num_print = 10;
    /// let chunks = BytesChunkBuffer::new(num_print);
    /// ```
    pub fn new(num_print: u64) -> Self {
        Self {
            bytes: 0,
            num_print,
            chunks: VecDeque::new(),
        }
    }
    /// Fills this buffer with chunks and consumes the reader completely. Afterwards the stored
    /// chunks hold in sum exactly the last `self.num_print` bytes of the input, or the whole input
    /// if it is shorter than that. Nothing is stored if the reader yields no bytes, for example
    /// because the piped stdin was empty. If `num_print = 0` the stored chunks contain no bytes.
    ///
    /// # Errors
    ///
    /// Returns the error of [`BytesChunk::fill`] if reading from `reader` fails.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use crate::chunks::BytesChunkBuffer;
    /// use std::io::{BufReader, Cursor};
    ///
    /// let mut reader = BufReader::new(Cursor::new(""));
    /// let num_print = 0;
    /// let mut chunks = BytesChunkBuffer::new(num_print);
    /// chunks.fill(&mut reader).unwrap();
    ///
    /// let mut reader = BufReader::new(Cursor::new("a"));
    /// let num_print = 1;
    /// let mut chunks = BytesChunkBuffer::new(num_print);
    /// chunks.fill(&mut reader).unwrap();
    /// ```
    pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
        let mut chunk = Box::new(BytesChunk::new());
        // fill chunks with all bytes from reader and reuse already instantiated chunks if possible
        while (chunk.fill(reader)?).is_some() {
            self.bytes += chunk.bytes as u64;
            self.chunks.push_back(chunk);
            // Drop every leading chunk which is not needed anymore. A read may deliver fewer
            // bytes than a chunk can hold, so a single new chunk can make more than one of the
            // old chunks superfluous. The newest chunk is never dropped, because the condition
            // cannot hold when only one chunk is left.
            let mut spare = None;
            while let Some(first) = self.chunks.front() {
                let first_bytes = first.bytes as u64;
                if self.bytes - first_bytes <= self.num_print {
                    break;
                }
                self.bytes -= first_bytes;
                spare = self.chunks.pop_front();
            }
            // reuse a dropped chunk as the next read target to avoid a new allocation
            chunk = spare.unwrap_or_else(|| Box::new(BytesChunk::new()));
        }
        // quit early if there are no chunks for example in case the pipe was empty
        if self.chunks.is_empty() {
            return Ok(());
        }
        let chunk = self.chunks.pop_front().unwrap();
        // calculate the offset in the first chunk and put the calculated chunk as first element in
        // the self.chunks collection. All superfluous leading chunks were dropped above, so the
        // calculated offset is in the range 0 to BUFFER_SIZE and is therefore safely convertible
        // to a usize without losses.
        let offset = self.bytes.saturating_sub(self.num_print) as usize;
        self.chunks
            .push_front(Box::new(BytesChunk::from_chunk(&chunk, offset)));
        Ok(())
    }
    /// Writes the bytes of all stored chunks to `writer`, oldest chunk first.
    ///
    /// # Errors
    ///
    /// Returns an error if writing to `writer` fails.
    pub fn print(&self, mut writer: impl Write) -> UResult<()> {
        for chunk in &self.chunks {
            writer.write_all(chunk.get_buffer())?;
        }
        Ok(())
    }
 }
 /// Works similarly to a [`BytesChunk`] but also stores the number of lines encountered in the
 /// current buffer. The size of the buffer is limited to a fixed number of bytes.
 #[derive(Debug)]
 pub struct LinesChunk {
    /// The underlying [`BytesChunk`] holding the bytes
    chunk: BytesChunk,
    /// The number of lines delimited by `delimiter`. The choice of usize is sufficient here,
    /// because the maximum number of lines is the number of bytes contained in this chunk's
    /// buffer, and the maximum number of bytes is [`BUFFER_SIZE`], which is a usize.
    lines: usize,
    /// The delimiter used to count the lines
    delimiter: u8,
 }
 impl LinesChunk {
    /// Creates an empty chunk which counts lines terminated by `delimiter`.
    pub fn new(delimiter: u8) -> Self {
        Self {
            chunk: BytesChunk::new(),
            lines: 0,
            delimiter,
        }
    }
    /// Count the number of lines delimited with [`Self::delimiter`] contained in the buffer.
    /// Currently [`memchr`] is used because performance is better than using an iterator or for
    /// loop.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
    /// chunk.chunk.bytes = 12;
    /// assert_eq!(2, chunk.count_lines());
    ///
    /// chunk.chunk.buffer[0..14].copy_from_slice("hello\r\nworld\r\n".as_bytes());
    /// chunk.chunk.bytes = 14;
    /// assert_eq!(2, chunk.count_lines());
    /// ```
    fn count_lines(&self) -> usize {
        memchr::memchr_iter(self.delimiter, self.get_buffer()).count()
    }
    /// Creates a new [`LinesChunk`] from an existing one with an offset in lines. The new chunk
    /// contains exactly `chunk.lines - offset` lines. The offset in bytes is calculated and applied
    /// to the new chunk, so the new chunk contains only the bytes encountered after the offset in
    /// number of lines and the `delimiter`. If the offset is larger than `chunk.lines` then a new
    /// empty `LinesChunk` is returned.
    ///
    /// # Arguments
    ///
    /// * `chunk`: The chunk to create the new chunk from
    /// * `offset`: The offset in number of lines (not bytes)
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// // manually filling the buffer and setting the correct values for bytes and lines
    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
    /// chunk.chunk.bytes = 12;
    /// chunk.lines = 2;
    ///
    /// let offset = 1; // offset in number of lines
    /// let new_chunk = LinesChunk::from_chunk(&chunk, offset);
    /// assert_eq!("world\n".as_bytes(), new_chunk.get_buffer());
    /// assert_eq!(6, new_chunk.chunk.bytes);
    /// assert_eq!(1, new_chunk.lines);
    /// ```
    fn from_chunk(chunk: &Self, offset: usize) -> Self {
        // Guard first: `chunk.lines - offset` below would underflow otherwise.
        if offset > chunk.lines {
            return Self::new(chunk.delimiter);
        }
        let bytes_offset = chunk.calculate_bytes_offset_from(offset);
        let new_chunk = BytesChunk::from_chunk(&chunk.chunk, bytes_offset);
        Self {
            chunk: new_chunk,
            lines: chunk.lines - offset,
            delimiter: chunk.delimiter,
        }
    }
    /// Returns true if this buffer has stored any bytes.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// assert!(!chunk.has_data());
    ///
    /// chunk.chunk.buffer[0] = 1;
    /// assert!(!chunk.has_data());
    ///
    /// chunk.chunk.bytes = 1;
    /// assert!(chunk.has_data());
    /// ```
    pub fn has_data(&self) -> bool {
        self.chunk.has_data()
    }
    /// Returns this buffer safely. See [`BytesChunk::get_buffer`]
    ///
    /// returns: &[u8] with as many bytes as this chunk stores
    pub fn get_buffer(&self) -> &[u8] {
        self.chunk.get_buffer()
    }
    /// Returns this buffer safely with an offset applied. See [`BytesChunk::get_buffer_with`].
    ///
    /// returns: &[u8] with the stored bytes starting at `offset`
    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
        self.chunk.get_buffer_with(offset)
    }
    /// Return the number of lines the buffer contains. `self.lines` needs to be set before the call
    /// to this function returns the correct value. If the calculation of lines is needed then
    /// use `self.count_lines`.
    pub fn get_lines(&self) -> usize {
        self.lines
    }
    /// Fills the underlying buffer with at most [`BUFFER_SIZE`] bytes, draining the reader by
    /// that number of bytes. This function works like the [`BytesChunk::fill`] function besides
    /// that this function also counts and stores the number of lines encountered while reading from
    /// the `filehandle`.
    pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
        match self.chunk.fill(filehandle)? {
            None => {
                self.lines = 0;
                Ok(None)
            }
            Some(bytes) => {
                self.lines = self.count_lines();
                Ok(Some(bytes))
            }
        }
    }
    /// Calculates the offset in bytes within this buffer from the offset in number of lines. The
    /// resulting offset is 0-based and points to the byte after the delimiter.
    ///
    /// # Arguments
    ///
    /// * `offset`: the offset in number of lines. If offset is 0 then 0 is returned, if larger than
    ///             the number of delimiters in the buffer then the number of stored bytes is
    ///             returned.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
    /// chunk.chunk.bytes = 12;
    /// let bytes_offset = chunk.calculate_bytes_offset_from(1);
    /// assert_eq!(6, bytes_offset);
    /// assert_eq!(
    ///     "world\n",
    ///     String::from_utf8_lossy(chunk.get_buffer_with(bytes_offset)));
    /// ```
    fn calculate_bytes_offset_from(&self, offset: usize) -> usize {
        if offset == 0 {
            return 0;
        }
        let buffer = self.get_buffer();
        // Locate the `offset`-th delimiter and point to the byte right after it. If the buffer
        // holds fewer delimiters, everything is skipped.
        memchr::memchr_iter(self.delimiter, buffer)
            .nth(offset - 1)
            .map_or(buffer.len(), |position| position + 1)
    }
    /// Print the bytes contained in this buffer calculated with the given offset in number of
    /// lines.
    ///
    /// # Arguments
    ///
    /// * `writer`: must implement [`Write`]
    /// * `offset`: An offset in number of lines.
    pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
        self.print_bytes(writer, self.calculate_bytes_offset_from(offset))
    }
    /// Print the bytes contained in this buffer beginning from the given offset in number of bytes.
    ///
    /// # Arguments
    ///
    /// * `writer`: must implement [`Write`]
    /// * `offset`: An offset in number of bytes. See [`BytesChunk::get_buffer_with`] for the
    ///             valid range.
    pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
        writer.write_all(self.get_buffer_with(offset))?;
        Ok(())
    }
 }
 /// An abstraction layer on top of [`LinesChunk`], mainly to simplify filling only the needed
 /// number of chunks. See also [`Self::fill`]. Works similarly to [`BytesChunkBuffer`], but
 /// operates on lines delimited by `self.delimiter` instead of bytes.
 pub struct LinesChunkBuffer {
    /// The delimiter to recognize a line. Any [`u8`] is allowed.
    delimiter: u8,
    /// The number of lines occurring in all currently stored [`LinesChunk`]s. Use u64 here to
    /// support files > 4GB on 32-bit systems. Note, this differs from [`LinesChunk::lines`] which
    /// is a usize. The choice of u64 is based on `tail::FilterMode::Lines`.
    lines: u64,
    /// The number of lines to print.
    num_print: u64,
    /// The queue of stored [`LinesChunk`]s, oldest chunk first
    chunks: VecDeque<Box<LinesChunk>>,
 }
 impl LinesChunkBuffer {
    /// Builds an empty buffer which keeps the last `num_print` lines terminated by `delimiter`.
    pub fn new(delimiter: u8, num_print: u64) -> Self {
        Self {
            chunks: VecDeque::new(),
            lines: 0,
            num_print,
            delimiter,
        }
    }
    /// Reads `reader` until EOF and keeps only the chunks required to print the last
    /// `self.num_print` lines. Once this method returns, the first stored chunk is trimmed so that
    /// the stored chunks together start at the first line to print. If the reader yields no data
    /// at all, nothing is stored.
    pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
        let mut current = Box::new(LinesChunk::new(self.delimiter));
        while current.fill(reader)?.is_some() {
            self.lines += current.lines as u64;
            self.chunks.push_back(current);
            // Recycle the oldest chunk as the next read target when the remaining chunks already
            // hold more lines than requested, otherwise allocate a fresh one.
            let oldest_lines = self.chunks[0].lines as u64;
            current = if self.lines - oldest_lines > self.num_print {
                self.lines -= oldest_lines;
                self.chunks.pop_front().unwrap()
            } else {
                Box::new(LinesChunk::new(self.delimiter))
            };
        }
        match self.chunks.back_mut() {
            // no chunk was stored, which happens when the input is empty
            None => return Ok(()),
            Some(last) => {
                // Input not terminated by the delimiter ends in a partial line which still counts.
                if !last.get_buffer().ends_with(&[self.delimiter]) {
                    last.lines += 1;
                    self.lines += 1;
                }
            }
        }
        // Discard leading chunks as long as the chunks behind them hold more lines than needed.
        // The unwrap cannot fail: at least one chunk exists, and a chunk is only discarded when
        // the remaining chunks still contain lines.
        let first = loop {
            let candidate = self.chunks.pop_front().unwrap();
            let candidate_lines = candidate.lines as u64;
            if self.lines - candidate_lines > self.num_print {
                self.lines -= candidate_lines;
            } else {
                break candidate;
            }
        };
        // The number of surplus lines at the start of the first chunk. It is bounded by the
        // number of lines of a single chunk, so the conversion to usize is lossless.
        let surplus = self.lines.saturating_sub(self.num_print) as usize;
        self.chunks
            .push_front(Box::new(LinesChunk::from_chunk(&first, surplus)));
        Ok(())
    }
    /// Writes the bytes of every stored chunk to `writer`, oldest chunk first.
    pub fn print(&self, mut writer: impl Write) -> UResult<()> {
        self.chunks
            .iter()
            .try_for_each(|chunk| chunk.print_bytes(&mut writer, 0))
    }
 }
 // Unit tests for `BytesChunk::from_chunk`, covering offsets of zero, offsets inside the stored
 // bytes and offsets at or beyond the number of stored bytes.
 #[cfg(test)]
 mod tests {
    use crate::chunks::{BytesChunk, BUFFER_SIZE};
    // With an offset of 0 the copy holds the same stored bytes as the source chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_zero() {
        let mut chunk = BytesChunk::new();
        chunk.bytes = BUFFER_SIZE;
        chunk.buffer[1] = 1;
        let other = BytesChunk::from_chunk(&chunk, 0);
        assert_eq!(other, chunk);
        chunk.bytes = 2;
        let other = BytesChunk::from_chunk(&chunk, 0);
        assert_eq!(other, chunk);
        // Only the first byte (a zero) is stored now, so the marker at index 1 is not copied.
        chunk.bytes = 1;
        let other = BytesChunk::from_chunk(&chunk, 0);
        assert_eq!(other.buffer, [0; BUFFER_SIZE]);
        assert_eq!(other.bytes, chunk.bytes);
        // Note: despite the test name this last case uses an offset of 2, which skips the marker.
        chunk.bytes = BUFFER_SIZE;
        let other = BytesChunk::from_chunk(&chunk, 2);
        assert_eq!(other.buffer, [0; BUFFER_SIZE]);
        assert_eq!(other.bytes, BUFFER_SIZE - 2);
    }
    // A non-zero offset shifts the remaining bytes to the start of the new buffer.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_not_zero() {
        let mut chunk = BytesChunk::new();
        chunk.bytes = BUFFER_SIZE;
        chunk.buffer[1] = 1;
        let other = BytesChunk::from_chunk(&chunk, 1);
        let mut expected_buffer = [0; BUFFER_SIZE];
        expected_buffer[0] = 1;
        assert_eq!(other.buffer, expected_buffer);
        assert_eq!(other.bytes, BUFFER_SIZE - 1);
        let other = BytesChunk::from_chunk(&chunk, 2);
        assert_eq!(other.buffer, [0; BUFFER_SIZE]);
        assert_eq!(other.bytes, BUFFER_SIZE - 2);
    }
    // An offset beyond a completely filled chunk yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_1() {
        let mut chunk = BytesChunk::new();
        chunk.bytes = BUFFER_SIZE;
        let new_chunk = BytesChunk::from_chunk(&chunk, BUFFER_SIZE + 1);
        assert_eq!(0, new_chunk.bytes);
    }
    // Any positive offset into an empty chunk yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_2() {
        let mut chunk = BytesChunk::new();
        chunk.bytes = 0;
        let new_chunk = BytesChunk::from_chunk(&chunk, 1);
        assert_eq!(0, new_chunk.bytes);
    }
    // An offset beyond the stored bytes of a partially filled chunk yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_3() {
        let mut chunk = BytesChunk::new();
        chunk.bytes = 1;
        let new_chunk = BytesChunk::from_chunk(&chunk, 2);
        assert_eq!(0, new_chunk.bytes);
    }
    // An offset equal to the number of stored bytes leaves nothing to copy.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_equal_to_chunk_size() {
        let mut chunk = BytesChunk::new();
        chunk.buffer[0] = 1;
        chunk.bytes = 1;
        let new_chunk = BytesChunk::from_chunk(&chunk, 1);
        assert_eq!(0, new_chunk.bytes);
    }
 }
--- a/src/uu/tail/src/tail.rs
+++ b/src/uu/tail/src/tail.rs
@ -7,7 +7,7 @@
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.
-// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized
+// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle
 // spell-checker:ignore (libs) kqueue
 // spell-checker:ignore (acronyms)
 // spell-checker:ignore (env/flags)
@ -21,8 +21,9 @@ extern crate clap;
 #[macro_use]
 extern crate uucore;
 extern crate core;
-mod chunks;
+pub mod chunks;
 mod parse;
 mod platform;
 use crate::files::FileHandling;
@ -30,11 +31,11 @@ use chunks::ReverseChunks;
 use clap::{Arg, Command, ValueSource};
 use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind};
 use std::cmp::Ordering;
 use std::collections::{HashMap, VecDeque};
 use std::ffi::OsString;
 use std::fmt;
 use std::fs::{File, Metadata};
-use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write};
+use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
 use std::path::{Path, PathBuf};
 use std::sync::mpsc::{self, channel, Receiver};
 use std::time::Duration;
@ -43,9 +44,7 @@ use uucore::error::{
    get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError,
 };
 use uucore::format_usage;
 use uucore::lines::lines;
 use uucore::parse_size::{parse_size, ParseSizeError};
 use uucore::ringbuffer::RingBuffer;
 #[cfg(unix)]
 use std::os::unix::fs::MetadataExt;
@ -1458,70 +1457,58 @@ fn bounded_tail(file: &mut File, settings: &Settings) {
    std::io::copy(file, &mut stdout).unwrap();
 }
 /// Advances `iter` by up to `num` elements, counting with a `u64` instead of the `usize` used by
 /// [`Iterator::skip`]. The `usize` limit is meaningless for data which does not live in memory,
 /// such as a very large file, so this makes it possible to skip more than 4 GiB of data even on
 /// 32-bit platforms. Stops early once the iterator is exhausted.
 fn skip_u64(iter: &mut impl Iterator, num: u64) {
    let mut remaining = num;
    while remaining > 0 && iter.next().is_some() {
        remaining -= 1;
    }
 }
 /// Collect the tail of an iterator of [`Result`]s into a [`VecDeque`].
 ///
 /// If `beginning` is `false`, the returned [`VecDeque`] holds the last `count` elements of
 /// `iter`. If `beginning` is `true`, it holds every element of `iter` starting at the 1-based
 /// position `count`, where a `count` of 0 is treated like 1.
 ///
 /// # Errors
 ///
 /// If `beginning` is `false` and `count` does not fit into a `usize`, an error with the message
 /// "Insufficient addressable memory" is returned.
 ///
 /// # Panics
 ///
 /// If any element of `iter` which gets collected is an [`Err`], then this function panics.
 fn unbounded_tail_collect<T, E>(
    mut iter: impl Iterator<Item = Result<T, E>>,
    count: u64,
    beginning: bool,
 ) -> UResult<VecDeque<T>>
 where
    E: fmt::Debug,
 {
    if !beginning {
        let capacity: usize = count
            .try_into()
            .map_err(|_| USimpleError::new(1, "Insufficient addressable memory"))?;
        let ring = RingBuffer::from_iter(iter.map(|item| item.unwrap()), capacity);
        return Ok(ring.data);
    }
    // GNU `tail` seems to index bytes and lines starting at 1, not
    // at 0. It seems to treat `+0` and `+1` as the same thing.
    let num_skip = count.max(1) - 1;
    skip_u64(&mut iter, num_skip);
    let collected = iter.map(|item| item.unwrap()).collect();
    Ok(collected)
 }
 fn unbounded_tail<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
-    // Read through each line/char and store them in a ringbuffer that always
+    let stdout = stdout();
-    // contains count lines/chars. When reaching the end of file, output the
+    let mut writer = BufWriter::new(stdout.lock());
-    // data in the ringbuf.
+    match (&settings.mode, settings.beginning) {
-    match settings.mode {
+        (FilterMode::Lines(count, sep), false) => {
-        FilterMode::Lines(count, sep) => {
+            let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count);
-            let mut stdout = stdout();
+            chunks.fill(reader)?;
-            for line in unbounded_tail_collect(lines(reader, sep), count, settings.beginning)? {
+            chunks.print(writer)?;
                stdout
                    .write_all(&line)
                    .map_err_context(|| String::from("IO error"))?;
            }
        }
-        FilterMode::Bytes(count) => {
+        (FilterMode::Lines(count, sep), true) => {
-            for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning)? {
+            let mut num_skip = (*count).max(1) - 1;
-                if let Err(err) = stdout().write(&[byte]) {
+            let mut chunk = chunks::LinesChunk::new(*sep);
-                    return Err(USimpleError::new(1, err.to_string()));
+            while chunk.fill(reader)?.is_some() {
                let lines = chunk.get_lines() as u64;
                if lines < num_skip {
                    num_skip -= lines;
                } else {
                    break;
                }
            }
            if chunk.has_data() {
                chunk.print_lines(&mut writer, num_skip as usize)?;
                io::copy(reader, &mut writer)?;
            }
        }
        (FilterMode::Bytes(count), false) => {
            let mut chunks = chunks::BytesChunkBuffer::new(*count);
            chunks.fill(reader)?;
            chunks.print(writer)?;
        }
        (FilterMode::Bytes(count), true) => {
            let mut num_skip = (*count).max(1) - 1;
            let mut chunk = chunks::BytesChunk::new();
            loop {
                if let Some(bytes) = chunk.fill(reader)? {
                    let bytes: u64 = bytes as u64;
                    match bytes.cmp(&num_skip) {
                        Ordering::Less => num_skip -= bytes,
                        Ordering::Equal => {
                            break;
                        }
                        Ordering::Greater => {
                            writer.write_all(chunk.get_buffer_with(num_skip as usize))?;
                            break;
                        }
                    }
                } else {
                    return Ok(());
                }
            }
            io::copy(reader, &mut writer)?;
        }
    }
    Ok(())
--- a/tests/by-util/test_tail.rs
+++ b/tests/by-util/test_tail.rs
@ -3,7 +3,7 @@
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.
-// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez
+// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez MULT
 // spell-checker:ignore (libs) kqueue
 // spell-checker:ignore (jargon) tailable untailable
@ -1090,18 +1090,6 @@ fn test_invalid_num() {
        .fails()
        .stderr_str()
        .starts_with("tail: invalid number of lines: '1Y': Value too large for defined data type");
    #[cfg(target_pointer_width = "32")]
    {
        let sizes = ["1000G", "10T"];
        for size in &sizes {
            new_ucmd!()
                .args(&["-c", size])
                .fails()
                .code_is(1)
                .stderr_str()
                .starts_with("tail: Insufficient addressable memory");
        }
    }
    new_ucmd!()
        .args(&["-c", "-³"])
        .fails()
@ -2484,6 +2472,725 @@ fn test_illegal_seek() {
    assert_eq!(p.wait().unwrap().code().unwrap(), 1);
 }
 #[cfg(all(not(target_os = "android"), not(target_os = "windows")))] // FIXME: See https://github.com/uutils/coreutils/issues/3881
 mod pipe_tests {
    use super::*;
    use crate::common::random::*;
    use rand::distributions::Alphanumeric;
    use tail::chunks::BUFFER_SIZE as CHUNK_BUFFER_SIZE;
    /// Asking for more lines than piped stdin holds: counting from the end echoes the
    /// whole input, while starting past the final line prints nothing.
    #[test]
    fn test_pipe_when_lines_option_value_is_higher_than_contained_lines() {
        let input = "a\nb\n";

        // The input has two lines, so each of these counts exceeds it.
        for count in &["3", "4", "999"] {
            new_ucmd!()
                .args(&["-n", *count])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(input);
        }

        // Start lines beyond the end of the input leave stdout and stderr empty.
        for start in &["+3", "+4", "+999"] {
            new_ucmd!()
                .args(&["-n", *start])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .no_stdout()
                .no_stderr();
        }
    }
    /// `-n NUM` on piped stdin whose final line has no trailing newline.
    #[test]
    fn test_pipe_when_negative_lines_option_given_no_newline_at_eof() {
        let input = "a\nb";

        // Zero lines requested: nothing on stdout or stderr.
        new_ucmd!()
            .args(&["-n", "0"])
            .pipe_in(input)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        // The unterminated "b" is treated as the last line.
        for (count, expected) in &[("1", "b"), ("2", "a\nb")] {
            new_ucmd!()
                .args(&["-n", *count])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }
    }
    /// `-n +NUM` on piped stdin whose final line has no trailing newline.
    #[test]
    fn test_pipe_when_positive_lines_option_given_no_newline_at_eof() {
        let input = "a\nb";

        // `+0` and `+1` both start at the first line; `+2` starts at the second.
        let cases = [("+0", "a\nb"), ("+1", "a\nb"), ("+2", "b")];
        for (start, expected) in &cases {
            new_ucmd!()
                .args(&["-n", *start])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }
    }
    /// Line selection on piped stdin must not split multi-byte UTF-8 characters.
    #[test]
    fn test_pipe_when_lines_option_given_multibyte_utf8_characters() {
        // One character per line, from left to right: a 4-byte, 3-byte, 2-byte and 1-byte
        // UTF-8 character.
        let test_string = "𝅘𝅥𝅮\n⏻\nƒ\na";

        // Counting from the start of the input.
        let from_start = [
            ("+0", test_string),
            ("+2", "⏻\nƒ\na"),
            ("+3", "ƒ\na"),
            ("+4", "a"),
        ];
        for (arg, expected) in &from_start {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(test_string)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }

        // Starting after the last line yields nothing.
        new_ucmd!()
            .args(&["-n", "+5"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        // Counting from the end of the input.
        let from_end = [
            ("-4", test_string),
            ("-3", "⏻\nƒ\na"),
            ("-2", "ƒ\na"),
            ("-1", "a"),
        ];
        for (arg, expected) in &from_end {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(test_string)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }

        // Zero trailing lines yields nothing.
        new_ucmd!()
            .args(&["-n", "-0"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();
    }
    /// Input of exactly one chunk buffer containing a single newline that is not at the
    /// end, i.e. two lines of which the second is unterminated.
    #[test]
    fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size_no_newline_at_eof() {
        let newline_count = 1;
        let input = RandomString::generate_with_delimiter(
            Alphanumeric,
            b'\n',
            newline_count,
            false,
            CHUNK_BUFFER_SIZE,
        );

        // Everything after the first line; both invocations below must print exactly this.
        let second_line: String = input.split_inclusive('\n').skip(1).collect();

        for arg in &["+2", "-1"] {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(input.as_str())
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(second_line.as_str());
        }
    }
    /// Line selection on newline-terminated input that fills exactly one chunk buffer.
    #[test]
    fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size() {
        let total_lines = 100;
        let generated = RandomString::generate_with_delimiter(
            Alphanumeric,
            b'\n',
            total_lines,
            true,
            CHUNK_BUFFER_SIZE,
        );
        let input = generated.as_str();

        // Expected outputs, built from the newline-terminated pieces of the input.
        let all_but_first: String = input.split_inclusive('\n').skip(1).collect();
        let last_line: String = input.split_inclusive('\n').skip(total_lines - 1).collect();

        let from_start = [("+0", input), ("+2", all_but_first.as_str())];
        for (arg, expected) in &from_start {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }

        // Zero trailing lines yields nothing.
        new_ucmd!()
            .args(&["-n", "-0"])
            .pipe_in(input)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        let from_end = [
            ("-1", last_line.as_str()),
            ("-99", all_but_first.as_str()),
            ("-100", input),
        ];
        for (arg, expected) in &from_end {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }
    }
    /// Line selection on newline-terminated input that is one byte larger than a chunk
    /// buffer.
    #[test]
    fn test_pipe_when_lines_option_given_input_size_is_one_byte_greater_than_buffer_size() {
        let total_lines = 100;
        let generated = RandomString::generate_with_delimiter(
            Alphanumeric,
            b'\n',
            total_lines,
            true,
            CHUNK_BUFFER_SIZE + 1,
        );
        let input = generated.as_str();

        // Expected outputs, built from the newline-terminated pieces of the input.
        let all_but_first: String = input.split_inclusive('\n').skip(1).collect();
        let last_line: String = input.split_inclusive('\n').skip(total_lines - 1).collect();

        let cases = [
            ("+0", input),
            ("-1", last_line.as_str()),
            ("+2", all_but_first.as_str()),
            ("-99", all_but_first.as_str()),
        ];
        for (arg, expected) in &cases {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }
    }
    /// Line selection on newline-terminated input spanning several chunk buffers (three
    /// buffers plus one byte).
    #[test]
    fn test_pipe_when_lines_option_given_input_size_has_multiple_size_of_buffer_size() {
        let total_lines = 100;
        let generated = RandomString::generate_with_delimiter(
            Alphanumeric,
            b'\n',
            total_lines,
            true,
            CHUNK_BUFFER_SIZE * 3 + 1,
        );
        let input = generated.as_str();

        // Expected outputs, built from the newline-terminated pieces of the input.
        let all_but_first: String = input.split_inclusive('\n').skip(1).collect();
        let last_line: String = input.split_inclusive('\n').skip(total_lines - 1).collect();

        let from_start = [("+0", input), ("+2", all_but_first.as_str())];
        for (arg, expected) in &from_start {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }

        // Zero trailing lines yields nothing.
        new_ucmd!()
            .args(&["-n", "-0"])
            .pipe_in(input)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        let from_end = [
            ("-1", last_line.as_str()),
            ("-99", all_but_first.as_str()),
            ("-100", input),
        ];
        for (arg, expected) in &from_end {
            new_ucmd!()
                .args(&["-n", *arg])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }
    }
    /// Asking for more bytes than piped stdin holds: counting from the end echoes the
    /// whole input, while starting past the final byte prints nothing.
    #[test]
    fn test_pipe_when_bytes_option_value_is_higher_than_contained_bytes() {
        let input = "a\nb";

        // The input is three bytes long, so each of these counts exceeds it.
        for count in &["4", "5", "999"] {
            new_ucmd!()
                .args(&["-c", *count])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(input);
        }

        // Start offsets beyond the end of the input leave stdout and stderr empty.
        for start in &["+4", "+5", "+999"] {
            new_ucmd!()
                .args(&["-c", *start])
                .pipe_in(input)
                .ignore_stdin_write_error()
                .succeeds()
                .no_stdout()
                .no_stderr();
        }
    }
    /// Byte selection on piped stdin made of multi-byte UTF-8 characters, including cuts
    /// that land in the middle of a character.
    #[test]
    fn test_pipe_when_bytes_option_given_multibyte_utf8_characters() {
        // From left to right: a 4-byte, 3-byte, 2-byte and 1-byte UTF-8 character.
        let test_string = "𝅘𝅥𝅮⏻ƒa";

        new_ucmd!()
            .args(&["-c", "+0"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(test_string);

        // Starting at byte 2 cuts into the first character, so compare raw bytes.
        new_ucmd!()
            .args(&["-c", "+2"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&test_string.as_bytes()[1..]);

        // Start offsets that fall on character boundaries.
        let from_start = [("+5", "⏻ƒa"), ("+8", "ƒa"), ("+10", "a")];
        for (arg, expected) in &from_start {
            new_ucmd!()
                .args(&["-c", *arg])
                .pipe_in(test_string)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }

        // Starting after the last byte yields nothing.
        new_ucmd!()
            .args(&["-c", "+11"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        new_ucmd!()
            .args(&["-c", "-1"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only("a");

        // The last two bytes start inside the 2-byte character, so compare raw bytes.
        new_ucmd!()
            .args(&["-c", "-2"])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&"ƒa".as_bytes()[1..]);

        // Trailing byte counts that fall on character boundaries.
        let from_end = [("-3", "ƒa"), ("-6", "⏻ƒa"), ("-10", test_string)];
        for (arg, expected) in &from_end {
            new_ucmd!()
                .args(&["-c", *arg])
                .pipe_in(test_string)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only(*expected);
        }
    }
    /// Byte selection on piped stdin that fills exactly one chunk buffer.
    ///
    /// The byte counts around the buffer boundary are derived from `CHUNK_BUFFER_SIZE`
    /// rather than written as literals, so the test keeps probing the boundary if the
    /// buffer size is ever changed.
    #[test]
    fn test_pipe_when_bytes_option_given_input_size_is_equal_to_buffer_size() {
        let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE);
        let random_string = random_string.as_str();
        let bytes = random_string.as_bytes();

        // Trailing byte counts one below, exactly at, and one above the input size.
        let one_below = format!("-{}", CHUNK_BUFFER_SIZE - 1);
        let exact = format!("-{}", CHUNK_BUFFER_SIZE);
        let one_above = format!("-{}", CHUNK_BUFFER_SIZE + 1);

        new_ucmd!()
            .args(&["-c", "+0"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(random_string);

        new_ucmd!()
            .args(&["-c", "+2"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[1..]);

        new_ucmd!()
            .args(&["-c", "-0"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        // One byte fewer than the input: everything except the first byte.
        new_ucmd!()
            .args(&["-c", one_below.as_str()])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[1..]);

        // Exactly the input size, and one more than it: the whole input.
        for count in &[exact.as_str(), one_above.as_str()] {
            new_ucmd!()
                .args(&["-c", *count])
                .pipe_in(random_string)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only_bytes(random_string);
        }

        new_ucmd!()
            .args(&["-c", "-1"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[CHUNK_BUFFER_SIZE - 1..]);
    }
    /// Byte selection on piped stdin that is one byte larger than a chunk buffer.
    ///
    /// The byte counts around the buffer boundary are derived from `CHUNK_BUFFER_SIZE`
    /// rather than written as literals, so the test keeps probing the boundary if the
    /// buffer size is ever changed.
    #[test]
    fn test_pipe_when_bytes_option_given_input_size_is_one_byte_greater_than_buffer_size() {
        let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE + 1);
        let random_string = random_string.as_str();
        let bytes = random_string.as_bytes();

        // Trailing byte counts equal to the buffer size and to the full input size.
        let buffer_sized = format!("-{}", CHUNK_BUFFER_SIZE);
        let input_sized = format!("-{}", CHUNK_BUFFER_SIZE + 1);

        new_ucmd!()
            .args(&["-c", "+0"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(random_string);

        new_ucmd!()
            .args(&["-c", "+2"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[1..]);

        new_ucmd!()
            .args(&["-c", "-0"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        // The single byte that spills past the first buffer.
        new_ucmd!()
            .args(&["-c", "-1"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[CHUNK_BUFFER_SIZE..]);

        // One buffer's worth from the end: everything except the first byte.
        new_ucmd!()
            .args(&["-c", buffer_sized.as_str()])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[1..]);

        // Exactly the input size: the whole input.
        new_ucmd!()
            .args(&["-c", input_sized.as_str()])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(random_string);
    }
    /// Byte selection on piped stdin that spans exactly three chunk buffers.
    ///
    /// Every offset and byte count is derived from `CHUNK_BUFFER_SIZE` rather than
    /// written as a literal, so the test keeps probing the chunk boundaries if the
    /// buffer size is ever changed.
    #[test]
    fn test_pipe_when_bytes_option_given_input_size_has_multiple_size_of_buffer_size() {
        let size = CHUNK_BUFFER_SIZE;
        let random_string = RandomString::generate(AlphanumericNewline, size * 3);
        let random_string = random_string.as_str();
        let bytes = random_string.as_bytes();

        new_ucmd!()
            .args(&["-c", "+0"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(random_string);

        new_ucmd!()
            .args(&["-c", "-0"])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();

        // Each case pairs a `-c` argument with the index of the first byte expected in
        // the output. `+K` starts at the K-th byte (index K - 1); `-K` keeps the last K
        // bytes (index 3 * size - K). The cases sit on and just past each chunk boundary.
        let cases = [
            (format!("+{}", size + 1), size),
            (format!("+{}", size + 2), size + 1),
            (format!("+{}", size * 2 + 1), size * 2),
            (format!("+{}", size * 2 + 2), size * 2 + 1),
            (format!("-{}", size), size * 2),
            (format!("-{}", size + 1), size * 2 - 1),
            (format!("-{}", size * 2), size),
            (format!("-{}", size * 2 + 1), size - 1),
        ];
        for (arg, first_index) in &cases {
            new_ucmd!()
                .args(&["-c", arg.as_str()])
                .pipe_in(random_string)
                .ignore_stdin_write_error()
                .succeeds()
                .stdout_only_bytes(&bytes[*first_index..]);
        }

        // Exactly the input size: the whole input.
        let whole = format!("-{}", size * 3);
        new_ucmd!()
            .args(&["-c", whole.as_str()])
            .pipe_in(random_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(random_string);
    }
 }
 #[test]
 fn test_seek_bytes_backward_outside_file() {
    new_ucmd!()
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@ -1,3 +1,4 @@
 #[macro_use]
 pub mod macros;
 pub mod random;
 pub mod util;
--- a/tests/common/random.rs
+++ b/tests/common/random.rs
@ -0,0 +1,314 @@
 //  * This file is part of the uutils coreutils package.
 //  *
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.
 use rand::distributions::{Distribution, Uniform};
 use rand::{thread_rng, Rng};
 /// A byte distribution over the alphanumeric characters `[A-Za-z0-9]` plus the
 /// newline `\n`.
 ///
 /// # Examples
 ///
 /// ```rust,ignore
 /// use rand::{Rng, thread_rng};
 ///
 /// let vec = thread_rng()
 ///     .sample_iter(AlphanumericNewline)
 ///     .take(10)
 ///     .collect::<Vec<u8>>();
 /// println!("Random chars: {}", String::from_utf8(vec).unwrap());
 /// ```
 #[derive(Clone, Copy, Debug)]
 pub struct AlphanumericNewline;
 impl AlphanumericNewline {
    /// Every byte this distribution can produce.
    const CHARSET: &'static [u8] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n";
    /// Pick one byte from [`Self::CHARSET`] at an index drawn from `rng`.
    ///
    /// # Arguments
    ///
    /// * `rng`: A [`rand::Rng`]
    ///
    /// returns: u8
    fn random<R: Rng + ?Sized>(rng: &mut R) -> u8 {
        Self::CHARSET[rng.gen_range(0..Self::CHARSET.len())]
    }
 }
 impl Distribution<u8> for AlphanumericNewline {
    /// Delegates to [`AlphanumericNewline::random`].
    fn sample<R>(&self, rng: &mut R) -> u8
    where
        R: Rng + ?Sized,
    {
        Self::random(rng)
    }
 }
 /// Namespace for building random strings out of a `u8` [`Distribution`].
 ///
 /// # Examples
 ///
 /// ```rust,ignore
 /// use crate::common::random::{AlphanumericNewline, RandomString};
 /// use rand::distributions::Alphanumeric;
 ///
 /// // generates a 100 byte string with characters from AlphanumericNewline
 /// let random_string = RandomString::generate(&AlphanumericNewline, 100);
 /// assert_eq!(100, random_string.len());
 ///
 /// // generates a 100 byte string with 10 newline characters not ending with a newline
 /// let random_string =
 ///     RandomString::generate_with_delimiter(&Alphanumeric, b'\n', 10, false, 100);
 /// assert_eq!(100, random_string.len());
 /// ```
 pub struct RandomString;
 impl RandomString {
    /// Draw `length` samples from `dist` and collect them into a `String`, mapping each
    /// sampled byte to the `char` with the same code point.
    ///
    /// # Arguments
    ///
    /// * `dist`: A u8 [`Distribution`]
    /// * `length`: the number of samples to draw; for ASCII-only distributions this is
    ///   also the length of the resulting string in bytes
    ///
    /// returns: String
    pub fn generate<D>(dist: D, length: usize) -> String
    where
        D: Distribution<u8>,
    {
        let samples = thread_rng().sample_iter::<u8, _>(dist).take(length);
        samples.map(char::from).collect()
    }
    /// Generate a random string of `length` bytes from `dist` and scatter `delimiter`
    /// over randomly chosen positions so that `num_delimiter` of them are placed. With
    /// `end_with_delimiter` set, the last byte is one of those delimiters; otherwise the
    /// last byte is a fresh sample from `dist`.
    ///
    /// Special cases visible in the implementation:
    ///
    /// * `length == 0` returns the empty string.
    /// * `length == 1` returns just the delimiter whenever `num_delimiter > 0`,
    ///   regardless of `end_with_delimiter`.
    /// * `num_delimiter == 0` places no delimiter, regardless of `end_with_delimiter`.
    /// * When more delimiters are requested than there are free positions, the surplus
    ///   ones overwrite existing delimiters, so fewer distinct ones end up in the string.
    /// * Samples are not filtered, so the delimiter count is only exact when `dist`
    ///   never produces `delimiter` itself.
    ///
    /// # Arguments
    ///
    /// * `dist`: A `u8` [`Distribution`]
    /// * `delimiter`: A `u8` delimiter, which does not need to be included in the `Distribution`
    /// * `num_delimiter`: The number of `delimiter`s contained in the resulting string
    /// * `end_with_delimiter`: If the string shall end with the given delimiter
    /// * `length`: the length of the resulting string in bytes
    ///
    /// returns: String
    ///
    /// # Panics
    ///
    /// Panics if the generated bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use crate::common::random::{AlphanumericNewline, RandomString};
    ///
    /// // generates a 100 byte string with 10 '\0' byte characters not ending with a '\0' byte
    /// let random_string =
    ///     RandomString::generate_with_delimiter(&AlphanumericNewline, 0, 10, false, 100);
    /// assert_eq!(100, random_string.len());
    /// assert_eq!(
    ///     10,
    ///     random_string.as_bytes().iter().filter(|p| **p == 0).count()
    /// );
    /// assert!(!random_string.as_bytes().ends_with(&[0]));
    /// ```
    pub fn generate_with_delimiter<D>(
        dist: D,
        delimiter: u8,
        num_delimiter: usize,
        end_with_delimiter: bool,
        length: usize,
    ) -> String
    where
        D: Distribution<u8>,
    {
        // Degenerate lengths have no interior positions to scatter delimiters over.
        match length {
            0 => return String::new(),
            1 => {
                let only: u8 = if num_delimiter > 0 {
                    delimiter
                } else {
                    thread_rng().sample(&dist)
                };
                return String::from(only as char);
            }
            _ => {}
        }

        let mut rng = thread_rng();

        // Every byte except the last one; the last byte is decided at the very end.
        let body_len = length - 1;
        let mut bytes: Vec<u8> = (&mut rng).sample_iter(&dist).take(body_len).collect();

        if num_delimiter == 0 {
            bytes.push(rng.sample(&dist));
            return String::from_utf8(bytes).unwrap();
        }

        // A trailing delimiter counts towards the requested total.
        let interior = if end_with_delimiter {
            num_delimiter - 1
        } else {
            num_delimiter
        };

        let slot = Uniform::new(0, body_len);
        for _ in 0..interior {
            let start = slot.sample(&mut rng);
            // Probe forward from `start`, wrapping around, for the first position that
            // is not already a delimiter; fall back to `start` when every position is.
            let target = (0..body_len)
                .map(|offset| (start + offset) % body_len)
                .find(|&idx| bytes[idx] != delimiter)
                .unwrap_or(start);
            bytes[target] = delimiter;
        }

        let last = if end_with_delimiter {
            delimiter
        } else {
            rng.sample(&dist)
        };
        bytes.push(last);
        String::from_utf8(bytes).unwrap()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use rand::distributions::Alphanumeric;
    /// `generate` returns a string with exactly the requested number of bytes.
    #[test]
    fn test_random_string_generate() {
        for &length in &[0usize, 1, 100] {
            let random_string = RandomString::generate(&AlphanumericNewline, length);
            assert_eq!(length, random_string.len());
        }
    }
    /// A requested length of zero produces the empty string.
    #[test]
    fn test_random_string_generate_with_delimiter_when_length_is_zero() {
        let empty = RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, false, 0);
        assert!(empty.is_empty());
    }
    /// Asking for more delimiters than bytes on a one-byte string yields just the
    /// delimiter.
    #[test]
    fn test_random_string_generate_with_delimiter_when_num_delimiter_is_greater_than_length() {
        let single = RandomString::generate_with_delimiter(&Alphanumeric, 0, 2, false, 1);
        // A one-byte string that contains and ends with 0 is exactly the byte 0.
        assert_eq!(single.as_bytes(), &[0u8][..]);
    }
    /// Cases in which the generated string must end with the delimiter byte `0`.
    #[test]
    fn test_random_string_generate_with_delimiter_should_end_with_delimiter() {
        // (num_delimiter, end_with_delimiter, length); the expected number of zero bytes
        // equals `num_delimiter` in every case.
        let cases = [
            (1usize, true, 1usize),
            (1, false, 1),
            (1, true, 2),
            (2, true, 2),
            (1, true, 3),
        ];
        for &(num_delimiter, end_with_delimiter, length) in &cases {
            let random_string = RandomString::generate_with_delimiter(
                &Alphanumeric,
                0,
                num_delimiter,
                end_with_delimiter,
                length,
            );
            let generated = random_string.as_bytes();
            assert_eq!(length, random_string.len());
            assert_eq!(
                num_delimiter,
                generated.iter().filter(|byte| **byte == 0).count()
            );
            assert!(generated.ends_with(&[0]));
        }
    }
    /// Cases in which the generated string must not end with the delimiter byte `0`.
    #[test]
    fn test_random_string_generate_with_delimiter_should_not_end_with_delimiter() {
        let zero_count = |text: &str| text.bytes().filter(|&byte| byte == 0).count();

        // No delimiter requested: a single byte that is never the delimiter.
        for &end_with_delimiter in &[false, true] {
            let random_string =
                RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, end_with_delimiter, 1);
            assert_eq!(1, random_string.len());
            assert_eq!(0, zero_count(&random_string));
        }

        // (num_delimiter, length) with delimiters kept away from the final byte.
        for &(num_delimiter, length) in &[(1usize, 2usize), (1, 3), (2, 3)] {
            let random_string =
                RandomString::generate_with_delimiter(&Alphanumeric, 0, num_delimiter, false, length);
            assert_eq!(length, random_string.len());
            assert_eq!(num_delimiter, zero_count(&random_string));
            assert!(!random_string.as_bytes().ends_with(&[0]));
        }
    }
    /// A longer string (1000 bytes, 100 delimiters), once without and once with a
    /// trailing delimiter.
    #[test]
    fn test_generate_with_delimiter_with_greater_length() {
        for &end_with_delimiter in &[false, true] {
            let random_string = RandomString::generate_with_delimiter(
                &Alphanumeric,
                0,
                100,
                end_with_delimiter,
                1000,
            );
            let generated = random_string.as_bytes();
            assert_eq!(1000, random_string.len());
            assert_eq!(100, generated.iter().filter(|byte| **byte == 0).count());
            // The string ends with the delimiter exactly when that was requested.
            assert_eq!(end_with_delimiter, generated.ends_with(&[0]));
        }
    }
 }