mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
tail: improve performance of piped stdin
Rewrite handling of stdin when it is piped and read input in chunks. Fixes https://github.com/uutils/coreutils/issues/3842
This commit is contained in:
parent
b39f5239e7
commit
2658f8ae5b
7 changed files with 1704 additions and 83 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2871,6 +2871,7 @@ version = "0.0.15"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"libc",
|
"libc",
|
||||||
|
"memchr",
|
||||||
"nix",
|
"nix",
|
||||||
"notify",
|
"notify",
|
||||||
"same-file",
|
"same-file",
|
||||||
|
|
|
@ -18,6 +18,7 @@ path = "src/tail.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { version = "3.2", features = ["wrap_help", "cargo"] }
|
clap = { version = "3.2", features = ["wrap_help", "cargo"] }
|
||||||
libc = "0.2.132"
|
libc = "0.2.132"
|
||||||
|
memchr = "2.5.0"
|
||||||
notify = { version = "=5.0.0-pre.16", features=["macos_kqueue"]}
|
notify = { version = "=5.0.0-pre.16", features=["macos_kqueue"]}
|
||||||
uucore = { version=">=0.0.15", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
|
uucore = { version=">=0.0.15", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
|
||||||
same-file = "1.0.6"
|
same-file = "1.0.6"
|
||||||
|
|
|
@ -1,14 +1,29 @@
|
||||||
//! Iterating over a file by chunks, starting at the end of the file.
|
// * This file is part of the uutils coreutils package.
|
||||||
|
// *
|
||||||
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
|
// * file that was distributed with this source code.
|
||||||
|
|
||||||
|
//! Iterating over a file by chunks, either starting at the end of the file with [`ReverseChunks`]
|
||||||
|
//! or at the end of piped stdin with [`LinesChunk`] or [`BytesChunk`].
|
||||||
//!
|
//!
|
||||||
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
|
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of bytes from the file.
|
||||||
//! bytes from the file.
|
// spell-checker:ignore (ToDO) filehandle BUFSIZ
|
||||||
|
use std::collections::VecDeque;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{Read, Seek, SeekFrom};
|
use std::io::{BufReader, Read, Seek, SeekFrom, Write};
|
||||||
|
use uucore::error::UResult;
|
||||||
|
|
||||||
/// When reading files in reverse in `bounded_tail`, this is the size of each
|
/// When reading files in reverse in `bounded_tail`, this is the size of each
|
||||||
/// block read at a time.
|
/// block read at a time.
|
||||||
pub const BLOCK_SIZE: u64 = 1 << 16;
|
pub const BLOCK_SIZE: u64 = 1 << 16;
|
||||||
|
|
||||||
|
/// The size of the backing buffer of a LinesChunk or BytesChunk in bytes. The value of BUFFER_SIZE
|
||||||
|
/// originates from the BUFSIZ constant in stdio.h and the libc crate to make stream IO efficient.
|
||||||
|
/// In the latter the value is fixed at 8192 on all platforms, whereas the value in stdio.h
|
||||||
|
/// is determined on each platform differently. Since libc chose 8192 as a reasonable default the
|
||||||
|
/// value here is set to this value, too.
|
||||||
|
pub const BUFFER_SIZE: usize = 8192;
|
||||||
|
|
||||||
/// An iterator over a file in non-overlapping chunks from the end of the file.
|
/// An iterator over a file in non-overlapping chunks from the end of the file.
|
||||||
///
|
///
|
||||||
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
|
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
|
||||||
|
@ -86,3 +101,598 @@ impl<'a> Iterator for ReverseChunks<'a> {
|
||||||
Some(buf[0..(block_size as usize)].to_vec())
|
Some(buf[0..(block_size as usize)].to_vec())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The type of the backing buffer of [`BytesChunk`] and [`LinesChunk`] which can hold
|
||||||
|
/// at most [`BUFFER_SIZE`] elements.
|
||||||
|
type ChunkBuffer = [u8; BUFFER_SIZE];
|
||||||
|
|
||||||
|
/// A [`BytesChunk`] storing a fixed size number of bytes in a buffer.
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct BytesChunk {
|
||||||
|
/// The [`ChunkBuffer`], an array storing the bytes, for example filled by
|
||||||
|
/// [`BytesChunk::fill`]
|
||||||
|
buffer: ChunkBuffer,
|
||||||
|
|
||||||
|
/// Stores the number of bytes, this buffer holds. This is not equal to buffer.len(), since the
|
||||||
|
/// [`BytesChunk`] may store less bytes than the internal buffer can hold. In addition
|
||||||
|
/// [`BytesChunk`] may be reused, what makes it necessary to track the number of stored bytes.
|
||||||
|
/// The choice of usize is sufficient here, since the number of bytes max value is
|
||||||
|
/// [`BUFFER_SIZE`], which is a usize.
|
||||||
|
bytes: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BytesChunk {
|
||||||
|
#[allow(clippy::new_without_default)]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
buffer: [0; BUFFER_SIZE],
|
||||||
|
bytes: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new chunk from an existing chunk. The new chunk's buffer will be copied from the
|
||||||
|
/// old chunk's buffer, copying the slice `[offset..old_chunk.bytes]` into the new chunk's
|
||||||
|
/// buffer but starting at 0 instead of offset. If the offset is larger or equal to
|
||||||
|
/// `chunk.lines` then a new empty `BytesChunk` is returned.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `chunk`: The chunk to create a new `BytesChunk` chunk from
|
||||||
|
/// * `offset`: Start to copy the old chunk's buffer from this position. May not be larger
|
||||||
|
/// than `chunk.bytes`.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = BytesChunk::new();
|
||||||
|
/// chunk.buffer[1] = 1;
|
||||||
|
/// chunk.bytes = 2;
|
||||||
|
/// let new_chunk = BytesChunk::from_chunk(&chunk, 0);
|
||||||
|
/// assert_eq!(2, new_chunk.get_buffer().len());
|
||||||
|
/// assert_eq!(&[0, 1], new_chunk.get_buffer());
|
||||||
|
///
|
||||||
|
/// let new_chunk = BytesChunk::from_chunk(&chunk, 1);
|
||||||
|
/// assert_eq!(1, new_chunk.get_buffer().len());
|
||||||
|
/// assert_eq!(&[1], new_chunk.get_buffer());
|
||||||
|
/// ```
|
||||||
|
fn from_chunk(chunk: &Self, offset: usize) -> Self {
|
||||||
|
if offset >= chunk.bytes {
|
||||||
|
return Self::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut buffer: ChunkBuffer = [0; BUFFER_SIZE];
|
||||||
|
let slice = chunk.get_buffer_with(offset);
|
||||||
|
buffer[..slice.len()].copy_from_slice(slice);
|
||||||
|
Self {
|
||||||
|
buffer,
|
||||||
|
bytes: chunk.bytes - offset,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Receive the internal buffer safely, so it returns a slice only containing as many bytes as
|
||||||
|
/// large the `self.bytes` value is.
|
||||||
|
///
|
||||||
|
/// returns: a slice containing the bytes of the internal buffer from `[0..self.bytes]`
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = BytesChunk::new();
|
||||||
|
/// chunk.bytes = 1;
|
||||||
|
/// assert_eq!(&[0], chunk.get_buffer());
|
||||||
|
/// ```
|
||||||
|
pub fn get_buffer(&self) -> &[u8] {
|
||||||
|
&self.buffer[..self.bytes]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like [`BytesChunk::get_buffer`], but returning a slice from `[offset.self.bytes]`.
|
||||||
|
///
|
||||||
|
/// returns: a slice containing the bytes of the internal buffer from `[offset..self.bytes]`
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = BytesChunk::new();
|
||||||
|
/// chunk.bytes = 2;
|
||||||
|
/// assert_eq!(&[0], chunk.get_buffer_with(1));
|
||||||
|
/// ```
|
||||||
|
pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
|
||||||
|
&self.buffer[offset..self.bytes]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn has_data(&self) -> bool {
|
||||||
|
self.bytes > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, draining the reader by
|
||||||
|
/// that number of bytes. If EOF is reached (so 0 bytes are read), then returns
|
||||||
|
/// [`UResult<None>`] or else the result with [`Some(bytes)`] where bytes is the number of bytes
|
||||||
|
/// read from the source.
|
||||||
|
pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
|
||||||
|
let num_bytes = filehandle.read(&mut self.buffer)?;
|
||||||
|
self.bytes = num_bytes;
|
||||||
|
if num_bytes == 0 {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(self.bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An abstraction layer on top of [`BytesChunk`] mainly to simplify filling only the needed amount
|
||||||
|
/// of chunks. See also [`Self::fill`].
|
||||||
|
pub struct BytesChunkBuffer {
|
||||||
|
/// The number of bytes to print
|
||||||
|
num_print: u64,
|
||||||
|
/// The current number of bytes summed over all stored chunks in [`Self::chunks`]. Use u64 here
|
||||||
|
/// to support files > 4GB on 32-bit systems. Note, this differs from `BytesChunk::bytes` which
|
||||||
|
/// is a usize. The choice of u64 is based on `tail::FilterMode::Bytes`.
|
||||||
|
bytes: u64,
|
||||||
|
/// The buffer to store [`BytesChunk`] in
|
||||||
|
chunks: VecDeque<Box<BytesChunk>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BytesChunkBuffer {
|
||||||
|
/// Creates a new [`BytesChunkBuffer`].
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `num_print`: The number of bytes to print
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = BytesChunk::new();
|
||||||
|
/// chunk.buffer[1] = 1;
|
||||||
|
/// chunk.bytes = 2;
|
||||||
|
/// let new_chunk = BytesChunk::from_chunk(&chunk, 0);
|
||||||
|
/// assert_eq!(2, new_chunk.get_buffer().len());
|
||||||
|
/// assert_eq!(&[0, 1], new_chunk.get_buffer());
|
||||||
|
///
|
||||||
|
/// let new_chunk = BytesChunk::from_chunk(&chunk, 1);
|
||||||
|
/// assert_eq!(1, new_chunk.get_buffer().len());
|
||||||
|
/// assert_eq!(&[1], new_chunk.get_buffer());
|
||||||
|
/// ```
|
||||||
|
pub fn new(num_print: u64) -> Self {
|
||||||
|
Self {
|
||||||
|
bytes: 0,
|
||||||
|
num_print,
|
||||||
|
chunks: VecDeque::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fills this buffer with chunks and consumes the reader completely. This method ensures that
|
||||||
|
/// there are exactly as many chunks as needed to match `self.num_print` bytes, so there are
|
||||||
|
/// in sum exactly `self.num_print` bytes stored in all chunks. The method returns an iterator
|
||||||
|
/// over these chunks. If there are no chunks, for example because the piped stdin contained no
|
||||||
|
/// bytes, or `num_print = 0` then `iterator.next` returns None.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// use crate::chunks::BytesChunkBuffer;
|
||||||
|
/// use std::io::{BufReader, Cursor};
|
||||||
|
///
|
||||||
|
/// let mut reader = BufReader::new(Cursor::new(""));
|
||||||
|
/// let num_print = 0;
|
||||||
|
/// let mut chunks = BytesChunkBuffer::new(num_print);
|
||||||
|
/// chunks.fill(&mut reader).unwrap();
|
||||||
|
///
|
||||||
|
/// let mut reader = BufReader::new(Cursor::new("a"));
|
||||||
|
/// let num_print = 1;
|
||||||
|
/// let mut chunks = BytesChunkBuffer::new(num_print);
|
||||||
|
/// chunks.fill(&mut reader).unwrap();
|
||||||
|
/// ```
|
||||||
|
pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
|
||||||
|
let mut chunk = Box::new(BytesChunk::new());
|
||||||
|
|
||||||
|
// fill chunks with all bytes from reader and reuse already instantiated chunks if possible
|
||||||
|
while (chunk.fill(reader)?).is_some() {
|
||||||
|
self.bytes += chunk.bytes as u64;
|
||||||
|
self.chunks.push_back(chunk);
|
||||||
|
|
||||||
|
let first = &self.chunks[0];
|
||||||
|
if self.bytes - first.bytes as u64 > self.num_print {
|
||||||
|
chunk = self.chunks.pop_front().unwrap();
|
||||||
|
self.bytes -= chunk.bytes as u64;
|
||||||
|
} else {
|
||||||
|
chunk = Box::new(BytesChunk::new());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// quit early if there are no chunks for example in case the pipe was empty
|
||||||
|
if self.chunks.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let chunk = self.chunks.pop_front().unwrap();
|
||||||
|
|
||||||
|
// calculate the offset in the first chunk and put the calculated chunk as first element in
|
||||||
|
// the self.chunks collection. The calculated offset must be in the range 0 to BUFFER_SIZE
|
||||||
|
// and is therefore safely convertible to a usize without losses.
|
||||||
|
let offset = self.bytes.saturating_sub(self.num_print) as usize;
|
||||||
|
self.chunks
|
||||||
|
.push_front(Box::new(BytesChunk::from_chunk(&chunk, offset)));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print(&self, mut writer: impl Write) -> UResult<()> {
|
||||||
|
for chunk in &self.chunks {
|
||||||
|
writer.write_all(chunk.get_buffer())?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Works similar to a [`BytesChunk`] but also stores the number of lines encountered in the current
|
||||||
|
/// buffer. The size of the buffer is limited to a fixed size number of bytes.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct LinesChunk {
|
||||||
|
/// Work on top of a [`BytesChunk`]
|
||||||
|
chunk: BytesChunk,
|
||||||
|
/// The number of lines delimited by `delimiter`. The choice of usize is sufficient here,
|
||||||
|
/// because lines max value is the number of bytes contained in this chunk's buffer, and the
|
||||||
|
/// number of bytes max value is [`BUFFER_SIZE`], which is a usize.
|
||||||
|
lines: usize,
|
||||||
|
/// The delimiter to use, to count the lines
|
||||||
|
delimiter: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LinesChunk {
|
||||||
|
pub fn new(delimiter: u8) -> Self {
|
||||||
|
Self {
|
||||||
|
chunk: BytesChunk::new(),
|
||||||
|
lines: 0,
|
||||||
|
delimiter,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Count the number of lines delimited with [`Self::delimiter`] contained in the buffer.
|
||||||
|
/// Currently [`memchr`] is used because performance is better than using an iterator or for
|
||||||
|
/// loop.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = LinesChunk::new(b'\n');
|
||||||
|
/// chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
|
||||||
|
/// chunk.bytes = 12;
|
||||||
|
/// assert_eq!(2, chunk.count_lines());
|
||||||
|
///
|
||||||
|
/// chunk.buffer[0..14].copy_from_slice("hello\r\nworld\r\n".as_bytes());
|
||||||
|
/// chunk.bytes = 14;
|
||||||
|
/// assert_eq!(2, chunk.count_lines());
|
||||||
|
/// ```
|
||||||
|
fn count_lines(&self) -> usize {
|
||||||
|
memchr::memchr_iter(self.delimiter, self.get_buffer()).count()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new [`LinesChunk`] from an existing one with an offset in lines. The new chunk
|
||||||
|
/// contains exactly `chunk.lines - offset` lines. The offset in bytes is calculated and applied
|
||||||
|
/// to the new chunk, so the new chunk contains only the bytes encountered after the offset in
|
||||||
|
/// number of lines and the `delimiter`. If the offset is larger than `chunk.lines` then a new
|
||||||
|
/// empty `LinesChunk` is returned.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `chunk`: The chunk to create the new chunk from
|
||||||
|
/// * `offset`: The offset in number of lines (not bytes)
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = LinesChunk::new(b'\n');
|
||||||
|
/// // manually filling the buffer and setting the correct values for bytes and lines
|
||||||
|
/// chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
|
||||||
|
/// chunk.bytes = 12;
|
||||||
|
/// chunk.lines = 2;
|
||||||
|
///
|
||||||
|
/// let offset = 1; // offset in number of lines
|
||||||
|
/// let new_chunk = LinesChunk::from(&chunk, offset);
|
||||||
|
/// assert_eq!("world\n".as_bytes(), new_chunk.get_buffer());
|
||||||
|
/// assert_eq!(6, new_chunk.bytes);
|
||||||
|
/// assert_eq!(1, new_chunk.lines);
|
||||||
|
/// ```
|
||||||
|
fn from_chunk(chunk: &Self, offset: usize) -> Self {
|
||||||
|
if offset > chunk.lines {
|
||||||
|
return Self::new(chunk.delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
let bytes_offset = chunk.calculate_bytes_offset_from(offset);
|
||||||
|
let new_chunk = BytesChunk::from_chunk(&chunk.chunk, bytes_offset);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
chunk: new_chunk,
|
||||||
|
lines: chunk.lines - offset,
|
||||||
|
delimiter: chunk.delimiter,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this buffer has stored any bytes.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = LinesChunk::new(b'\n');
|
||||||
|
/// assert!(!chunk.has_data());
|
||||||
|
///
|
||||||
|
/// chunk.buffer[0] = 1;
|
||||||
|
/// assert!(!chunk.has_data());
|
||||||
|
///
|
||||||
|
/// chunk.bytes = 1;
|
||||||
|
/// assert!(chunk.has_data());
|
||||||
|
/// ```
|
||||||
|
pub fn has_data(&self) -> bool {
|
||||||
|
self.chunk.has_data()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns this buffer safely. See [`BytesChunk::get_buffer`]
|
||||||
|
///
|
||||||
|
/// returns: &[u8] with length `self.bytes`
|
||||||
|
pub fn get_buffer(&self) -> &[u8] {
|
||||||
|
self.chunk.get_buffer()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns this buffer safely with an offset applied. See [`BytesChunk::get_buffer_with`].
|
||||||
|
///
|
||||||
|
/// returns: &[u8] with length `self.bytes - offset`
|
||||||
|
pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
|
||||||
|
self.chunk.get_buffer_with(offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the number of lines the buffer contains. `self.lines` needs to be set before the call
|
||||||
|
/// to this function returns the correct value. If the calculation of lines is needed then
|
||||||
|
/// use `self.count_lines`.
|
||||||
|
pub fn get_lines(&self) -> usize {
|
||||||
|
self.lines
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, draining the reader by
|
||||||
|
/// that number of bytes. This function works like the [`BytesChunk::fill`] function besides
|
||||||
|
/// that this function also counts and stores the number of lines encountered while reading from
|
||||||
|
/// the `filehandle`.
|
||||||
|
pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
|
||||||
|
match self.chunk.fill(filehandle)? {
|
||||||
|
None => {
|
||||||
|
self.lines = 0;
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
Some(bytes) => {
|
||||||
|
self.lines = self.count_lines();
|
||||||
|
Ok(Some(bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the offset in bytes within this buffer from the offset in number of lines. The
|
||||||
|
/// resulting offset is 0-based and points to the byte after the delimiter.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `offset`: the offset in number of lines. If offset is 0 then 0 is returned, if larger than
|
||||||
|
/// the contained lines then self.bytes is returned.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// let mut chunk = LinesChunk::new(b'\n');
|
||||||
|
/// chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
|
||||||
|
/// chunk.bytes = 12;
|
||||||
|
/// chunk.lines = 2; // note that if not setting lines the result might not be what is expected
|
||||||
|
/// let bytes_offset = chunk.calculate_bytes_offset_from(1);
|
||||||
|
/// assert_eq!(6, bytes_offset);
|
||||||
|
/// assert_eq!(
|
||||||
|
/// "world\n",
|
||||||
|
/// String::from_utf8_lossy(chunk.get_buffer_with(bytes_offset)));
|
||||||
|
/// ```
|
||||||
|
fn calculate_bytes_offset_from(&self, offset: usize) -> usize {
|
||||||
|
let mut lines_offset = offset;
|
||||||
|
let mut bytes_offset = 0;
|
||||||
|
for byte in self.get_buffer().iter() {
|
||||||
|
if lines_offset == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if byte == &self.delimiter {
|
||||||
|
lines_offset -= 1;
|
||||||
|
}
|
||||||
|
bytes_offset += 1;
|
||||||
|
}
|
||||||
|
bytes_offset
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print the bytes contained in this buffer calculated with the given offset in number of
|
||||||
|
/// lines.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `writer`: must implement [`Write`]
|
||||||
|
/// * `offset`: An offset in number of lines.
|
||||||
|
pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
|
||||||
|
self.print_bytes(writer, self.calculate_bytes_offset_from(offset))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print the bytes contained in this buffer beginning from the given offset in number of bytes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `writer`: must implement [`Write`]
|
||||||
|
/// * `offset`: An offset in number of bytes.
|
||||||
|
pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
|
||||||
|
writer.write_all(self.get_buffer_with(offset))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An abstraction layer on top of [`LinesChunk`] mainly to simplify filling only the needed amount
|
||||||
|
/// of chunks. See also [`Self::fill`]. Works similar like [`BytesChunkBuffer`], but works on top
|
||||||
|
/// of lines delimited by `self.delimiter` instead of bytes.
|
||||||
|
pub struct LinesChunkBuffer {
|
||||||
|
/// The delimiter to recognize a line. Any [`u8`] is allowed.
|
||||||
|
delimiter: u8,
|
||||||
|
/// The amount of lines occurring in all currently stored [`LinesChunk`]s. Use u64 here to
|
||||||
|
/// support files > 4GB on 32-bit systems. Note, this differs from [`LinesChunk::lines`] which
|
||||||
|
/// is a usize. The choice of u64 is based on `tail::FilterMode::Lines`.
|
||||||
|
lines: u64,
|
||||||
|
/// The amount of lines to print.
|
||||||
|
num_print: u64,
|
||||||
|
/// Stores the [`LinesChunk`]
|
||||||
|
chunks: VecDeque<Box<LinesChunk>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LinesChunkBuffer {
|
||||||
|
/// Create a new [`LinesChunkBuffer`]
|
||||||
|
pub fn new(delimiter: u8, num_print: u64) -> Self {
|
||||||
|
Self {
|
||||||
|
delimiter,
|
||||||
|
num_print,
|
||||||
|
lines: 0,
|
||||||
|
chunks: VecDeque::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fills this buffer with chunks and consumes the reader completely. This method ensures that
|
||||||
|
/// there are exactly as many chunks as needed to match `self.num_print` lines, so there are
|
||||||
|
/// in sum exactly `self.num_print` lines stored in all chunks. The method returns an iterator
|
||||||
|
/// over these chunks. If there are no chunks, for example because the piped stdin contained no
|
||||||
|
/// lines, or `num_print = 0` then `iterator.next` will return None.
|
||||||
|
pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
|
||||||
|
let mut chunk = Box::new(LinesChunk::new(self.delimiter));
|
||||||
|
|
||||||
|
while (chunk.fill(reader)?).is_some() {
|
||||||
|
self.lines += chunk.lines as u64;
|
||||||
|
self.chunks.push_back(chunk);
|
||||||
|
|
||||||
|
let first = &self.chunks[0];
|
||||||
|
if self.lines - first.lines as u64 > self.num_print {
|
||||||
|
chunk = self.chunks.pop_front().unwrap();
|
||||||
|
|
||||||
|
self.lines -= chunk.lines as u64;
|
||||||
|
} else {
|
||||||
|
chunk = Box::new(LinesChunk::new(self.delimiter));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !&self.chunks.is_empty() {
|
||||||
|
let length = &self.chunks.len();
|
||||||
|
let last = &mut self.chunks[length - 1];
|
||||||
|
if !last.get_buffer().ends_with(&[self.delimiter]) {
|
||||||
|
last.lines += 1;
|
||||||
|
self.lines += 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// chunks is empty when a file is empty so quitting early here
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip unnecessary chunks and save the first chunk which may hold some lines we have to
|
||||||
|
// print
|
||||||
|
let chunk = loop {
|
||||||
|
// it's safe to call unwrap here because there is at least one chunk and sorting out
|
||||||
|
// more chunks than exist shouldn't be possible.
|
||||||
|
let chunk = self.chunks.pop_front().unwrap();
|
||||||
|
|
||||||
|
// skip is true as long there are enough lines left in the other stored chunks.
|
||||||
|
let skip = self.lines - chunk.lines as u64 > self.num_print;
|
||||||
|
if skip {
|
||||||
|
self.lines -= chunk.lines as u64;
|
||||||
|
} else {
|
||||||
|
break chunk;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate the number of lines to skip in the current chunk. The calculated value must be
|
||||||
|
// in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without
|
||||||
|
// losses.
|
||||||
|
let skip_lines = self.lines.saturating_sub(self.num_print) as usize;
|
||||||
|
let chunk = LinesChunk::from_chunk(&chunk, skip_lines);
|
||||||
|
self.chunks.push_front(Box::new(chunk));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print(&self, mut writer: impl Write) -> UResult<()> {
|
||||||
|
for chunk in &self.chunks {
|
||||||
|
chunk.print_bytes(&mut writer, 0)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::chunks::{BytesChunk, BUFFER_SIZE};

    /// An offset of zero copies all valid bytes of the source chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_zero() {
        let mut source = BytesChunk::new();
        source.buffer[1] = 1;

        // as long as the marker byte at index 1 is valid, the copy equals the source
        for valid_bytes in [BUFFER_SIZE, 2] {
            source.bytes = valid_bytes;
            let copy = BytesChunk::from_chunk(&source, 0);
            assert_eq!(copy, source);
        }

        // the marker byte is not valid anymore and must not be copied
        source.bytes = 1;
        let copy = BytesChunk::from_chunk(&source, 0);
        assert_eq!(copy.buffer, [0; BUFFER_SIZE]);
        assert_eq!(copy.bytes, source.bytes);

        // the marker byte lies in front of the offset and must not be copied
        source.bytes = BUFFER_SIZE;
        let copy = BytesChunk::from_chunk(&source, 2);
        assert_eq!(copy.buffer, [0; BUFFER_SIZE]);
        assert_eq!(copy.bytes, BUFFER_SIZE - 2);
    }

    /// A non-zero offset moves the remaining bytes to the start of the new buffer.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_not_zero() {
        let mut source = BytesChunk::new();
        source.bytes = BUFFER_SIZE;
        source.buffer[1] = 1;

        // the marker byte moves from index 1 to index 0
        let shifted = BytesChunk::from_chunk(&source, 1);
        let mut expected = [0; BUFFER_SIZE];
        expected[0] = 1;
        assert_eq!(shifted.buffer, expected);
        assert_eq!(shifted.bytes, BUFFER_SIZE - 1);

        // the marker byte is cut off
        let shifted = BytesChunk::from_chunk(&source, 2);
        assert_eq!(shifted.buffer, [0; BUFFER_SIZE]);
        assert_eq!(shifted.bytes, BUFFER_SIZE - 2);
    }

    /// An offset behind a completely filled chunk yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_1() {
        let mut source = BytesChunk::new();
        source.bytes = BUFFER_SIZE;
        let result = BytesChunk::from_chunk(&source, BUFFER_SIZE + 1);
        assert_eq!(0, result.bytes);
    }

    /// Any offset applied to an empty chunk yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_2() {
        let mut source = BytesChunk::new();
        source.bytes = 0;
        let result = BytesChunk::from_chunk(&source, 1);
        assert_eq!(0, result.bytes);
    }

    /// An offset behind the valid bytes of a partly filled chunk yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_3() {
        let mut source = BytesChunk::new();
        source.bytes = 1;
        let result = BytesChunk::from_chunk(&source, 2);
        assert_eq!(0, result.bytes);
    }

    /// An offset pointing exactly behind the last valid byte yields an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_equal_to_chunk_size() {
        let mut source = BytesChunk::new();
        source.buffer[0] = 1;
        source.bytes = 1;
        let result = BytesChunk::from_chunk(&source, 1);
        assert_eq!(0, result.bytes);
    }
}
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
// * file that was distributed with this source code.
|
// * file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized
|
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle
|
||||||
// spell-checker:ignore (libs) kqueue
|
// spell-checker:ignore (libs) kqueue
|
||||||
// spell-checker:ignore (acronyms)
|
// spell-checker:ignore (acronyms)
|
||||||
// spell-checker:ignore (env/flags)
|
// spell-checker:ignore (env/flags)
|
||||||
|
@ -21,8 +21,9 @@ extern crate clap;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
|
extern crate core;
|
||||||
|
|
||||||
mod chunks;
|
pub mod chunks;
|
||||||
mod parse;
|
mod parse;
|
||||||
mod platform;
|
mod platform;
|
||||||
use crate::files::FileHandling;
|
use crate::files::FileHandling;
|
||||||
|
@ -30,11 +31,11 @@ use chunks::ReverseChunks;
|
||||||
|
|
||||||
use clap::{Arg, Command, ValueSource};
|
use clap::{Arg, Command, ValueSource};
|
||||||
use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind};
|
use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind};
|
||||||
|
use std::cmp::Ordering;
|
||||||
use std::collections::{HashMap, VecDeque};
|
use std::collections::{HashMap, VecDeque};
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
use std::fmt;
|
|
||||||
use std::fs::{File, Metadata};
|
use std::fs::{File, Metadata};
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write};
|
use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::mpsc::{self, channel, Receiver};
|
use std::sync::mpsc::{self, channel, Receiver};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
@ -43,9 +44,7 @@ use uucore::error::{
|
||||||
get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError,
|
get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError,
|
||||||
};
|
};
|
||||||
use uucore::format_usage;
|
use uucore::format_usage;
|
||||||
use uucore::lines::lines;
|
|
||||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||||
use uucore::ringbuffer::RingBuffer;
|
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
@ -1458,70 +1457,58 @@ fn bounded_tail(file: &mut File, settings: &Settings) {
|
||||||
std::io::copy(file, &mut stdout).unwrap();
|
std::io::copy(file, &mut stdout).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advances `iter` by up to `num` elements, like [`Iterator::skip`] but counting with a u64
/// instead of a usize. The usize limit doesn't make sense for data that is not held in memory,
/// for example a very large file; counting in u64 allows skipping more than 4 GiB of data even
/// on 32-bit platforms.
///
/// Stops early, right after the first `None`, if the iterator has fewer than `num` elements.
fn skip_u64(iter: &mut impl Iterator, num: u64) {
    let mut remaining = num;
    while remaining > 0 && iter.next().is_some() {
        remaining -= 1;
    }
}
|
|
||||||
|
|
||||||
/// Collect the last elements of an iterator into a `VecDeque`.
|
|
||||||
///
|
|
||||||
/// This function returns a [`VecDeque`] containing either the last
|
|
||||||
/// `count` elements of `iter`, an [`Iterator`] over [`Result`]
|
|
||||||
/// instances, or all but the first `count` elements of `iter`. If
|
|
||||||
/// `beginning` is `true`, then all but the first `count` elements are
|
|
||||||
/// returned.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// If any element of `iter` is an [`Err`], then this function panics.
|
|
||||||
fn unbounded_tail_collect<T, E>(
|
|
||||||
mut iter: impl Iterator<Item = Result<T, E>>,
|
|
||||||
count: u64,
|
|
||||||
beginning: bool,
|
|
||||||
) -> UResult<VecDeque<T>>
|
|
||||||
where
|
|
||||||
E: fmt::Debug,
|
|
||||||
{
|
|
||||||
if beginning {
|
|
||||||
// GNU `tail` seems to index bytes and lines starting at 1, not
|
|
||||||
// at 0. It seems to treat `+0` and `+1` as the same thing.
|
|
||||||
let i = count.max(1) - 1;
|
|
||||||
skip_u64(&mut iter, i);
|
|
||||||
Ok(iter.map(|r| r.unwrap()).collect())
|
|
||||||
} else {
|
|
||||||
let count: usize = count
|
|
||||||
.try_into()
|
|
||||||
.map_err(|_| USimpleError::new(1, "Insufficient addressable memory"))?;
|
|
||||||
Ok(RingBuffer::from_iter(iter.map(|r| r.unwrap()), count).data)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn unbounded_tail<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
|
fn unbounded_tail<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
|
||||||
// Read through each line/char and store them in a ringbuffer that always
|
let stdout = stdout();
|
||||||
// contains count lines/chars. When reaching the end of file, output the
|
let mut writer = BufWriter::new(stdout.lock());
|
||||||
// data in the ringbuf.
|
match (&settings.mode, settings.beginning) {
|
||||||
match settings.mode {
|
(FilterMode::Lines(count, sep), false) => {
|
||||||
FilterMode::Lines(count, sep) => {
|
let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count);
|
||||||
let mut stdout = stdout();
|
chunks.fill(reader)?;
|
||||||
for line in unbounded_tail_collect(lines(reader, sep), count, settings.beginning)? {
|
chunks.print(writer)?;
|
||||||
stdout
|
|
||||||
.write_all(&line)
|
|
||||||
.map_err_context(|| String::from("IO error"))?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
FilterMode::Bytes(count) => {
|
(FilterMode::Lines(count, sep), true) => {
|
||||||
for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning)? {
|
let mut num_skip = (*count).max(1) - 1;
|
||||||
if let Err(err) = stdout().write(&[byte]) {
|
let mut chunk = chunks::LinesChunk::new(*sep);
|
||||||
return Err(USimpleError::new(1, err.to_string()));
|
while chunk.fill(reader)?.is_some() {
|
||||||
|
let lines = chunk.get_lines() as u64;
|
||||||
|
if lines < num_skip {
|
||||||
|
num_skip -= lines;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if chunk.has_data() {
|
||||||
|
chunk.print_lines(&mut writer, num_skip as usize)?;
|
||||||
|
io::copy(reader, &mut writer)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(FilterMode::Bytes(count), false) => {
|
||||||
|
let mut chunks = chunks::BytesChunkBuffer::new(*count);
|
||||||
|
chunks.fill(reader)?;
|
||||||
|
chunks.print(writer)?;
|
||||||
|
}
|
||||||
|
(FilterMode::Bytes(count), true) => {
|
||||||
|
let mut num_skip = (*count).max(1) - 1;
|
||||||
|
let mut chunk = chunks::BytesChunk::new();
|
||||||
|
loop {
|
||||||
|
if let Some(bytes) = chunk.fill(reader)? {
|
||||||
|
let bytes: u64 = bytes as u64;
|
||||||
|
match bytes.cmp(&num_skip) {
|
||||||
|
Ordering::Less => num_skip -= bytes,
|
||||||
|
Ordering::Equal => {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Ordering::Greater => {
|
||||||
|
writer.write_all(chunk.get_buffer_with(num_skip as usize))?;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
io::copy(reader, &mut writer)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
// * file that was distributed with this source code.
|
// * file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez
|
// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez MULT
|
||||||
// spell-checker:ignore (libs) kqueue
|
// spell-checker:ignore (libs) kqueue
|
||||||
// spell-checker:ignore (jargon) tailable untailable
|
// spell-checker:ignore (jargon) tailable untailable
|
||||||
|
|
||||||
|
@ -1090,18 +1090,6 @@ fn test_invalid_num() {
|
||||||
.fails()
|
.fails()
|
||||||
.stderr_str()
|
.stderr_str()
|
||||||
.starts_with("tail: invalid number of lines: '1Y': Value too large for defined data type");
|
.starts_with("tail: invalid number of lines: '1Y': Value too large for defined data type");
|
||||||
#[cfg(target_pointer_width = "32")]
|
|
||||||
{
|
|
||||||
let sizes = ["1000G", "10T"];
|
|
||||||
for size in &sizes {
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["-c", size])
|
|
||||||
.fails()
|
|
||||||
.code_is(1)
|
|
||||||
.stderr_str()
|
|
||||||
.starts_with("tail: Insufficient addressable memory");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-c", "-³"])
|
.args(&["-c", "-³"])
|
||||||
.fails()
|
.fails()
|
||||||
|
@ -2484,6 +2472,725 @@ fn test_illegal_seek() {
|
||||||
assert_eq!(p.wait().unwrap().code().unwrap(), 1);
|
assert_eq!(p.wait().unwrap().code().unwrap(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(all(not(target_os = "android"), not(target_os = "windows")))] // FIXME: See https://github.com/uutils/coreutils/issues/3881
|
||||||
|
mod pipe_tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::common::random::*;
|
||||||
|
use rand::distributions::Alphanumeric;
|
||||||
|
use tail::chunks::BUFFER_SIZE as CHUNK_BUFFER_SIZE;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_lines_option_value_is_higher_than_contained_lines() {
|
||||||
|
let test_string = "a\nb\n";
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "3"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "4"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "999"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+3"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+4"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+999"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_negative_lines_option_given_no_newline_at_eof() {
|
||||||
|
let test_string = "a\nb";
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "0"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "1"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("b");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "2"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a\nb");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_positive_lines_option_given_no_newline_at_eof() {
|
||||||
|
let test_string = "a\nb";
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+0"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a\nb");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+1"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a\nb");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+2"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("b");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_lines_option_given_multibyte_utf8_characters() {
|
||||||
|
// From left to right, the test string consists of a 4-byte, a 3-byte, a 2-byte and a 1-byte UTF-8 character, separated by newlines
|
||||||
|
let test_string = "𝅘𝅥𝅮\n⏻\nƒ\na";
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+0"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+2"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("⏻\nƒ\na");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+3"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("ƒ\na");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+4"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+5"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-4"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-3"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("⏻\nƒ\na");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-2"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("ƒ\na");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-1"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-0"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size_no_newline_at_eof() {
|
||||||
|
let total_lines = 1;
|
||||||
|
let random_string = RandomString::generate_with_delimiter(
|
||||||
|
Alphanumeric,
|
||||||
|
b'\n',
|
||||||
|
total_lines,
|
||||||
|
false,
|
||||||
|
CHUNK_BUFFER_SIZE,
|
||||||
|
);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
let lines = random_string.split_inclusive('\n');
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+2"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-1"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size() {
|
||||||
|
let total_lines = 100;
|
||||||
|
let random_string = RandomString::generate_with_delimiter(
|
||||||
|
Alphanumeric,
|
||||||
|
b'\n',
|
||||||
|
total_lines,
|
||||||
|
true,
|
||||||
|
CHUNK_BUFFER_SIZE,
|
||||||
|
);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
let lines = random_string.split_inclusive('\n');
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+2"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(total_lines - 1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-1"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-99"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-100"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_lines_option_given_input_size_is_one_byte_greater_than_buffer_size() {
|
||||||
|
let total_lines = 100;
|
||||||
|
let random_string = RandomString::generate_with_delimiter(
|
||||||
|
Alphanumeric,
|
||||||
|
b'\n',
|
||||||
|
total_lines,
|
||||||
|
true,
|
||||||
|
CHUNK_BUFFER_SIZE + 1,
|
||||||
|
);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
let lines = random_string.split_inclusive('\n');
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(total_lines - 1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-1"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+2"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-99"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_lines_option_given_input_size_has_multiple_size_of_buffer_size() {
|
||||||
|
let total_lines = 100;
|
||||||
|
let random_string = RandomString::generate_with_delimiter(
|
||||||
|
Alphanumeric,
|
||||||
|
b'\n',
|
||||||
|
total_lines,
|
||||||
|
true,
|
||||||
|
CHUNK_BUFFER_SIZE * 3 + 1,
|
||||||
|
);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
let lines = random_string.split_inclusive('\n');
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "+2"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(total_lines - 1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-1"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
let expected = lines.clone().skip(1).collect::<String>();
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-99"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "-100"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_bytes_option_value_is_higher_than_contained_bytes() {
|
||||||
|
let test_string = "a\nb";
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "4"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "5"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "999"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+4"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+5"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+999"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_bytes_option_given_multibyte_utf8_characters() {
|
||||||
|
// From left to right, the test string consists of a 4-byte, a 3-byte, a 2-byte and a 1-byte UTF-8 character
|
||||||
|
let test_string = "𝅘𝅥𝅮⏻ƒa";
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+0"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+2"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(&test_string.as_bytes()[1..]);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+5"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("⏻ƒa");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+8"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("ƒa");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+10"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+11"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-1"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-2"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(&"ƒa".as_bytes()[1..]);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-3"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("ƒa");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-6"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("⏻ƒa");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-10"])
|
||||||
|
.pipe_in(test_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(test_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_bytes_option_given_input_size_is_equal_to_buffer_size() {
|
||||||
|
let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[1..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+2"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[1..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8191"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8192"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(random_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8193"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(random_string);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[CHUNK_BUFFER_SIZE - 1..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-1"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_bytes_option_given_input_size_is_one_byte_greater_than_buffer_size() {
|
||||||
|
let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE + 1);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[1..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+2"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[CHUNK_BUFFER_SIZE..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-1"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[1..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8192"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8193"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pipe_when_bytes_option_given_input_size_has_multiple_size_of_buffer_size() {
|
||||||
|
let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE * 3);
|
||||||
|
let random_string = random_string.as_str();
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-0"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.no_stdout()
|
||||||
|
.no_stderr();
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[8192..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+8193"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[8193..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+8194"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[16384..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+16385"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[16385..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "+16386"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[16384..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8192"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[16383..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-8193"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[8192..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-16384"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
let expected = &random_string.as_bytes()[8191..];
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-16385"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_bytes(expected);
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-24576"])
|
||||||
|
.pipe_in(random_string)
|
||||||
|
.ignore_stdin_write_error()
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(random_string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_seek_bytes_backward_outside_file() {
|
fn test_seek_bytes_backward_outside_file() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod macros;
|
pub mod macros;
|
||||||
|
pub mod random;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
314
tests/common/random.rs
Normal file
314
tests/common/random.rs
Normal file
|
@ -0,0 +1,314 @@
|
||||||
|
// * This file is part of the uutils coreutils package.
|
||||||
|
// *
|
||||||
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
|
// * file that was distributed with this source code.
|
||||||
|
|
||||||
|
use rand::distributions::{Distribution, Uniform};
|
||||||
|
use rand::{thread_rng, Rng};
|
||||||
|
|
||||||
|
/// Samples alphanumeric characters `[A-Za-z0-9]` including newline `\n`
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// use rand::{Rng, thread_rng};
|
||||||
|
///
|
||||||
|
/// let vec = thread_rng()
|
||||||
|
/// .sample_iter(AlphanumericNewline)
|
||||||
|
/// .take(10)
|
||||||
|
/// .collect::<Vec<u8>>();
|
||||||
|
/// println!("Random chars: {}", String::from_utf8(vec).unwrap());
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub struct AlphanumericNewline;
|
||||||
|
|
||||||
|
impl AlphanumericNewline {
|
||||||
|
/// The charset to act upon
|
||||||
|
const CHARSET: &'static [u8] =
|
||||||
|
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n";
|
||||||
|
|
||||||
|
/// Generate a random byte from [`Self::CHARSET`] and return it as `u8`.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `rng`: A [`rand::Rng`]
|
||||||
|
///
|
||||||
|
/// returns: u8
|
||||||
|
fn random<R>(rng: &mut R) -> u8
|
||||||
|
where
|
||||||
|
R: Rng + ?Sized,
|
||||||
|
{
|
||||||
|
let idx = rng.gen_range(0..Self::CHARSET.len());
|
||||||
|
Self::CHARSET[idx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Distribution<u8> for AlphanumericNewline {
|
||||||
|
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 {
|
||||||
|
Self::random(rng)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate a random string from a [`Distribution`]
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// use crate::common::random::{AlphanumericNewline, RandomString};
|
||||||
|
/// use rand::distributions::Alphanumeric;
|
||||||
|
///
|
||||||
|
/// // generates a 100 byte string with characters from AlphanumericNewline
|
||||||
|
/// let random_string = RandomString::generate(&AlphanumericNewline, 100);
|
||||||
|
/// assert_eq!(100, random_string.len());
|
||||||
|
///
|
||||||
|
/// // generates a 100 byte string with 10 newline characters not ending with a newline
|
||||||
|
/// let random_string = RandomString::generate_with_delimiter(&Alphanumeric, b'\n', 10, false, 100);
|
||||||
|
/// assert_eq!(100, random_string.len());
|
||||||
|
/// ```
|
||||||
|
pub struct RandomString;
|
||||||
|
|
||||||
|
impl RandomString {
|
||||||
|
/// Generate a random string from the given [`Distribution`] with the given `length` in bytes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `dist`: A u8 [`Distribution`]
|
||||||
|
/// * `length`: the length of the resulting string in bytes
|
||||||
|
///
|
||||||
|
/// returns: String
|
||||||
|
pub fn generate<D>(dist: D, length: usize) -> String
|
||||||
|
where
|
||||||
|
D: Distribution<u8>,
|
||||||
|
{
|
||||||
|
thread_rng()
|
||||||
|
.sample_iter(dist)
|
||||||
|
.take(length)
|
||||||
|
.map(|b| b as char)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate a random string from the [`Distribution`] with the given `length` in bytes. The
|
||||||
|
/// function takes a `delimiter`, which is randomly distributed in the string, such that exactly
|
||||||
|
/// `num_delimiter` amount of `delimiter`s occur. If `end_with_delimiter` is set, then the
|
||||||
|
/// string ends with the delimiter, else the string does not end with the delimiter.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `dist`: A `u8` [`Distribution`]
|
||||||
|
/// * `delimiter`: A `u8` delimiter, which does not need to be included in the `Distribution`
|
||||||
|
/// * `num_delimiter`: The number of `delimiter`s contained in the resulting string
|
||||||
|
/// * `end_with_delimiter`: If the string shall end with the given delimiter
|
||||||
|
/// * `length`: the length of the resulting string in bytes
|
||||||
|
///
|
||||||
|
/// returns: String
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
/// use crate::common::random::{AlphanumericNewline, RandomString};
|
||||||
|
///
|
||||||
|
/// // generates a 100 byte string with 10 '\0' byte characters not ending with a '\0' byte
|
||||||
|
/// let string = RandomString::generate_with_delimiter(&AlphanumericNewline, 0, 10, false, 100);
|
||||||
|
/// assert_eq!(100, random_string.len());
|
||||||
|
/// assert_eq!(
|
||||||
|
/// 10,
|
||||||
|
/// random_string.as_bytes().iter().filter(|p| **p == 0).count()
|
||||||
|
/// );
|
||||||
|
/// assert!(!random_string.as_bytes().ends_with(&[0]));
|
||||||
|
/// ```
|
||||||
|
pub fn generate_with_delimiter<D>(
|
||||||
|
dist: D,
|
||||||
|
delimiter: u8,
|
||||||
|
num_delimiter: usize,
|
||||||
|
end_with_delimiter: bool,
|
||||||
|
length: usize,
|
||||||
|
) -> String
|
||||||
|
where
|
||||||
|
D: Distribution<u8>,
|
||||||
|
{
|
||||||
|
if length == 0 {
|
||||||
|
return String::from("");
|
||||||
|
} else if length == 1 {
|
||||||
|
return if num_delimiter > 0 {
|
||||||
|
String::from(delimiter as char)
|
||||||
|
} else {
|
||||||
|
String::from(thread_rng().sample(&dist) as char)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
let samples = length - 1;
|
||||||
|
let mut result: Vec<u8> = thread_rng().sample_iter(&dist).take(samples).collect();
|
||||||
|
|
||||||
|
if num_delimiter == 0 {
|
||||||
|
result.push(thread_rng().sample(&dist));
|
||||||
|
return String::from_utf8(result).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let num_delimiter = if end_with_delimiter {
|
||||||
|
num_delimiter - 1
|
||||||
|
} else {
|
||||||
|
num_delimiter
|
||||||
|
};
|
||||||
|
|
||||||
|
let between = Uniform::new(0, samples);
|
||||||
|
for _ in 0..num_delimiter {
|
||||||
|
let mut pos = between.sample(&mut thread_rng());
|
||||||
|
let turn = pos;
|
||||||
|
while result[pos] == delimiter {
|
||||||
|
pos += 1;
|
||||||
|
if pos >= samples {
|
||||||
|
pos = 0;
|
||||||
|
}
|
||||||
|
if pos == turn {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result[pos] = delimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
if end_with_delimiter {
|
||||||
|
result.push(delimiter);
|
||||||
|
} else {
|
||||||
|
result.push(thread_rng().sample(&dist));
|
||||||
|
}
|
||||||
|
|
||||||
|
String::from_utf8(result).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use rand::distributions::Alphanumeric;

    /// Number of NUL bytes (the delimiter used throughout these tests) in `text`.
    fn nul_count(text: &str) -> usize {
        text.bytes().filter(|&byte| byte == 0).count()
    }

    /// Whether the last byte of `text` is a NUL byte.
    fn ends_with_nul(text: &str) -> bool {
        text.as_bytes().ends_with(&[0])
    }

    #[test]
    fn test_random_string_generate() {
        for length in [0, 1, 100] {
            let generated = RandomString::generate(&AlphanumericNewline, length);
            assert_eq!(length, generated.len());
        }
    }

    #[test]
    fn test_random_string_generate_with_delimiter_when_length_is_zero() {
        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, false, 0);
        assert!(generated.is_empty());
    }

    #[test]
    fn test_random_string_generate_with_delimiter_when_num_delimiter_is_greater_than_length() {
        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 2, false, 1);
        assert_eq!(1, generated.len());
        assert!(generated.as_bytes().contains(&0));
        assert!(ends_with_nul(&generated));
    }

    #[test]
    fn test_random_string_generate_with_delimiter_should_end_with_delimiter() {
        // (num_delimiter, end_with_delimiter, length); the expected count equals num_delimiter
        let cases = [
            (1, true, 1),
            (1, false, 1),
            (1, true, 2),
            (2, true, 2),
            (1, true, 3),
        ];

        for (num_delimiter, end_with_delimiter, length) in cases {
            let generated = RandomString::generate_with_delimiter(
                &Alphanumeric,
                0,
                num_delimiter,
                end_with_delimiter,
                length,
            );
            assert_eq!(length, generated.len());
            assert_eq!(num_delimiter, nul_count(&generated));
            assert!(ends_with_nul(&generated));
        }
    }

    #[test]
    fn test_random_string_generate_with_delimiter_should_not_end_with_delimiter() {
        // Without any requested delimiter the flag must not introduce one.
        for end_with_delimiter in [false, true] {
            let generated =
                RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, end_with_delimiter, 1);
            assert_eq!(1, generated.len());
            assert_eq!(0, nul_count(&generated));
        }

        // (num_delimiter, length)
        for (num_delimiter, length) in [(1, 2), (1, 3), (2, 3)] {
            let generated =
                RandomString::generate_with_delimiter(&Alphanumeric, 0, num_delimiter, false, length);
            assert_eq!(length, generated.len());
            assert_eq!(num_delimiter, nul_count(&generated));
            assert!(!ends_with_nul(&generated));
        }
    }

    #[test]
    fn test_generate_with_delimiter_with_greater_length() {
        for end_with_delimiter in [false, true] {
            let generated = RandomString::generate_with_delimiter(
                &Alphanumeric,
                0,
                100,
                end_with_delimiter,
                1000,
            );
            assert_eq!(1000, generated.len());
            assert_eq!(100, nul_count(&generated));
            assert_eq!(end_with_delimiter, ends_with_nul(&generated));
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue