diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 9d2255a6a..fb0a9e771 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -17,7 +17,6 @@ use thiserror::Error; use uucore::display::Quotable; use uucore::error::{FromIo, UError, UResult}; use uucore::line_ending::LineEnding; -use uucore::lines::lines; use uucore::{format_usage, help_about, help_usage, show}; const BUF_SIZE: usize = 65536; @@ -37,7 +36,8 @@ mod options { mod parse; mod take; -use take::take_all_but; +use take::copy_all_but_n_bytes; +use take::copy_all_but_n_lines; use take::take_lines; #[derive(Error, Debug)] @@ -274,14 +274,16 @@ fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std let mut reader = take_lines(input, n, separator); // Write those bytes to `stdout`. - let mut stdout = std::io::stdout(); + let stdout = std::io::stdout(); + let stdout = stdout.lock(); + let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout); - let bytes_written = io::copy(&mut reader, &mut stdout).map_err(wrap_in_stdout_error)?; + let bytes_written = io::copy(&mut reader, &mut writer).map_err(wrap_in_stdout_error)?; // Make sure we finish writing everything to the target before // exiting. Otherwise, when Rust is implicitly flushing, any // error will be silently ignored. - stdout.flush().map_err(wrap_in_stdout_error)?; + writer.flush().map_err(wrap_in_stdout_error)?; Ok(bytes_written) } @@ -296,43 +298,37 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option } } -fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result { - let mut bytes_written = 0; - if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { - let stdout = std::io::stdout(); - let stdout = stdout.lock(); - // Even though stdout is buffered, it will flush on each newline in the - // input stream. This can be costly, so add an extra layer of buffering - // over the top. This gives a significant speedup (approx 4x). - let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout); - for byte in take_all_but(input.bytes(), n) { - writer.write_all(&[byte?]).map_err(wrap_in_stdout_error)?; - bytes_written += 1; - } - // Make sure we finish writing everything to the target before - // exiting. Otherwise, when Rust is implicitly flushing, any - // error will be silently ignored. - writer.flush().map_err(wrap_in_stdout_error)?; - } - Ok(bytes_written) -} - -fn read_but_last_n_lines( - input: impl std::io::BufRead, - n: u64, - separator: u8, -) -> std::io::Result { +fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> std::io::Result { let mut bytes_written: u64 = 0; if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { let stdout = std::io::stdout(); let mut stdout = stdout.lock(); - for bytes in take_all_but(lines(input, separator), n) { - let bytes = bytes?; - bytes_written += u64::try_from(bytes.len()).unwrap(); + bytes_written = copy_all_but_n_bytes(&mut input, &mut stdout, n) + .map_err(wrap_in_stdout_error)? + .try_into() + .unwrap(); - stdout.write_all(&bytes).map_err(wrap_in_stdout_error)?; - } + // Make sure we finish writing everything to the target before + // exiting. Otherwise, when Rust is implicitly flushing, any + // error will be silently ignored. + stdout.flush().map_err(wrap_in_stdout_error)?; + } + Ok(bytes_written) +} + +fn read_but_last_n_lines(mut input: impl Read, n: u64, separator: u8) -> std::io::Result { + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + if n == 0 { + return io::copy(&mut input, &mut stdout).map_err(wrap_in_stdout_error); + } + let mut bytes_written: u64 = 0; + if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { + bytes_written = copy_all_but_n_lines(input, &mut stdout, n, separator) + .map_err(wrap_in_stdout_error)? + .try_into() + .unwrap(); // Make sure we finish writing everything to the target before // exiting. Otherwise, when Rust is implicitly flushing, any // error will be silently ignored. @@ -434,10 +430,9 @@ fn head_backwards_without_seek_file( input: &mut std::fs::File, options: &HeadOptions, ) -> std::io::Result { - let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input); match options.mode { - Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n), - Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()), + Mode::AllButLastBytes(n) => read_but_last_n_bytes(input, n), + Mode::AllButLastLines(n) => read_but_last_n_lines(input, n, options.line_ending.into()), _ => unreachable!(), } } @@ -452,18 +447,12 @@ fn head_backwards_on_seekable_file( if n >= size { Ok(0) } else { - read_n_bytes( - &mut std::io::BufReader::with_capacity(BUF_SIZE, input), - size - n, - ) + read_n_bytes(input, size - n) } } Mode::AllButLastLines(n) => { let found = find_nth_line_from_end(input, n, options.line_ending.into())?; - read_n_bytes( - &mut std::io::BufReader::with_capacity(BUF_SIZE, input), - found, - ) + read_n_bytes(input, found) } _ => unreachable!(), } @@ -471,9 +460,7 @@ fn head_backwards_on_seekable_file( fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result { match options.mode { - Mode::FirstBytes(n) => { - read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n) - } + Mode::FirstBytes(n) => read_n_bytes(input, n), Mode::FirstLines(n) => read_n_lines( &mut std::io::BufReader::with_capacity(BUF_SIZE, input), n, diff --git a/src/uu/head/src/take.rs b/src/uu/head/src/take.rs index da48afd6a..dd80b8dd1 100644 --- a/src/uu/head/src/take.rs +++ b/src/uu/head/src/take.rs @@ -3,67 +3,308 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Take all but the last elements of an iterator. -use std::io::Read; - use memchr::memchr_iter; +use std::collections::VecDeque; +use std::io::{ErrorKind, Read, Write}; -use uucore::ringbuffer::RingBuffer; +const BUF_SIZE: usize = 65536; -/// Create an iterator over all but the last `n` elements of `iter`. -/// -/// # Examples -/// -/// ```rust,ignore -/// let data = [1, 2, 3, 4, 5]; -/// let n = 2; -/// let mut iter = take_all_but(data.iter(), n); -/// assert_eq!(Some(4), iter.next()); -/// assert_eq!(Some(5), iter.next()); -/// assert_eq!(None, iter.next()); -/// ``` -pub fn take_all_but(iter: I, n: usize) -> TakeAllBut { - TakeAllBut::new(iter, n) +struct TakeAllBuffer { + buffer: Vec, + start_index: usize, } -/// An iterator that only iterates over the last elements of another iterator. -pub struct TakeAllBut { - iter: I, - buf: RingBuffer<::Item>, +impl TakeAllBuffer { + fn new() -> Self { + TakeAllBuffer { + buffer: vec![], + start_index: 0, + } + } + + fn fill_buffer(&mut self, reader: &mut impl Read) -> std::io::Result { + self.buffer.resize(BUF_SIZE, 0); + self.start_index = 0; + loop { + match reader.read(&mut self.buffer[..]) { + Ok(n) => { + self.buffer.truncate(n); + return Ok(n); + } + Err(e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + } + } + } + + fn write_bytes_exact(&mut self, writer: &mut impl Write, bytes: usize) -> std::io::Result<()> { + let buffer_to_write = &self.remaining_buffer()[..bytes]; + writer.write_all(buffer_to_write)?; + self.start_index += bytes; + assert!(self.start_index <= self.buffer.len()); + Ok(()) + } + + fn write_all(&mut self, writer: &mut impl Write) -> std::io::Result { + let remaining_bytes = self.remaining_bytes(); + self.write_bytes_exact(writer, remaining_bytes)?; + Ok(remaining_bytes) + } + + fn write_bytes_limit( + &mut self, + writer: &mut impl Write, + max_bytes: usize, + ) -> std::io::Result { + let bytes_to_write = self.remaining_bytes().min(max_bytes); + self.write_bytes_exact(writer, bytes_to_write)?; + Ok(bytes_to_write) + } + + fn remaining_buffer(&self) -> &[u8] { + &self.buffer[self.start_index..] + } + + fn remaining_bytes(&self) -> usize { + self.remaining_buffer().len() + } + + fn is_empty(&self) -> bool { + assert!(self.start_index <= self.buffer.len()); + self.start_index == self.buffer.len() + } } -impl TakeAllBut { - pub fn new(mut iter: I, n: usize) -> Self { - // Create a new ring buffer and fill it up. - // - // If there are fewer than `n` elements in `iter`, then we - // exhaust the iterator so that whenever `TakeAllBut::next()` is - // called, it will return `None`, as expected. - let mut buf = RingBuffer::new(n); - for _ in 0..n { - let value = match iter.next() { - None => { +/// Function to copy all but `n` bytes from the reader to the writer. +/// +/// If `n` exceeds the number of bytes in the input file then nothing is copied. +/// If no errors are encountered then the function returns the number of bytes +/// copied. +/// +/// Algorithm for this function is as follows... +/// 1 - Chunks of the input file are read into a queue of TakeAllBuffer instances. +/// Chunks are read until at least we have enough data to write out the entire contents of the +/// first TakeAllBuffer in the queue whilst still retaining at least `n` bytes in the queue. +/// If we hit EoF at any point, stop reading. +/// 2 - Asses whether we managed to queue up greater-than `n` bytes. If not, we must be done, in +/// which case break and return. +/// 3 - Write either the full first buffer of data, or just enough bytes to get back down to having +/// the required `n` bytes of data queued. +/// 4 - Go back to (1). +pub fn copy_all_but_n_bytes( + reader: &mut impl Read, + writer: &mut impl Write, + n: usize, +) -> std::io::Result { + let mut buffers: VecDeque = VecDeque::new(); + let mut empty_buffer_pool: Vec = vec![]; + let mut buffered_bytes: usize = 0; + let mut total_bytes_copied = 0; + loop { + loop { + // Try to buffer at least enough to write the entire first buffer. + let front_buffer = buffers.front(); + if let Some(front_buffer) = front_buffer { + if buffered_bytes >= n + front_buffer.remaining_bytes() { break; } - Some(x) => x, - }; - buf.push_back(value); + } + let mut new_buffer = empty_buffer_pool.pop().unwrap_or_else(TakeAllBuffer::new); + let filled_bytes = new_buffer.fill_buffer(reader)?; + if filled_bytes == 0 { + // filled_bytes==0 => Eof + break; + } + buffers.push_back(new_buffer); + buffered_bytes += filled_bytes; } - Self { iter, buf } + + // If we've got <=n bytes buffered here we have nothing left to do. + if buffered_bytes <= n { + break; + } + + let excess_buffered_bytes = buffered_bytes - n; + // Since we have some data buffered, can assume we have >=1 buffer - i.e. safe to unwrap. + let front_buffer = buffers.front_mut().unwrap(); + let bytes_written = front_buffer.write_bytes_limit(writer, excess_buffered_bytes)?; + buffered_bytes -= bytes_written; + total_bytes_copied += bytes_written; + // If the front buffer is empty (which it probably is), push it into the empty-buffer-pool. + if front_buffer.is_empty() { + empty_buffer_pool.push(buffers.pop_front().unwrap()); + } + } + Ok(total_bytes_copied) +} + +struct TakeAllLinesBuffer { + inner: TakeAllBuffer, + terminated_lines: usize, + partial_line: bool, +} + +struct BytesAndLines { + bytes: usize, + terminated_lines: usize, +} + +impl TakeAllLinesBuffer { + fn new() -> Self { + TakeAllLinesBuffer { + inner: TakeAllBuffer::new(), + terminated_lines: 0, + partial_line: false, + } + } + + fn fill_buffer( + &mut self, + reader: &mut impl Read, + separator: u8, + ) -> std::io::Result { + let bytes_read = self.inner.fill_buffer(reader)?; + // Count the number of lines... + self.terminated_lines = memchr_iter(separator, self.inner.remaining_buffer()).count(); + if let Some(last_char) = self.inner.remaining_buffer().last() { + if *last_char != separator { + self.partial_line = true; + } + } + Ok(BytesAndLines { + bytes: bytes_read, + terminated_lines: self.terminated_lines, + }) + } + + fn write_lines( + &mut self, + writer: &mut impl Write, + max_lines: usize, + separator: u8, + ) -> std::io::Result { + assert!(max_lines > 0, "Must request at least 1 line."); + let ret; + if max_lines > self.terminated_lines { + ret = BytesAndLines { + bytes: self.inner.write_all(writer)?, + terminated_lines: self.terminated_lines, + }; + self.terminated_lines = 0; + } else { + let index = memchr_iter(separator, self.inner.remaining_buffer()).nth(max_lines - 1); + assert!( + index.is_some(), + "Somehow we're being asked to write more lines than we have, that's a bug in copy_all_but_lines." + ); + let index = index.unwrap(); + // index is the offset of the separator character, zero indexed. Need to add 1 to get the number + // of bytes to write. + let bytes_to_write = index + 1; + self.inner.write_bytes_exact(writer, bytes_to_write)?; + ret = BytesAndLines { + bytes: bytes_to_write, + terminated_lines: max_lines, + }; + self.terminated_lines -= max_lines; + } + Ok(ret) + } + + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + fn terminated_lines(&self) -> usize { + self.terminated_lines + } + + fn partial_line(&self) -> bool { + self.partial_line } } -impl Iterator for TakeAllBut -where - I: Iterator, -{ - type Item = ::Item; +/// Function to copy all but `n` lines from the reader to the writer. +/// +/// Lines are inferred from the `separator` value passed in by the client. +/// If `n` exceeds the number of lines in the input file then nothing is copied. +/// The last line in the file is not required to end with a `separator` character. +/// If no errors are encountered then they function returns the number of bytes +/// copied. +/// +/// Algorithm for this function is as follows... +/// 1 - Chunks of the input file are read into a queue of TakeAllLinesBuffer instances. +/// Chunks are read until at least we have enough lines that we can write out the entire +/// contents of the first TakeAllLinesBuffer in the queue whilst still retaining at least +/// `n` lines in the queue. +/// If we hit EoF at any point, stop reading. +/// 2 - Asses whether we managed to queue up greater-than `n` lines. If not, we must be done, in +/// which case break and return. +/// 3 - Write either the full first buffer of data, or just enough lines to get back down to +/// having the required `n` lines of data queued. +/// 4 - Go back to (1). +/// +/// Note that lines will regularly straddle multiple TakeAllLinesBuffer instances. The partial_line +/// flag on TakeAllLinesBuffer tracks this, and we use that to ensure that we write out enough +/// lines in the case that the input file doesn't end with a `separator` character. +pub fn copy_all_but_n_lines( + mut reader: R, + writer: &mut W, + n: usize, + separator: u8, +) -> std::io::Result { + // This function requires `n` > 0. Assert it! + assert!(n > 0); + let mut buffers: VecDeque = VecDeque::new(); + let mut buffered_terminated_lines: usize = 0; + let mut empty_buffers = vec![]; + let mut total_bytes_copied = 0; + loop { + // Try to buffer enough such that we can write out the entire first buffer. + loop { + // First check if we have enough lines buffered that we can write out the entire + // front buffer. If so, break. + let front_buffer = buffers.front(); + if let Some(front_buffer) = front_buffer { + if buffered_terminated_lines > n + front_buffer.terminated_lines() { + break; + } + } + // Else we need to try to buffer more data... + let mut new_buffer = empty_buffers.pop().unwrap_or_else(TakeAllLinesBuffer::new); + let fill_result = new_buffer.fill_buffer(&mut reader, separator)?; + if fill_result.bytes == 0 { + // fill_result.bytes == 0 => EoF. + break; + } + buffered_terminated_lines += fill_result.terminated_lines; + buffers.push_back(new_buffer); + } - fn next(&mut self) -> Option<::Item> { - match self.iter.next() { - Some(value) => self.buf.push_back(value), - None => None, + // If we've not buffered more lines than we need to hold back we must be done. + if buffered_terminated_lines < n + || (buffered_terminated_lines == n && !buffers.back().unwrap().partial_line()) + { + break; + } + + let excess_buffered_terminated_lines = buffered_terminated_lines - n; + // Since we have some data buffered can assume we have at least 1 buffer, so safe to unwrap. + let lines_to_write = if buffers.back().unwrap().partial_line() { + excess_buffered_terminated_lines + 1 + } else { + excess_buffered_terminated_lines + }; + let front_buffer = buffers.front_mut().unwrap(); + let write_result = front_buffer.write_lines(writer, lines_to_write, separator)?; + buffered_terminated_lines -= write_result.terminated_lines; + total_bytes_copied += write_result.bytes; + // If the front buffer is empty (which it probably is), push it into the empty-buffer-pool. + if front_buffer.is_empty() { + empty_buffers.push(buffers.pop_front().unwrap()); } } + Ok(total_bytes_copied) } /// Like `std::io::Take`, but for lines instead of bytes. @@ -118,38 +359,284 @@ pub fn take_lines(reader: R, limit: u64, separator: u8) -> TakeLines { #[cfg(test)] mod tests { - use std::io::BufRead; - use std::io::BufReader; + use std::io::{BufRead, BufReader}; - use crate::take::take_all_but; - use crate::take::take_lines; + use crate::take::{ + copy_all_but_n_bytes, copy_all_but_n_lines, take_lines, TakeAllBuffer, TakeAllLinesBuffer, + }; #[test] - fn test_fewer_elements() { - let mut iter = take_all_but([0, 1, 2].iter(), 2); - assert_eq!(Some(&0), iter.next()); - assert_eq!(None, iter.next()); + fn test_take_all_buffer_exact_bytes() { + let input_buffer = "abc"; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut take_all_buffer = TakeAllBuffer::new(); + let bytes_read = take_all_buffer.fill_buffer(&mut input_reader).unwrap(); + assert_eq!(bytes_read, input_buffer.len()); + assert_eq!(take_all_buffer.remaining_bytes(), input_buffer.len()); + assert_eq!(take_all_buffer.remaining_buffer(), input_buffer.as_bytes()); + assert!(!take_all_buffer.is_empty()); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + for (index, c) in input_buffer.bytes().enumerate() { + take_all_buffer + .write_bytes_exact(&mut output_reader, 1) + .unwrap(); + let buf_ref = output_reader.get_ref(); + assert_eq!(buf_ref.len(), index + 1); + assert_eq!(buf_ref[index], c); + assert_eq!( + take_all_buffer.remaining_bytes(), + input_buffer.len() - (index + 1) + ); + assert_eq!( + take_all_buffer.remaining_buffer(), + &input_buffer.as_bytes()[index + 1..] + ); + } + + assert!(take_all_buffer.is_empty()); + assert_eq!(take_all_buffer.remaining_bytes(), 0); + assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes()); } #[test] - fn test_same_number_of_elements() { - let mut iter = take_all_but([0, 1].iter(), 2); - assert_eq!(None, iter.next()); + fn test_take_all_buffer_all_bytes() { + let input_buffer = "abc"; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut take_all_buffer = TakeAllBuffer::new(); + let bytes_read = take_all_buffer.fill_buffer(&mut input_reader).unwrap(); + assert_eq!(bytes_read, input_buffer.len()); + assert_eq!(take_all_buffer.remaining_bytes(), input_buffer.len()); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_written = take_all_buffer.write_all(&mut output_reader).unwrap(); + assert_eq!(bytes_written, input_buffer.len()); + assert_eq!(output_reader.get_ref().as_slice(), input_buffer.as_bytes()); + + assert!(take_all_buffer.is_empty()); + assert_eq!(take_all_buffer.remaining_bytes(), 0); + assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes()); + + // Now do a write_all on an empty TakeAllBuffer. Confirm correct behavior. + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_written = take_all_buffer.write_all(&mut output_reader).unwrap(); + assert_eq!(bytes_written, 0); + assert_eq!(output_reader.get_ref().as_slice().len(), 0); } #[test] - fn test_more_elements() { - let mut iter = take_all_but([0].iter(), 2); - assert_eq!(None, iter.next()); + fn test_take_all_buffer_limit_bytes() { + let input_buffer = "abc"; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut take_all_buffer = TakeAllBuffer::new(); + let bytes_read = take_all_buffer.fill_buffer(&mut input_reader).unwrap(); + assert_eq!(bytes_read, input_buffer.len()); + assert_eq!(take_all_buffer.remaining_bytes(), input_buffer.len()); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + // Write all but 1 bytes. + let bytes_to_write = input_buffer.len() - 1; + let bytes_written = take_all_buffer + .write_bytes_limit(&mut output_reader, bytes_to_write) + .unwrap(); + assert_eq!(bytes_written, bytes_to_write); + assert_eq!( + output_reader.get_ref().as_slice(), + &input_buffer.as_bytes()[..bytes_to_write] + ); + assert!(!take_all_buffer.is_empty()); + assert_eq!(take_all_buffer.remaining_bytes(), 1); + assert_eq!( + take_all_buffer.remaining_buffer(), + &input_buffer.as_bytes()[bytes_to_write..] + ); + + // Write 1 more byte - i.e. last byte in buffer. + let bytes_to_write = 1; + let bytes_written = take_all_buffer + .write_bytes_limit(&mut output_reader, bytes_to_write) + .unwrap(); + assert_eq!(bytes_written, bytes_to_write); + assert_eq!(output_reader.get_ref().as_slice(), input_buffer.as_bytes()); + assert!(take_all_buffer.is_empty()); + assert_eq!(take_all_buffer.remaining_bytes(), 0); + assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes()); + + // Write 1 more byte - i.e. confirm behavior on already empty buffer. + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_to_write = 1; + let bytes_written = take_all_buffer + .write_bytes_limit(&mut output_reader, bytes_to_write) + .unwrap(); + assert_eq!(bytes_written, 0); + assert_eq!(output_reader.get_ref().as_slice().len(), 0); + assert!(take_all_buffer.is_empty()); + assert_eq!(take_all_buffer.remaining_bytes(), 0); + assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes()); } #[test] - fn test_zero_elements() { - let mut iter = take_all_but([0, 1, 2].iter(), 0); - assert_eq!(Some(&0), iter.next()); - assert_eq!(Some(&1), iter.next()); - assert_eq!(Some(&2), iter.next()); - assert_eq!(None, iter.next()); + fn test_take_all_lines_buffer() { + // 3 lines with new-lines and one partial line. + let input_buffer = "a\nb\nc\ndef"; + let separator = b'\n'; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut take_all_lines_buffer = TakeAllLinesBuffer::new(); + let fill_result = take_all_lines_buffer + .fill_buffer(&mut input_reader, separator) + .unwrap(); + assert_eq!(fill_result.bytes, input_buffer.len()); + assert_eq!(fill_result.terminated_lines, 3); + assert_eq!(take_all_lines_buffer.terminated_lines(), 3); + assert!(!take_all_lines_buffer.is_empty()); + assert!(take_all_lines_buffer.partial_line()); + + // Write 1st line. + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let lines_to_write = 1; + let write_result = take_all_lines_buffer + .write_lines(&mut output_reader, lines_to_write, separator) + .unwrap(); + assert_eq!(write_result.bytes, 2); + assert_eq!(write_result.terminated_lines, lines_to_write); + assert_eq!(output_reader.get_ref().as_slice(), "a\n".as_bytes()); + assert!(!take_all_lines_buffer.is_empty()); + assert_eq!(take_all_lines_buffer.terminated_lines(), 2); + + // Write 2nd line. + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let lines_to_write = 1; + let write_result = take_all_lines_buffer + .write_lines(&mut output_reader, lines_to_write, separator) + .unwrap(); + assert_eq!(write_result.bytes, 2); + assert_eq!(write_result.terminated_lines, lines_to_write); + assert_eq!(output_reader.get_ref().as_slice(), "b\n".as_bytes()); + assert!(!take_all_lines_buffer.is_empty()); + assert_eq!(take_all_lines_buffer.terminated_lines(), 1); + + // Now try to write 3 lines even though we have only 1 line remaining. Should write everything left in the buffer. + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let lines_to_write = 3; + let write_result = take_all_lines_buffer + .write_lines(&mut output_reader, lines_to_write, separator) + .unwrap(); + assert_eq!(write_result.bytes, 5); + assert_eq!(write_result.terminated_lines, 1); + assert_eq!(output_reader.get_ref().as_slice(), "c\ndef".as_bytes()); + assert!(take_all_lines_buffer.is_empty()); + assert_eq!(take_all_lines_buffer.terminated_lines(), 0); + + // Test empty buffer. + let input_buffer = ""; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut take_all_lines_buffer = TakeAllLinesBuffer::new(); + let fill_result = take_all_lines_buffer + .fill_buffer(&mut input_reader, separator) + .unwrap(); + assert_eq!(fill_result.bytes, 0); + assert_eq!(fill_result.terminated_lines, 0); + assert_eq!(take_all_lines_buffer.terminated_lines(), 0); + assert!(take_all_lines_buffer.is_empty()); + assert!(!take_all_lines_buffer.partial_line()); + + // Test buffer that ends with newline. + let input_buffer = "\n"; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut take_all_lines_buffer = TakeAllLinesBuffer::new(); + let fill_result = take_all_lines_buffer + .fill_buffer(&mut input_reader, separator) + .unwrap(); + assert_eq!(fill_result.bytes, 1); + assert_eq!(fill_result.terminated_lines, 1); + assert_eq!(take_all_lines_buffer.terminated_lines(), 1); + assert!(!take_all_lines_buffer.is_empty()); + assert!(!take_all_lines_buffer.partial_line()); + } + + #[test] + fn test_copy_all_but_n_bytes() { + // Test the copy_all_but_bytes fn. Test several scenarios... + // 1 - Hold back more bytes than the input will provide. Should have nothing written to output. + let input_buffer = "a\nb\nc\ndef"; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = copy_all_but_n_bytes( + &mut input_reader, + &mut output_reader, + input_buffer.len() + 1, + ) + .unwrap(); + assert_eq!(bytes_copied, 0); + + // 2 - Hold back exactly the number of bytes the input will provide. Should have nothing written to output. + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_bytes(&mut input_reader, &mut output_reader, input_buffer.len()) + .unwrap(); + assert_eq!(bytes_copied, 0); + + // 3 - Hold back 1 fewer byte than input will provide. Should have one byte written to output. + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = copy_all_but_n_bytes( + &mut input_reader, + &mut output_reader, + input_buffer.len() - 1, + ) + .unwrap(); + assert_eq!(bytes_copied, 1); + assert_eq!(output_reader.get_ref()[..], input_buffer.as_bytes()[0..1]); + } + + #[test] + fn test_copy_all_but_n_lines() { + // Test the copy_all_but_lines fn. Test several scenarios... + // 1 - Hold back more lines than the input will provide. Should have nothing written to output. + let input_buffer = "a\nb\nc\ndef"; + let separator = b'\n'; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_lines(&mut input_reader, &mut output_reader, 5, separator).unwrap(); + assert_eq!(bytes_copied, 0); + + // 2 - Hold back exactly the number of lines the input will provide. Should have nothing written to output. + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_lines(&mut input_reader, &mut output_reader, 4, separator).unwrap(); + assert_eq!(bytes_copied, 0); + + // 3 - Hold back 1 fewer lines than input will provide. Should have one line written to output. + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_lines(&mut input_reader, &mut output_reader, 3, separator).unwrap(); + assert_eq!(bytes_copied, 2); + assert_eq!(output_reader.get_ref()[..], input_buffer.as_bytes()[0..2]); + + // Now test again with an input that has a new-line ending... + // 4 - Hold back more lines than the input will provide. Should have nothing written to output. + let input_buffer = "a\nb\nc\ndef\n"; + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_lines(&mut input_reader, &mut output_reader, 5, separator).unwrap(); + assert_eq!(bytes_copied, 0); + + // 5 - Hold back exactly the number of lines the input will provide. Should have nothing written to output. + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_lines(&mut input_reader, &mut output_reader, 4, separator).unwrap(); + assert_eq!(bytes_copied, 0); + + // 6 - Hold back 1 fewer lines than input will provide. Should have one line written to output. + let mut input_reader = std::io::Cursor::new(input_buffer); + let mut output_reader = std::io::Cursor::new(vec![0x10; 0]); + let bytes_copied = + copy_all_but_n_lines(&mut input_reader, &mut output_reader, 3, separator).unwrap(); + assert_eq!(bytes_copied, 2); + assert_eq!(output_reader.get_ref()[..], input_buffer.as_bytes()[0..2]); } #[test]