mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 20:47:46 +00:00
Merge pull request #2166 from jfinkels/wc-word-countable-lines
wc: add lines() method for iterating over lines
This commit is contained in:
commit
e3b7a8bd22
8 changed files with 241 additions and 46 deletions
72
src/uu/wc/src/countable.rs
Normal file
72
src/uu/wc/src/countable.rs
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
//! Traits and implementations for iterating over lines in a file-like object.
|
||||||
|
//!
|
||||||
|
//! This module provides a [`WordCountable`] trait and implementations
|
||||||
|
//! for some common file-like objects. Use the [`WordCountable::lines`]
|
||||||
|
//! method to get an iterator over lines of a file-like object.
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{self, BufRead, BufReader, Read, StdinLock};
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
use std::os::unix::io::AsRawFd;
|
||||||
|
|
||||||
|
/// A readable, file-like source whose lines can be iterated over (Unix variant).
///
/// On Unix, implementors must also provide a raw file descriptor via
/// [`AsRawFd`] in addition to implementing [`Read`].
#[cfg(unix)]
|
||||||
|
pub trait WordCountable: AsRawFd + Read {
|
||||||
|
/// The buffered reader type that the returned [`Lines`] iterator reads from.
type Buffered: BufRead;
|
||||||
|
/// Consumes `self` and returns an iterator over its lines.
fn lines(self) -> Lines<Self::Buffered>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A readable, file-like source whose lines can be iterated over (non-Unix variant).
///
/// Same members as the Unix definition, but only [`Read`] is required;
/// there is no `AsRawFd` supertrait on these platforms.
#[cfg(not(unix))]
|
||||||
|
pub trait WordCountable: Read {
|
||||||
|
/// The buffered reader type that the returned [`Lines`] iterator reads from.
type Buffered: BufRead;
|
||||||
|
/// Consumes `self` and returns an iterator over its lines.
fn lines(self) -> Lines<Self::Buffered>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A locked standard input handle is used directly as the buffered reader;
/// no extra wrapper is created around it.
impl WordCountable for StdinLock<'_> {
|
||||||
|
type Buffered = Self;
|
||||||
|
|
||||||
|
/// Wraps the stdin lock itself in a [`Lines`] iterator.
fn lines(self) -> Lines<Self::Buffered>
|
||||||
|
where
|
||||||
|
Self: Sized,
|
||||||
|
{
|
||||||
|
Lines { buf: self }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// A `File` is wrapped in a [`BufReader`] so that it can be read line by line.
impl WordCountable for File {
|
||||||
|
type Buffered = BufReader<Self>;
|
||||||
|
|
||||||
|
/// Takes ownership of the file, wraps it in a new `BufReader`, and returns
/// a [`Lines`] iterator over that reader.
fn lines(self) -> Lines<Self::Buffered>
|
||||||
|
where
|
||||||
|
Self: Sized,
|
||||||
|
{
|
||||||
|
Lines {
|
||||||
|
buf: BufReader::new(self),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the lines of an instance of `BufRead`.
|
||||||
|
///
|
||||||
|
/// Similar to [`io::Lines`] but yields each line as a `Vec<u8>` and
|
||||||
|
/// includes the newline character (`\n`, the `0xA` byte) that
|
||||||
|
/// terminates the line.
|
||||||
|
///
|
||||||
|
/// [`io::Lines`]: io::Lines
|
||||||
|
pub struct Lines<B> {
|
||||||
|
/// The underlying reader that each line is read from.
buf: B,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Yields one freshly allocated `Vec<u8>` per line, read with
/// `read_until(b'\n', ..)` from the wrapped reader.
impl<B: BufRead> Iterator for Lines<B> {
|
||||||
|
type Item = io::Result<Vec<u8>>;
|
||||||
|
|
||||||
|
/// Reads the next line, up to and including the `\n` delimiter if one is found.
///
/// Returns `None` when the reader reports zero bytes read, `Some(Ok(line))`
/// when at least one byte was read, and `Some(Err(e))` when the read fails.
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let mut line = Vec::new();
|
||||||
|
|
||||||
|
// NOTE(review): per the original author's observation, reading from a TTY
// appears to surface end-of-input as an error rather than as a zero-length
// read (`Ok(0)`) like a regular file does; confirm on the target platforms.
|
||||||
|
// This is why each item is an `io::Result` inside the iterator's `Option`.
|
||||||
|
match self.buf.read_until(b'\n', &mut line) {
|
||||||
|
// Zero bytes read: nothing left, so iteration ends.
Ok(0) => None,
|
||||||
|
// Some bytes were read; the byte count itself is not needed.
Ok(_n) => Some(Ok(line)),
|
||||||
|
// The error is passed through and `line` is dropped. No state is kept, so the
// iterator does not stop by itself: a later call to `next` attempts another read.
Err(e) => Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -11,17 +11,17 @@
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
|
|
||||||
mod count_bytes;
|
mod count_bytes;
|
||||||
|
mod countable;
|
||||||
use count_bytes::count_bytes_fast;
|
use count_bytes::count_bytes_fast;
|
||||||
|
use countable::WordCountable;
|
||||||
|
|
||||||
use clap::{App, Arg, ArgMatches};
|
use clap::{App, Arg, ArgMatches};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufRead, BufReader, Read, StdinLock, Write};
|
use std::io::{self, Write};
|
||||||
use std::ops::{Add, AddAssign};
|
use std::ops::{Add, AddAssign};
|
||||||
#[cfg(unix)]
|
|
||||||
use std::os::unix::io::AsRawFd;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::str::from_utf8;
|
use std::str::from_utf8;
|
||||||
|
|
||||||
|
@ -82,32 +82,6 @@ impl Settings {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
|
||||||
trait WordCountable: AsRawFd + Read {
|
|
||||||
type Buffered: BufRead;
|
|
||||||
fn get_buffered(self) -> Self::Buffered;
|
|
||||||
}
|
|
||||||
#[cfg(not(unix))]
|
|
||||||
trait WordCountable: Read {
|
|
||||||
type Buffered: BufRead;
|
|
||||||
fn get_buffered(self) -> Self::Buffered;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WordCountable for StdinLock<'_> {
|
|
||||||
type Buffered = Self;
|
|
||||||
|
|
||||||
fn get_buffered(self) -> Self::Buffered {
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl WordCountable for File {
|
|
||||||
type Buffered = BufReader<Self>;
|
|
||||||
|
|
||||||
fn get_buffered(self) -> Self::Buffered {
|
|
||||||
BufReader::new(self)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Default, Copy, Clone)]
|
#[derive(Debug, Default, Copy, Clone)]
|
||||||
struct WordCount {
|
struct WordCount {
|
||||||
bytes: usize,
|
bytes: usize,
|
||||||
|
@ -270,25 +244,16 @@ fn word_count_from_reader<T: WordCountable>(
|
||||||
let mut byte_count: usize = 0;
|
let mut byte_count: usize = 0;
|
||||||
let mut char_count: usize = 0;
|
let mut char_count: usize = 0;
|
||||||
let mut longest_line_length: usize = 0;
|
let mut longest_line_length: usize = 0;
|
||||||
let mut raw_line = Vec::new();
|
|
||||||
let mut ends_lf: bool;
|
let mut ends_lf: bool;
|
||||||
|
|
||||||
// reading from a TTY seems to raise a condition on, rather than return Some(0) like a file.
|
// reading from a TTY seems to raise a condition on, rather than return Some(0) like a file.
|
||||||
// hence the option wrapped in a result here
|
// hence the option wrapped in a result here
|
||||||
let mut buffered_reader = reader.get_buffered();
|
for line_result in reader.lines() {
|
||||||
loop {
|
let raw_line = match line_result {
|
||||||
match buffered_reader.read_until(LF, &mut raw_line) {
|
Ok(l) => l,
|
||||||
Ok(n) => {
|
Err(e) => {
|
||||||
if n == 0 {
|
show_warning!("Error while reading {}: {}", path, e);
|
||||||
break;
|
continue;
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(ref e) => {
|
|
||||||
if !raw_line.is_empty() {
|
|
||||||
show_warning!("Error while reading {}: {}", path, e);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -317,8 +282,6 @@ fn word_count_from_reader<T: WordCountable>(
|
||||||
longest_line_length = current_char_count - (ends_lf as usize);
|
longest_line_length = current_char_count - (ends_lf as usize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
raw_line.truncate(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(WordCount {
|
Ok(WordCount {
|
||||||
|
|
|
@ -112,3 +112,60 @@ fn test_multiple_default() {
|
||||||
alice_in_wonderland.txt\n 36 370 2189 total\n",
|
alice_in_wonderland.txt\n 36 370 2189 total\n",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test for an empty file.
///
/// Runs the command with `-clmwL` on the `emptyfile.txt` fixture and checks
/// that every reported count is zero.
|
||||||
|
#[test]
|
||||||
|
fn test_file_empty() {
|
||||||
|
// TODO There is a leading space in the output that should be
|
||||||
|
// removed; see issue #2173.
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-clmwL", "emptyfile.txt"])
|
||||||
|
.run()
|
||||||
|
.stdout_is(" 0 0 0 0 0 emptyfile.txt\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test for a file containing a single non-whitespace character
|
||||||
|
/// *without* a trailing newline.
///
/// Runs the command with `-clmwL` on the `notrailingnewline.txt` fixture and
/// checks the exact standard output.
|
||||||
|
#[test]
|
||||||
|
fn test_file_single_line_no_trailing_newline() {
|
||||||
|
// TODO There is a leading space in the output that should be
|
||||||
|
// removed; see issue #2173.
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-clmwL", "notrailingnewline.txt"])
|
||||||
|
.run()
|
||||||
|
.stdout_is(" 1 1 2 2 1 notrailingnewline.txt\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test for a file that has 100 empty lines (that is, the contents of
|
||||||
|
/// the file are the newline character repeated one hundred times).
///
/// Runs the command with `-clmwL` on the `manyemptylines.txt` fixture and
/// checks the exact standard output.
|
||||||
|
#[test]
|
||||||
|
fn test_file_many_empty_lines() {
|
||||||
|
// TODO There is a leading space in the output that should be
|
||||||
|
// removed; see issue #2173.
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-clmwL", "manyemptylines.txt"])
|
||||||
|
.run()
|
||||||
|
.stdout_is(" 100 0 100 100 0 manyemptylines.txt\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test for a file that has one long line comprising only spaces.
///
/// Runs the command with `-clmwL` on the `onelongemptyline.txt` fixture and
/// checks the exact standard output.
|
||||||
|
#[test]
|
||||||
|
fn test_file_one_long_line_only_spaces() {
|
||||||
|
// TODO There is a leading space in the output that should be
|
||||||
|
// removed; see issue #2173.
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-clmwL", "onelongemptyline.txt"])
|
||||||
|
.run()
|
||||||
|
.stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test for a file that has one long line comprising a single "word".
///
/// Runs the command with `-clmwL` on the `onelongword.txt` fixture and
/// checks the exact standard output.
|
||||||
|
#[test]
|
||||||
|
fn test_file_one_long_word() {
|
||||||
|
// TODO There is a leading space in the output that should be
|
||||||
|
// removed; see issue #2173.
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-clmwL", "onelongword.txt"])
|
||||||
|
.run()
|
||||||
|
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
|
||||||
|
}
|
||||||
|
|
0
tests/fixtures/wc/emptyfile.txt
vendored
Normal file
0
tests/fixtures/wc/emptyfile.txt
vendored
Normal file
100
tests/fixtures/wc/manyemptylines.txt
vendored
Normal file
100
tests/fixtures/wc/manyemptylines.txt
vendored
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
1
tests/fixtures/wc/notrailingnewline.txt
vendored
Normal file
1
tests/fixtures/wc/notrailingnewline.txt
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
a
|
1
tests/fixtures/wc/onelongemptyline.txt
vendored
Normal file
1
tests/fixtures/wc/onelongemptyline.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
1
tests/fixtures/wc/onelongword.txt
vendored
Normal file
1
tests/fixtures/wc/onelongword.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue