mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #4696 from jeddenlea/wc
wc: streaming --files0-from and other improvements
This commit is contained in:
commit
388fa1b717
9 changed files with 675 additions and 331 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3350,6 +3350,7 @@ dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"libc",
|
"libc",
|
||||||
"nix",
|
"nix",
|
||||||
|
"thiserror",
|
||||||
"unicode-width",
|
"unicode-width",
|
||||||
"uucore",
|
"uucore",
|
||||||
]
|
]
|
||||||
|
|
|
@ -18,6 +18,7 @@ path = "src/wc.rs"
|
||||||
clap = { workspace=true }
|
clap = { workspace=true }
|
||||||
uucore = { workspace=true, features=["pipes"] }
|
uucore = { workspace=true, features=["pipes"] }
|
||||||
bytecount = { workspace=true }
|
bytecount = { workspace=true }
|
||||||
|
thiserror = { workspace=true }
|
||||||
unicode-width = { workspace=true }
|
unicode-width = { workspace=true }
|
||||||
|
|
||||||
[target.'cfg(unix)'.dependencies]
|
[target.'cfg(unix)'.dependencies]
|
||||||
|
|
|
@ -28,6 +28,7 @@ impl WordCountable for StdinLock<'_> {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WordCountable for File {
|
impl WordCountable for File {
|
||||||
type Buffered = BufReader<Self>;
|
type Buffered = BufReader<Self>;
|
||||||
|
|
||||||
|
|
|
@ -5,59 +5,79 @@
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
// * file that was distributed with this source code.
|
// * file that was distributed with this source code.
|
||||||
|
|
||||||
// cSpell:ignore wc wc's
|
// cSpell:ignore ilog wc wc's
|
||||||
|
|
||||||
mod count_fast;
|
mod count_fast;
|
||||||
mod countable;
|
mod countable;
|
||||||
mod utf8;
|
mod utf8;
|
||||||
mod word_count;
|
mod word_count;
|
||||||
use clap::builder::ValueParser;
|
|
||||||
use count_fast::{count_bytes_chars_and_lines_fast, count_bytes_fast};
|
use std::{
|
||||||
use countable::WordCountable;
|
borrow::{Borrow, Cow},
|
||||||
|
cmp::max,
|
||||||
|
ffi::OsString,
|
||||||
|
fs::{self, File},
|
||||||
|
io::{self, Write},
|
||||||
|
iter,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
|
use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command};
|
||||||
|
use thiserror::Error;
|
||||||
use unicode_width::UnicodeWidthChar;
|
use unicode_width::UnicodeWidthChar;
|
||||||
use utf8::{BufReadDecoder, BufReadDecoderError};
|
use utf8::{BufReadDecoder, BufReadDecoderError};
|
||||||
use uucore::{format_usage, help_about, help_usage, show};
|
|
||||||
use word_count::{TitledWordCount, WordCount};
|
|
||||||
|
|
||||||
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
|
use uucore::{
|
||||||
|
error::{FromIo, UError, UResult},
|
||||||
|
format_usage, help_about, help_usage,
|
||||||
|
quoting_style::{escape_name, QuotingStyle},
|
||||||
|
show,
|
||||||
|
};
|
||||||
|
|
||||||
use std::cmp::max;
|
use crate::{
|
||||||
use std::error::Error;
|
count_fast::{count_bytes_chars_and_lines_fast, count_bytes_fast},
|
||||||
use std::ffi::{OsStr, OsString};
|
countable::WordCountable,
|
||||||
use std::fmt::Display;
|
word_count::WordCount,
|
||||||
use std::fs::{self, File};
|
};
|
||||||
use std::io::{self, Read, Write};
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
use uucore::error::{UError, UResult, USimpleError};
|
|
||||||
use uucore::quoting_style::{escape_name, QuotingStyle};
|
|
||||||
|
|
||||||
/// The minimum character width for formatting counts when reading from stdin.
|
/// The minimum character width for formatting counts when reading from stdin.
|
||||||
const MINIMUM_WIDTH: usize = 7;
|
const MINIMUM_WIDTH: usize = 7;
|
||||||
|
|
||||||
struct Settings {
|
struct Settings<'a> {
|
||||||
show_bytes: bool,
|
show_bytes: bool,
|
||||||
show_chars: bool,
|
show_chars: bool,
|
||||||
show_lines: bool,
|
show_lines: bool,
|
||||||
show_words: bool,
|
show_words: bool,
|
||||||
show_max_line_length: bool,
|
show_max_line_length: bool,
|
||||||
files0_from_stdin_mode: bool,
|
files0_from: Option<Input<'a>>,
|
||||||
title_quoting_style: QuotingStyle,
|
|
||||||
total_when: TotalWhen,
|
total_when: TotalWhen,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Settings {
|
impl Default for Settings<'_> {
|
||||||
fn new(matches: &ArgMatches) -> Self {
|
fn default() -> Self {
|
||||||
let title_quoting_style = QuotingStyle::Shell {
|
// Defaults if none of -c, -m, -l, -w, nor -L are specified.
|
||||||
escape: true,
|
Self {
|
||||||
always_quote: false,
|
show_bytes: true,
|
||||||
show_control: false,
|
show_chars: false,
|
||||||
};
|
show_lines: true,
|
||||||
|
show_words: true,
|
||||||
|
show_max_line_length: false,
|
||||||
|
files0_from: None,
|
||||||
|
total_when: TotalWhen::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let files0_from_stdin_mode = match matches.get_one::<String>(options::FILES0_FROM) {
|
impl<'a> Settings<'a> {
|
||||||
Some(files_0_from) => files_0_from == STDIN_REPR,
|
fn new(matches: &'a ArgMatches) -> Self {
|
||||||
None => false,
|
let files0_from = matches
|
||||||
};
|
.get_one::<OsString>(options::FILES0_FROM)
|
||||||
|
.map(Into::into);
|
||||||
|
|
||||||
|
let total_when = matches
|
||||||
|
.get_one::<String>(options::TOTAL)
|
||||||
|
.map(Into::into)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
let settings = Self {
|
let settings = Self {
|
||||||
show_bytes: matches.get_flag(options::BYTES),
|
show_bytes: matches.get_flag(options::BYTES),
|
||||||
|
@ -65,47 +85,39 @@ impl Settings {
|
||||||
show_lines: matches.get_flag(options::LINES),
|
show_lines: matches.get_flag(options::LINES),
|
||||||
show_words: matches.get_flag(options::WORDS),
|
show_words: matches.get_flag(options::WORDS),
|
||||||
show_max_line_length: matches.get_flag(options::MAX_LINE_LENGTH),
|
show_max_line_length: matches.get_flag(options::MAX_LINE_LENGTH),
|
||||||
files0_from_stdin_mode,
|
files0_from,
|
||||||
title_quoting_style,
|
total_when,
|
||||||
total_when: matches.get_one::<String>(options::TOTAL).unwrap().into(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if settings.show_bytes
|
if settings.number_enabled() > 0 {
|
||||||
|| settings.show_chars
|
settings
|
||||||
|| settings.show_lines
|
} else {
|
||||||
|| settings.show_words
|
Self {
|
||||||
|| settings.show_max_line_length
|
files0_from: settings.files0_from,
|
||||||
{
|
total_when,
|
||||||
return settings;
|
..Default::default()
|
||||||
}
|
}
|
||||||
|
|
||||||
Self {
|
|
||||||
show_bytes: true,
|
|
||||||
show_chars: false,
|
|
||||||
show_lines: true,
|
|
||||||
show_words: true,
|
|
||||||
show_max_line_length: false,
|
|
||||||
files0_from_stdin_mode,
|
|
||||||
title_quoting_style: settings.title_quoting_style,
|
|
||||||
total_when: settings.total_when,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn number_enabled(&self) -> u32 {
|
fn number_enabled(&self) -> u32 {
|
||||||
let mut result = 0;
|
[
|
||||||
result += self.show_bytes as u32;
|
self.show_bytes,
|
||||||
result += self.show_chars as u32;
|
self.show_chars,
|
||||||
result += self.show_lines as u32;
|
self.show_lines,
|
||||||
result += self.show_max_line_length as u32;
|
self.show_max_line_length,
|
||||||
result += self.show_words as u32;
|
self.show_words,
|
||||||
result
|
]
|
||||||
|
.into_iter()
|
||||||
|
.map(Into::<u32>::into)
|
||||||
|
.sum()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const ABOUT: &str = help_about!("wc.md");
|
const ABOUT: &str = help_about!("wc.md");
|
||||||
const USAGE: &str = help_usage!("wc.md");
|
const USAGE: &str = help_usage!("wc.md");
|
||||||
|
|
||||||
pub mod options {
|
mod options {
|
||||||
pub static BYTES: &str = "bytes";
|
pub static BYTES: &str = "bytes";
|
||||||
pub static CHAR: &str = "chars";
|
pub static CHAR: &str = "chars";
|
||||||
pub static FILES0_FROM: &str = "files0-from";
|
pub static FILES0_FROM: &str = "files0-from";
|
||||||
|
@ -114,68 +126,202 @@ pub mod options {
|
||||||
pub static TOTAL: &str = "total";
|
pub static TOTAL: &str = "total";
|
||||||
pub static WORDS: &str = "words";
|
pub static WORDS: &str = "words";
|
||||||
}
|
}
|
||||||
|
|
||||||
static ARG_FILES: &str = "files";
|
static ARG_FILES: &str = "files";
|
||||||
static STDIN_REPR: &str = "-";
|
static STDIN_REPR: &str = "-";
|
||||||
|
|
||||||
enum StdinKind {
|
static QS_ESCAPE: &QuotingStyle = &QuotingStyle::Shell {
|
||||||
/// Stdin specified on command-line with "-".
|
escape: true,
|
||||||
Explicit,
|
always_quote: false,
|
||||||
|
show_control: false,
|
||||||
|
};
|
||||||
|
static QS_QUOTE_ESCAPE: &QuotingStyle = &QuotingStyle::Shell {
|
||||||
|
escape: true,
|
||||||
|
always_quote: true,
|
||||||
|
show_control: false,
|
||||||
|
};
|
||||||
|
|
||||||
/// Stdin implicitly specified on command-line by not passing any positional argument.
|
/// Supported inputs.
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum Inputs<'a> {
|
||||||
|
/// Default Standard input, i.e. no arguments.
|
||||||
|
Stdin,
|
||||||
|
/// Files; "-" means stdin, possibly multiple times!
|
||||||
|
Paths(Vec<Input<'a>>),
|
||||||
|
/// --files0-from; "-" means stdin.
|
||||||
|
Files0From(Input<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Inputs<'a> {
|
||||||
|
fn new(matches: &'a ArgMatches) -> UResult<Self> {
|
||||||
|
let arg_files = matches.get_many::<OsString>(ARG_FILES);
|
||||||
|
let files0_from = matches.get_one::<OsString>(options::FILES0_FROM);
|
||||||
|
|
||||||
|
match (arg_files, files0_from) {
|
||||||
|
(None, None) => Ok(Self::Stdin),
|
||||||
|
(Some(files), None) => Ok(Self::Paths(files.map(Into::into).collect())),
|
||||||
|
(None, Some(path)) => {
|
||||||
|
// If path is a file, and the file isn't too large, we'll load it ahead
|
||||||
|
// of time. Every path within the file will have its length checked to
|
||||||
|
// hopefully better align the output columns.
|
||||||
|
let input = Input::from(path);
|
||||||
|
match input.try_as_files0()? {
|
||||||
|
Some(paths) => Ok(Self::Paths(paths)),
|
||||||
|
None => Ok(Self::Files0From(input)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(_), Some(_)) => Err(WcError::FilesDisabled.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an iterator which yields values borrowed from the command line arguments.
|
||||||
|
// Returns an error if the file specified in --files0-from cannot be opened.
|
||||||
|
fn try_iter(
|
||||||
|
&'a self,
|
||||||
|
settings: &'a Settings<'a>,
|
||||||
|
) -> UResult<impl Iterator<Item = InputIterItem<'a>>> {
|
||||||
|
let base: Box<dyn Iterator<Item = _>> = match self {
|
||||||
|
Self::Stdin => Box::new(iter::once(Ok(Input::Stdin(StdinKind::Implicit)))),
|
||||||
|
Self::Paths(inputs) => Box::new(inputs.iter().map(|i| Ok(i.as_borrowed()))),
|
||||||
|
Self::Files0From(input) => match input {
|
||||||
|
Input::Path(path) => Box::new(files0_iter_file(path)?),
|
||||||
|
Input::Stdin(_) => Box::new(files0_iter_stdin()),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
// The 1-based index of each yielded item must be tracked for error reporting.
|
||||||
|
let mut with_idx = base.enumerate().map(|(i, v)| (i + 1, v));
|
||||||
|
let files0_from_path = settings.files0_from.as_ref().map(|p| p.as_borrowed());
|
||||||
|
|
||||||
|
let iter = iter::from_fn(move || {
|
||||||
|
let (idx, next) = with_idx.next()?;
|
||||||
|
match next {
|
||||||
|
// filter zero length file names...
|
||||||
|
Ok(Input::Path(p)) if p.as_os_str().is_empty() => Some(Err({
|
||||||
|
let maybe_ctx = files0_from_path.as_ref().map(|p| (p, idx));
|
||||||
|
WcError::zero_len(maybe_ctx).into()
|
||||||
|
})),
|
||||||
|
_ => Some(next),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Ok(iter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
enum StdinKind {
|
||||||
|
/// Specified on command-line with "-" (STDIN_REPR)
|
||||||
|
Explicit,
|
||||||
|
/// Implied by the lack of any arguments
|
||||||
Implicit,
|
Implicit,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Supported inputs.
|
/// Represents a single input, either to be counted or processed for other file names via
|
||||||
enum Input {
|
/// --files0-from.
|
||||||
/// A regular file.
|
#[derive(Debug)]
|
||||||
Path(PathBuf),
|
enum Input<'a> {
|
||||||
|
Path(Cow<'a, Path>),
|
||||||
/// Standard input.
|
|
||||||
Stdin(StdinKind),
|
Stdin(StdinKind),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<&OsStr> for Input {
|
impl From<PathBuf> for Input<'_> {
|
||||||
fn from(input: &OsStr) -> Self {
|
fn from(p: PathBuf) -> Self {
|
||||||
if input == STDIN_REPR {
|
if p.as_os_str() == STDIN_REPR {
|
||||||
Self::Stdin(StdinKind::Explicit)
|
Self::Stdin(StdinKind::Explicit)
|
||||||
} else {
|
} else {
|
||||||
Self::Path(input.into())
|
Self::Path(Cow::Owned(p))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Input {
|
impl<'a, T: AsRef<Path> + ?Sized> From<&'a T> for Input<'a> {
|
||||||
/// Converts input to title that appears in stats.
|
fn from(p: &'a T) -> Self {
|
||||||
fn to_title(&self, quoting_style: &QuotingStyle) -> Option<String> {
|
let p = p.as_ref();
|
||||||
|
if p.as_os_str() == STDIN_REPR {
|
||||||
|
Self::Stdin(StdinKind::Explicit)
|
||||||
|
} else {
|
||||||
|
Self::Path(Cow::Borrowed(p))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Input<'a> {
|
||||||
|
/// Translates Path(Cow::Owned(_)) to Path(Cow::Borrowed(_)).
|
||||||
|
fn as_borrowed(&'a self) -> Self {
|
||||||
match self {
|
match self {
|
||||||
Self::Path(path) => Some(escape_name(&path.clone().into_os_string(), quoting_style)),
|
Self::Path(p) => Self::Path(Cow::Borrowed(p.borrow())),
|
||||||
Self::Stdin(StdinKind::Explicit) => {
|
Self::Stdin(k) => Self::Stdin(*k),
|
||||||
Some(escape_name(OsStr::new(STDIN_REPR), quoting_style))
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts input to title that appears in stats.
|
||||||
|
fn to_title(&self) -> Option<Cow<str>> {
|
||||||
|
match self {
|
||||||
|
Self::Path(path) => Some(match path.to_str() {
|
||||||
|
Some(s) if !s.contains('\n') => Cow::Borrowed(s),
|
||||||
|
_ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)),
|
||||||
|
}),
|
||||||
|
Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)),
|
||||||
Self::Stdin(StdinKind::Implicit) => None,
|
Self::Stdin(StdinKind::Implicit) => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn path_display(&self, quoting_style: &QuotingStyle) -> String {
|
/// Converts input into the form that appears in errors.
|
||||||
|
fn path_display(&self) -> String {
|
||||||
match self {
|
match self {
|
||||||
Self::Path(path) => escape_name(&path.clone().into_os_string(), quoting_style),
|
Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE),
|
||||||
Self::Stdin(_) => escape_name(OsStr::new("standard input"), quoting_style),
|
Self::Stdin(_) => String::from("standard input"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// When given --files0-from, we may be given a path or stdin. Either may be a stream or
|
||||||
|
/// a regular file. If given a regular file of at most 10 MiB, it will be consumed and turned into
|
||||||
|
/// a Vec of Input::Paths which can be scanned to determine the widths of the columns that
|
||||||
|
/// will ultimately be printed.
|
||||||
|
fn try_as_files0(&self) -> UResult<Option<Vec<Input<'static>>>> {
|
||||||
|
match self {
|
||||||
|
Self::Path(path) => match fs::metadata(path) {
|
||||||
|
Ok(meta) if meta.is_file() && meta.len() <= (10 << 20) => Ok(Some(
|
||||||
|
files0_iter_file(path)?.collect::<Result<Vec<_>, _>>()?,
|
||||||
|
)),
|
||||||
|
_ => Ok(None),
|
||||||
|
},
|
||||||
|
Self::Stdin(_) if is_stdin_small_file() => {
|
||||||
|
Ok(Some(files0_iter_stdin().collect::<Result<Vec<_>, _>>()?))
|
||||||
|
}
|
||||||
|
Self::Stdin(_) => Ok(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn is_stdin_small_file() -> bool {
|
||||||
|
use std::os::unix::io::{AsRawFd, FromRawFd};
|
||||||
|
// Safety: we'll rely on Rust to give us a valid RawFd for stdin with which we can attempt to
|
||||||
|
// open a File, but only for the sake of fetching .metadata(). ManuallyDrop will ensure we
|
||||||
|
// don't do anything else to the FD if anything unexpected happens.
|
||||||
|
let f = std::mem::ManuallyDrop::new(unsafe { File::from_raw_fd(io::stdin().as_raw_fd()) });
|
||||||
|
matches!(f.metadata(), Ok(meta) if meta.is_file() && meta.len() <= (10 << 20))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
// Windows presents a piped stdin as a "normal file" with a length equal to however many bytes
|
||||||
|
// have been buffered at the time it's checked. To be safe, we must never assume it's a file.
|
||||||
|
fn is_stdin_small_file() -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
/// When to show the "total" line
|
/// When to show the "total" line
|
||||||
#[derive(PartialEq)]
|
#[derive(Clone, Copy, Default, PartialEq)]
|
||||||
enum TotalWhen {
|
enum TotalWhen {
|
||||||
|
#[default]
|
||||||
Auto,
|
Auto,
|
||||||
Always,
|
Always,
|
||||||
Only,
|
Only,
|
||||||
Never,
|
Never,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<&String> for TotalWhen {
|
impl<T: AsRef<str>> From<T> for TotalWhen {
|
||||||
fn from(s: &String) -> Self {
|
fn from(s: T) -> Self {
|
||||||
match s.as_ref() {
|
match s.as_ref() {
|
||||||
"auto" => Self::Auto,
|
"auto" => Self::Auto,
|
||||||
"always" => Self::Always,
|
"always" => Self::Always,
|
||||||
|
@ -196,33 +342,36 @@ impl TotalWhen {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Error)]
|
||||||
enum WcError {
|
enum WcError {
|
||||||
FilesDisabled(String),
|
#[error("file operands cannot be combined with --files0-from")]
|
||||||
StdinReprNotAllowed(String),
|
FilesDisabled,
|
||||||
|
#[error("when reading file names from stdin, no file name of '-' allowed")]
|
||||||
|
StdinReprNotAllowed,
|
||||||
|
#[error("invalid zero-length file name")]
|
||||||
|
ZeroLengthFileName,
|
||||||
|
#[error("{path}:{idx}: invalid zero-length file name")]
|
||||||
|
ZeroLengthFileNameCtx { path: Cow<'static, str>, idx: usize },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WcError {
|
||||||
|
fn zero_len(ctx: Option<(&Input, usize)>) -> Self {
|
||||||
|
match ctx {
|
||||||
|
Some((input, idx)) => {
|
||||||
|
let path = match input {
|
||||||
|
Input::Stdin(_) => STDIN_REPR.into(),
|
||||||
|
Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(),
|
||||||
|
};
|
||||||
|
Self::ZeroLengthFileNameCtx { path, idx }
|
||||||
|
}
|
||||||
|
None => Self::ZeroLengthFileName,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UError for WcError {
|
impl UError for WcError {
|
||||||
fn code(&self) -> i32 {
|
|
||||||
match self {
|
|
||||||
Self::FilesDisabled(_) | Self::StdinReprNotAllowed(_) => 1,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn usage(&self) -> bool {
|
fn usage(&self) -> bool {
|
||||||
matches!(self, Self::FilesDisabled(_))
|
matches!(self, Self::FilesDisabled)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Error for WcError {}
|
|
||||||
|
|
||||||
impl Display for WcError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
Self::FilesDisabled(message) | Self::StdinReprNotAllowed(message) => {
|
|
||||||
write!(f, "{message}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,9 +379,8 @@ impl Display for WcError {
|
||||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
let matches = uu_app().try_get_matches_from(args)?;
|
let matches = uu_app().try_get_matches_from(args)?;
|
||||||
|
|
||||||
let inputs = inputs(&matches)?;
|
|
||||||
|
|
||||||
let settings = Settings::new(&matches);
|
let settings = Settings::new(&matches);
|
||||||
|
let inputs = Inputs::new(&matches)?;
|
||||||
|
|
||||||
wc(&inputs, &settings)
|
wc(&inputs, &settings)
|
||||||
}
|
}
|
||||||
|
@ -261,11 +409,12 @@ pub fn uu_app() -> Command {
|
||||||
Arg::new(options::FILES0_FROM)
|
Arg::new(options::FILES0_FROM)
|
||||||
.long(options::FILES0_FROM)
|
.long(options::FILES0_FROM)
|
||||||
.value_name("F")
|
.value_name("F")
|
||||||
.help(
|
.help(concat!(
|
||||||
"read input from the files specified by
|
"read input from the files specified by\n",
|
||||||
NUL-terminated names in file F;
|
" NUL-terminated names in file F;\n",
|
||||||
If F is - then read names from standard input",
|
" If F is - then read names from standard input"
|
||||||
)
|
))
|
||||||
|
.value_parser(ValueParser::os_string())
|
||||||
.value_hint(clap::ValueHint::FilePath),
|
.value_hint(clap::ValueHint::FilePath),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -286,10 +435,12 @@ pub fn uu_app() -> Command {
|
||||||
Arg::new(options::TOTAL)
|
Arg::new(options::TOTAL)
|
||||||
.long(options::TOTAL)
|
.long(options::TOTAL)
|
||||||
.value_parser(["auto", "always", "only", "never"])
|
.value_parser(["auto", "always", "only", "never"])
|
||||||
.default_value("auto")
|
|
||||||
.hide_default_value(true)
|
|
||||||
.value_name("WHEN")
|
.value_name("WHEN")
|
||||||
.help("when to print a line with total counts"),
|
.hide_possible_values(true)
|
||||||
|
.help(concat!(
|
||||||
|
"when to print a line with total counts;\n",
|
||||||
|
" WHEN can be: auto, always, only, never"
|
||||||
|
)),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::new(options::WORDS)
|
Arg::new(options::WORDS)
|
||||||
|
@ -306,47 +457,6 @@ pub fn uu_app() -> Command {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inputs(matches: &ArgMatches) -> UResult<Vec<Input>> {
|
|
||||||
match matches.get_many::<OsString>(ARG_FILES) {
|
|
||||||
Some(os_values) => {
|
|
||||||
if matches.contains_id(options::FILES0_FROM) {
|
|
||||||
return Err(WcError::FilesDisabled(
|
|
||||||
"file operands cannot be combined with --files0-from".into(),
|
|
||||||
)
|
|
||||||
.into());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(os_values.map(|s| Input::from(s.as_os_str())).collect())
|
|
||||||
}
|
|
||||||
None => match matches.get_one::<String>(options::FILES0_FROM) {
|
|
||||||
Some(files_0_from) => create_paths_from_files0(files_0_from),
|
|
||||||
None => Ok(vec![Input::Stdin(StdinKind::Implicit)]),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_paths_from_files0(files_0_from: &str) -> UResult<Vec<Input>> {
|
|
||||||
let mut paths = String::new();
|
|
||||||
let read_from_stdin = files_0_from == STDIN_REPR;
|
|
||||||
|
|
||||||
if read_from_stdin {
|
|
||||||
io::stdin().lock().read_to_string(&mut paths)?;
|
|
||||||
} else {
|
|
||||||
File::open(files_0_from)?.read_to_string(&mut paths)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let paths: Vec<&str> = paths.split_terminator('\0').collect();
|
|
||||||
|
|
||||||
if read_from_stdin && paths.contains(&STDIN_REPR) {
|
|
||||||
return Err(WcError::StdinReprNotAllowed(
|
|
||||||
"when reading file names from stdin, no file name of '-' allowed".into(),
|
|
||||||
)
|
|
||||||
.into());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(paths.iter().map(OsStr::new).map(Input::from).collect())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn word_count_from_reader<T: WordCountable>(
|
fn word_count_from_reader<T: WordCountable>(
|
||||||
mut reader: T,
|
mut reader: T,
|
||||||
settings: &Settings,
|
settings: &Settings,
|
||||||
|
@ -528,182 +638,250 @@ enum CountResult {
|
||||||
Failure(io::Error),
|
Failure(io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If we fail opening a file we only show the error. If we fail reading it
|
/// If we fail opening a file, we only show the error. If we fail reading the
|
||||||
/// we show a count for what we managed to read.
|
/// file, we show a count for what we managed to read.
|
||||||
///
|
///
|
||||||
/// Therefore the reading implementations always return a total and sometimes
|
/// Therefore, the reading implementations always return a total and sometimes
|
||||||
/// return an error: (WordCount, Option<io::Error>).
|
/// return an error: (WordCount, Option<io::Error>).
|
||||||
fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult {
|
fn word_count_from_input(input: &Input<'_>, settings: &Settings) -> CountResult {
|
||||||
match input {
|
let (total, maybe_err) = match input {
|
||||||
Input::Stdin(_) => {
|
Input::Stdin(_) => word_count_from_reader(io::stdin().lock(), settings),
|
||||||
let stdin = io::stdin();
|
|
||||||
let stdin_lock = stdin.lock();
|
|
||||||
let count = word_count_from_reader(stdin_lock, settings);
|
|
||||||
match count {
|
|
||||||
(total, Some(error)) => CountResult::Interrupted(total, error),
|
|
||||||
(total, None) => CountResult::Success(total),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Input::Path(path) => match File::open(path) {
|
Input::Path(path) => match File::open(path) {
|
||||||
Err(error) => CountResult::Failure(error),
|
Ok(f) => word_count_from_reader(f, settings),
|
||||||
Ok(file) => match word_count_from_reader(file, settings) {
|
Err(err) => return CountResult::Failure(err),
|
||||||
(total, Some(error)) => CountResult::Interrupted(total, error),
|
|
||||||
(total, None) => CountResult::Success(total),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
|
};
|
||||||
|
match maybe_err {
|
||||||
|
None => CountResult::Success(total),
|
||||||
|
Some(err) => CountResult::Interrupted(total, err),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute the number of digits needed to represent all counts in all inputs.
|
/// Compute the number of digits needed to represent all counts in all inputs.
|
||||||
///
|
///
|
||||||
/// `inputs` may include zero or more [`Input::Stdin`] entries, each of
|
/// For [`Inputs::Stdin`], [`MINIMUM_WIDTH`] is returned, unless there is only one counter number
|
||||||
/// which represents reading from `stdin`. The presence of any such
|
/// to be printed, in which case 1 is returned.
|
||||||
/// entry causes this function to return a width that is at least
|
|
||||||
/// [`MINIMUM_WIDTH`].
|
|
||||||
///
|
///
|
||||||
/// If `input` is empty, or if only one number needs to be printed (for just
|
/// For [`Inputs::Files0From`], 1 is returned.
|
||||||
/// one file) then this function is optimized to return 1 without making any
|
|
||||||
/// calls to get file metadata.
|
|
||||||
///
|
///
|
||||||
/// If file metadata could not be read from any of the [`Input::Path`] input,
|
/// An [`Inputs::Paths`] may include zero or more "-" entries, each of which represents reading
|
||||||
/// that input does not affect number width computation
|
/// from `stdin`. The presence of any such entry causes this function to return a width that is at
|
||||||
|
/// least [`MINIMUM_WIDTH`].
|
||||||
///
|
///
|
||||||
/// Otherwise, the file sizes in the file metadata are summed and the number of
|
/// If an [`Inputs::Paths`] contains only one path and only one number needs to be printed then
|
||||||
/// digits in that total size is returned as the number width
|
/// this function is optimized to return 1 without making any calls to get file metadata.
|
||||||
///
|
///
|
||||||
/// To mirror GNU wc's behavior a special case is added. If --files0-from is
|
/// If file metadata could not be read from any of the [`Input::Path`] input, that input does not
|
||||||
/// used and input is read from stdin and there is only one calculation enabled
|
/// affect number width computation. Otherwise, the file sizes from the files' metadata are summed
|
||||||
/// columns will not be aligned. This is not exactly GNU wc's behavior, but it
|
/// and the number of digits in that total size is returned.
|
||||||
/// is close enough to pass the GNU test suite.
|
fn compute_number_width(inputs: &Inputs, settings: &Settings) -> usize {
|
||||||
fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize {
|
match inputs {
|
||||||
if inputs.is_empty()
|
Inputs::Stdin if settings.number_enabled() == 1 => 1,
|
||||||
|| (inputs.len() == 1 && settings.number_enabled() == 1)
|
Inputs::Stdin => MINIMUM_WIDTH,
|
||||||
|| (settings.files0_from_stdin_mode && settings.number_enabled() == 1)
|
Inputs::Files0From(_) => 1,
|
||||||
{
|
Inputs::Paths(inputs) => {
|
||||||
return 1;
|
if settings.number_enabled() == 1 && inputs.len() == 1 {
|
||||||
}
|
return 1;
|
||||||
|
|
||||||
let mut minimum_width = 1;
|
|
||||||
let mut total = 0;
|
|
||||||
|
|
||||||
for input in inputs {
|
|
||||||
match input {
|
|
||||||
Input::Stdin(_) => {
|
|
||||||
minimum_width = MINIMUM_WIDTH;
|
|
||||||
}
|
}
|
||||||
Input::Path(path) => {
|
|
||||||
if let Ok(meta) = fs::metadata(path) {
|
let mut minimum_width = 1;
|
||||||
if meta.is_file() {
|
let mut total: u64 = 0;
|
||||||
total += meta.len();
|
for input in inputs.iter() {
|
||||||
} else {
|
match input {
|
||||||
minimum_width = MINIMUM_WIDTH;
|
Input::Stdin(_) => minimum_width = MINIMUM_WIDTH,
|
||||||
|
Input::Path(path) => {
|
||||||
|
if let Ok(meta) = fs::metadata(path) {
|
||||||
|
if meta.is_file() {
|
||||||
|
total += meta.len();
|
||||||
|
} else {
|
||||||
|
minimum_width = MINIMUM_WIDTH;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if total == 0 {
|
||||||
|
minimum_width
|
||||||
|
} else {
|
||||||
|
let total_width = (1 + ilog10_u64(total))
|
||||||
|
.try_into()
|
||||||
|
.expect("ilog of a u64 should fit into a usize");
|
||||||
|
max(total_width, minimum_width)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
max(minimum_width, total.to_string().len())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
|
type InputIterItem<'a> = Result<Input<'a>, Box<dyn UError>>;
|
||||||
|
|
||||||
|
/// To be used with `--files0-from=-`, this applies a filter on the results of files0_iter to
|
||||||
|
/// translate '-' into the appropriate error.
|
||||||
|
fn files0_iter_stdin<'a>() -> impl Iterator<Item = InputIterItem<'a>> {
|
||||||
|
files0_iter(io::stdin().lock(), STDIN_REPR.into()).map(|i| match i {
|
||||||
|
Ok(Input::Stdin(_)) => Err(WcError::StdinReprNotAllowed.into()),
|
||||||
|
_ => i,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterItem<'a>>> {
|
||||||
|
match File::open(path) {
|
||||||
|
Ok(f) => Ok(files0_iter(f, path.into())),
|
||||||
|
Err(e) => Err(e.map_err_context(|| {
|
||||||
|
format!(
|
||||||
|
"cannot open {} for reading",
|
||||||
|
escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
|
||||||
|
)
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn files0_iter<'a>(
|
||||||
|
r: impl io::Read + 'static,
|
||||||
|
err_path: OsString,
|
||||||
|
) -> impl Iterator<Item = InputIterItem<'a>> {
|
||||||
|
use std::io::BufRead;
|
||||||
|
let mut i = Some(
|
||||||
|
io::BufReader::new(r)
|
||||||
|
.split(b'\0')
|
||||||
|
.map(move |res| match res {
|
||||||
|
Ok(p) if p == STDIN_REPR.as_bytes() => Ok(Input::Stdin(StdinKind::Explicit)),
|
||||||
|
Ok(p) => {
|
||||||
|
// On Unix systems, OsStrings are just strings of bytes, not necessarily UTF-8.
|
||||||
|
#[cfg(unix)]
|
||||||
|
{
|
||||||
|
use std::os::unix::ffi::OsStringExt;
|
||||||
|
Ok(Input::Path(PathBuf::from(OsString::from_vec(p)).into()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ...on Windows they are not, so we must go through Strings.
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
{
|
||||||
|
let s = String::from_utf8(p)
|
||||||
|
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
|
||||||
|
Ok(Input::Path(PathBuf::from(s).into()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => Err(e.map_err_context(|| {
|
||||||
|
format!("{}: read error", escape_name(&err_path, QS_ESCAPE))
|
||||||
|
}) as Box<dyn UError>),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
// Loop until there is an error; yield that error and then nothing else.
|
||||||
|
std::iter::from_fn(move || {
|
||||||
|
let next = i.as_mut().and_then(Iterator::next);
|
||||||
|
if matches!(next, Some(Err(_)) | None) {
|
||||||
|
i = None;
|
||||||
|
}
|
||||||
|
next
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> {
|
||||||
let mut total_word_count = WordCount::default();
|
let mut total_word_count = WordCount::default();
|
||||||
|
let mut num_inputs: usize = 0;
|
||||||
|
|
||||||
let (number_width, are_stats_visible, total_row_title) =
|
let (number_width, are_stats_visible) = match settings.total_when {
|
||||||
if settings.total_when == TotalWhen::Only {
|
TotalWhen::Only => (1, false),
|
||||||
(1, false, None)
|
_ => (compute_number_width(inputs, settings), true),
|
||||||
} else {
|
};
|
||||||
let number_width = compute_number_width(inputs, settings);
|
|
||||||
let title = Some(String::from("total"));
|
|
||||||
|
|
||||||
(number_width, true, title)
|
for maybe_input in inputs.try_iter(settings)? {
|
||||||
|
num_inputs += 1;
|
||||||
|
|
||||||
|
let input = match maybe_input {
|
||||||
|
Ok(input) => input,
|
||||||
|
Err(err) => {
|
||||||
|
show!(err);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let is_total_row_visible = settings.total_when.is_total_row_visible(inputs.len());
|
let word_count = match word_count_from_input(&input, settings) {
|
||||||
|
|
||||||
for input in inputs {
|
|
||||||
let word_count = match word_count_from_input(input, settings) {
|
|
||||||
CountResult::Success(word_count) => word_count,
|
CountResult::Success(word_count) => word_count,
|
||||||
CountResult::Interrupted(word_count, error) => {
|
CountResult::Interrupted(word_count, err) => {
|
||||||
show!(USimpleError::new(
|
show!(err.map_err_context(|| input.path_display()));
|
||||||
1,
|
|
||||||
format!(
|
|
||||||
"{}: {}",
|
|
||||||
input.path_display(&settings.title_quoting_style),
|
|
||||||
error
|
|
||||||
)
|
|
||||||
));
|
|
||||||
word_count
|
word_count
|
||||||
}
|
}
|
||||||
CountResult::Failure(error) => {
|
CountResult::Failure(err) => {
|
||||||
show!(USimpleError::new(
|
show!(err.map_err_context(|| input.path_display()));
|
||||||
1,
|
|
||||||
format!(
|
|
||||||
"{}: {}",
|
|
||||||
input.path_display(&settings.title_quoting_style),
|
|
||||||
error
|
|
||||||
)
|
|
||||||
));
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
total_word_count += word_count;
|
total_word_count += word_count;
|
||||||
let result = word_count.with_title(input.to_title(&settings.title_quoting_style));
|
|
||||||
|
|
||||||
if are_stats_visible {
|
if are_stats_visible {
|
||||||
if let Err(err) = print_stats(settings, &result, number_width) {
|
let maybe_title = input.to_title();
|
||||||
show!(USimpleError::new(
|
let maybe_title_str = maybe_title.as_deref();
|
||||||
1,
|
if let Err(err) = print_stats(settings, &word_count, maybe_title_str, number_width) {
|
||||||
format!(
|
let title = maybe_title_str.unwrap_or("<stdin>");
|
||||||
"failed to print result for {}: {}",
|
show!(err.map_err_context(|| format!("failed to print result for {title}")));
|
||||||
&result.title.unwrap_or_else(|| String::from("<stdin>")),
|
|
||||||
err,
|
|
||||||
),
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_total_row_visible {
|
if settings.total_when.is_total_row_visible(num_inputs) {
|
||||||
let total_result = total_word_count.with_title(total_row_title);
|
let title = are_stats_visible.then_some("total");
|
||||||
if let Err(err) = print_stats(settings, &total_result, number_width) {
|
if let Err(err) = print_stats(settings, &total_word_count, title, number_width) {
|
||||||
show!(USimpleError::new(
|
show!(err.map_err_context(|| "failed to print total".into()));
|
||||||
1,
|
|
||||||
format!("failed to print total: {err}")
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Although this appears to be returning `Ok`, the exit code may
|
// Although this appears to be returning `Ok`, the exit code may have been set to a non-zero
|
||||||
// have been set to a non-zero value by a call to `show!()` above.
|
// value by a call to `record_error!()` above.
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_stats(
|
fn print_stats(
|
||||||
settings: &Settings,
|
settings: &Settings,
|
||||||
result: &TitledWordCount,
|
result: &WordCount,
|
||||||
|
title: Option<&str>,
|
||||||
number_width: usize,
|
number_width: usize,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
let mut columns = Vec::new();
|
let mut stdout = io::stdout().lock();
|
||||||
|
|
||||||
if settings.show_lines {
|
let maybe_cols = [
|
||||||
columns.push(format!("{:1$}", result.count.lines, number_width));
|
(settings.show_lines, result.lines),
|
||||||
}
|
(settings.show_words, result.words),
|
||||||
if settings.show_words {
|
(settings.show_chars, result.chars),
|
||||||
columns.push(format!("{:1$}", result.count.words, number_width));
|
(settings.show_bytes, result.bytes),
|
||||||
}
|
(settings.show_max_line_length, result.max_line_length),
|
||||||
if settings.show_chars {
|
];
|
||||||
columns.push(format!("{:1$}", result.count.chars, number_width));
|
|
||||||
}
|
let mut space = "";
|
||||||
if settings.show_bytes {
|
for (_, num) in maybe_cols.iter().filter(|(show, _)| *show) {
|
||||||
columns.push(format!("{:1$}", result.count.bytes, number_width));
|
write!(stdout, "{space}{num:number_width$}")?;
|
||||||
}
|
space = " ";
|
||||||
if settings.show_max_line_length {
|
|
||||||
columns.push(format!("{:1$}", result.count.max_line_length, number_width));
|
|
||||||
}
|
|
||||||
if let Some(title) = &result.title {
|
|
||||||
columns.push(title.clone());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
writeln!(io::stdout().lock(), "{}", columns.join(" "))
|
if let Some(title) = title {
|
||||||
|
writeln!(stdout, "{space}{title}")
|
||||||
|
} else {
|
||||||
|
writeln!(stdout)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: remove and just use usize::ilog10 once the MSRV is >= 1.67.
/// Returns the base-10 logarithm of `u`, rounded down.
///
/// # Panics
///
/// Panics if `u` is zero.
fn ilog10_u64(mut u: u64) -> u32 {
    assert!(u != 0, "cannot compute log of 0");

    let mut log = 0;
    // Peel off the big powers of ten first; afterwards fewer than five digits remain.
    for (power, digits) in [(10_000_000_000_u64, 10_u32), (100_000, 5)] {
        if u >= power {
            u /= power;
            log += digits;
        }
    }
    // Rust's standard library in versions >= 1.67 does something even more clever than this, but
    // this should work just fine for the time being.
    while u >= 10 {
        u /= 10;
        log += 1;
    }
    log
}
|
||||||
|
|
|
@ -29,19 +29,3 @@ impl AddAssign for WordCount {
|
||||||
*self = *self + other;
|
*self = *self + other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WordCount {
|
|
||||||
pub fn with_title(self, title: Option<String>) -> TitledWordCount {
|
|
||||||
TitledWordCount { title, count: self }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This struct supplements the actual word count with an optional title that is
|
|
||||||
/// displayed to the user at the end of the program.
|
|
||||||
/// The reason we don't simply include title in the `WordCount` struct is that
|
|
||||||
/// it would result in unnecessary copying of `String`.
|
|
||||||
#[derive(Debug, Default, Clone)]
|
|
||||||
pub struct TitledWordCount {
|
|
||||||
pub title: Option<String>,
|
|
||||||
pub count: WordCount,
|
|
||||||
}
|
|
||||||
|
|
|
@ -268,12 +268,16 @@ fn test_multiple_default() {
|
||||||
"lorem_ipsum.txt",
|
"lorem_ipsum.txt",
|
||||||
"moby_dick.txt",
|
"moby_dick.txt",
|
||||||
"alice_in_wonderland.txt",
|
"alice_in_wonderland.txt",
|
||||||
|
"alice in wonderland.txt",
|
||||||
])
|
])
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(
|
.stdout_is(concat!(
|
||||||
" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \
|
" 13 109 772 lorem_ipsum.txt\n",
|
||||||
alice_in_wonderland.txt\n 36 370 2189 total\n",
|
" 18 204 1115 moby_dick.txt\n",
|
||||||
);
|
" 5 57 302 alice_in_wonderland.txt\n",
|
||||||
|
" 5 57 302 alice in wonderland.txt\n",
|
||||||
|
" 41 427 2491 total\n",
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test for an empty file.
|
/// Test for an empty file.
|
||||||
|
@ -352,17 +356,24 @@ fn test_file_bytes_dictate_width() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-lwc", "alice_in_wonderland.txt", "lorem_ipsum.txt"])
|
.args(&["-lwc", "alice_in_wonderland.txt", "lorem_ipsum.txt"])
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(
|
.stdout_is(concat!(
|
||||||
" 5 57 302 alice_in_wonderland.txt\n 13 109 772 \
|
" 5 57 302 alice_in_wonderland.txt\n",
|
||||||
lorem_ipsum.txt\n 18 166 1074 total\n",
|
" 13 109 772 lorem_ipsum.txt\n",
|
||||||
);
|
" 18 166 1074 total\n",
|
||||||
|
));
|
||||||
|
|
||||||
// . is a directory, so minimum_width should get set to 7
|
// . is a directory, so minimum_width should get set to 7
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 \
|
const STDOUT: &str = concat!(
|
||||||
.\n 0 0 0 total\n";
|
" 0 0 0 emptyfile.txt\n",
|
||||||
|
" 0 0 0 .\n",
|
||||||
|
" 0 0 0 total\n",
|
||||||
|
);
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 total\n";
|
const STDOUT: &str = concat!(
|
||||||
|
" 0 0 0 emptyfile.txt\n",
|
||||||
|
" 0 0 0 total\n",
|
||||||
|
);
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-lwc", "emptyfile.txt", "."])
|
.args(&["-lwc", "emptyfile.txt", "."])
|
||||||
.run()
|
.run()
|
||||||
|
@ -375,7 +386,7 @@ fn test_read_from_directory_error() {
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
const STDERR: &str = ".: Is a directory";
|
const STDERR: &str = ".: Is a directory";
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
const STDERR: &str = ".: Access is denied";
|
const STDERR: &str = ".: Permission denied";
|
||||||
|
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
const STDOUT: &str = " 0 0 0 .\n";
|
const STDOUT: &str = " 0 0 0 .\n";
|
||||||
|
@ -392,15 +403,10 @@ fn test_read_from_directory_error() {
|
||||||
/// Counting a file that does not exist must fail, with the error being the only output.
#[test]
fn test_read_from_nonexistent_file() {
    const EXPECTED_STDERR: &str = "wc: bogusfile: No such file or directory\n";

    new_ucmd!()
        .args(&["bogusfile"])
        .fails()
        .stderr_only(EXPECTED_STDERR);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -424,13 +430,30 @@ fn test_files0_disabled_files_argument() {
|
||||||
|
|
||||||
/// The list of names may come from a regular file or be streamed through standard input.
#[test]
fn test_files0_from() {
    // Names listed in a file.
    let from_file = new_ucmd!()
        .args(&["--files0-from=files0_list.txt"])
        .run();
    from_file.success().stdout_is(concat!(
        " 13 109 772 lorem_ipsum.txt\n",
        " 18 204 1115 moby_dick.txt\n",
        " 5 57 302 alice_in_wonderland.txt\n",
        " 36 370 2189 total\n",
    ));

    // The same names piped in as a stream.
    let from_stream = new_ucmd!()
        .args(&["--files0-from=-"])
        .pipe_in_fixture("files0_list.txt")
        .run();
    from_stream.success().stdout_is(concat!(
        "13 109 772 lorem_ipsum.txt\n",
        "18 204 1115 moby_dick.txt\n",
        "5 57 302 alice_in_wonderland.txt\n",
        "36 370 2189 total\n",
    ));
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -439,7 +462,7 @@ fn test_files0_from_with_stdin() {
|
||||||
.args(&["--files0-from=-"])
|
.args(&["--files0-from=-"])
|
||||||
.pipe_in("lorem_ipsum.txt")
|
.pipe_in("lorem_ipsum.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 13 109 772 lorem_ipsum.txt\n");
|
.stdout_is("13 109 772 lorem_ipsum.txt\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -448,10 +471,12 @@ fn test_files0_from_with_stdin_in_file() {
|
||||||
.args(&["--files0-from=files0_list_with_stdin.txt"])
|
.args(&["--files0-from=files0_list_with_stdin.txt"])
|
||||||
.pipe_in_fixture("alice_in_wonderland.txt")
|
.pipe_in_fixture("alice_in_wonderland.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(
|
.stdout_is(concat!(
|
||||||
" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \
|
" 13 109 772 lorem_ipsum.txt\n",
|
||||||
-\n 36 370 2189 total\n",
|
" 18 204 1115 moby_dick.txt\n",
|
||||||
);
|
" 5 57 302 -\n", // alice_in_wonderland.txt
|
||||||
|
" 36 370 2189 total\n",
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -530,3 +555,152 @@ fn test_total_only() {
|
||||||
.run()
|
.run()
|
||||||
.stdout_is("31 313 1887\n");
|
.stdout_is("31 313 1887\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Empty names in the list are reported per entry while the remaining files are still counted.
#[test]
fn test_zero_length_files() {
    // A trailing zero is ignored, but otherwise empty file names are an error...
    const LIST: &str = "\0moby_dick.txt\0\0alice_in_wonderland.txt\0\0lorem_ipsum.txt\0";

    // The counts are the same in every scenario below; only the reported errors differ.
    const COUNTS: &str = concat!(
        "18 204 1115 moby_dick.txt\n",
        "5 57 302 alice_in_wonderland.txt\n",
        "13 109 772 lorem_ipsum.txt\n",
        "36 370 2189 total\n",
    );

    // Try with and without the last \0
    for l in [LIST.len(), LIST.len() - 1] {
        new_ucmd!()
            .args(&["--files0-from=-"])
            .pipe_in(&LIST[..l])
            .run()
            .failure()
            .stdout_is(COUNTS)
            .stderr_is(concat!(
                "wc: -:1: invalid zero-length file name\n",
                "wc: -:3: invalid zero-length file name\n",
                "wc: -:5: invalid zero-length file name\n",
            ));
    }

    // But, just as important, a zero-length file name may still be at the end...
    let mut with_extra_nul = LIST.as_bytes().to_vec();
    with_extra_nul.push(b'\0');
    new_ucmd!()
        .args(&["--files0-from=-"])
        .pipe_in(with_extra_nul)
        .run()
        .failure()
        .stdout_is(COUNTS)
        .stderr_is(concat!(
            "wc: -:1: invalid zero-length file name\n",
            "wc: -:3: invalid zero-length file name\n",
            "wc: -:5: invalid zero-length file name\n",
            "wc: -:7: invalid zero-length file name\n",
        ));
}
|
||||||
|
|
||||||
|
/// Names appearing in error messages are quoted only when they need it.
#[test]
fn test_files0_errors_quoting() {
    const EXPECTED_STDERR: &str = concat!(
        "wc: this_file_does_not_exist.txt: No such file or directory\n",
        "wc: 'files0 with nonexistent.txt':2: invalid zero-length file name\n",
        "wc: 'this file does not exist.txt': No such file or directory\n",
        "wc: \"this files doesn't exist either.txt\": No such file or directory\n",
    );

    // Every entry fails, so the total row is the only thing written to stdout.
    new_ucmd!()
        .args(&["--files0-from=files0 with nonexistent.txt"])
        .run()
        .failure()
        .stderr_is(EXPECTED_STDERR)
        .stdout_is("0 0 0 total\n");
}
|
||||||
|
|
||||||
|
/// Each name written to stdin is answered before the next one is sent.
#[test]
fn test_files0_progressive_stream() {
    use std::process::Stdio;

    // You should be able to run wc and have a back-and-forth exchange with wc...
    let mut child = new_ucmd!()
        .args(&["--files0-from=-"])
        .set_stdin(Stdio::piped())
        .set_stdout(Stdio::piped())
        .set_stderr(Stdio::piped())
        .run_no_wait();

    // Reads exactly as many bytes as the expected text is long from the given stream of the
    // child and compares them with that text.
    macro_rules! expect_output {
        ($reader:ident, $expected:literal) => {
            assert_eq!(child.$reader($expected.len()), $expected.as_bytes());
        };
    }

    // File in, count out...
    child.write_in("moby_dick.txt\0");
    expect_output!(stdout_exact_bytes, "18 204 1115 moby_dick.txt\n");
    child.write_in("lorem_ipsum.txt\0");
    expect_output!(stdout_exact_bytes, "13 109 772 lorem_ipsum.txt\n");

    // Introduce an error!
    child.write_in("\0");
    expect_output!(
        stderr_exact_bytes,
        "wc: -:3: invalid zero-length file name\n"
    );

    // wc is quick to forgive, let's move on...
    child.write_in("alice_in_wonderland.txt\0");
    expect_output!(stdout_exact_bytes, "5 57 302 alice_in_wonderland.txt\n");

    // Fin.
    child
        .wait()
        .expect("wc should finish")
        .failure()
        .stdout_only("36 370 2189 total\n");
}
|
||||||
|
|
||||||
|
/// Passing a directory as the `--files0-from` source must fail with a platform-specific error.
#[test]
fn files0_from_dir() {
    // On Unix, `read(open("."))` fails. On Windows, `open(".")` fails. Thus, the errors happen in
    // different contexts.
    #[cfg(not(windows))]
    macro_rules! dir_err {
        ($p:literal) => {
            concat!("wc: ", $p, ": read error: Is a directory\n")
        };
    }
    #[cfg(windows)]
    macro_rules! dir_err {
        ($p:literal) => {
            concat!("wc: cannot open ", $p, " for reading: Permission denied\n")
        };
    }

    // Those contexts have different rules about quoting in errors...
    #[cfg(windows)]
    const DOT_ERR: &str = dir_err!("'.'");
    #[cfg(not(windows))]
    const DOT_ERR: &str = dir_err!(".");

    new_ucmd!()
        .args(&["--files0-from=dir with spaces"])
        .fails()
        .stderr_only(dir_err!("'dir with spaces'"));

    new_ucmd!()
        .args(&["--files0-from=."])
        .fails()
        .stderr_only(DOT_ERR);

    // That also means you cannot `< . wc --files0-from=-` on Windows.
    #[cfg(not(windows))]
    new_ucmd!()
        .args(&["--files0-from=-"])
        .set_stdin(std::fs::File::open(".").unwrap())
        .fails()
        .stderr_only(dir_err!("-"));
}
|
||||||
|
|
5
tests/fixtures/wc/alice in wonderland.txt
vendored
Normal file
5
tests/fixtures/wc/alice in wonderland.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
Alice was beginning to get very tired of sitting by
|
||||||
|
her sister on the bank, and of having nothing to do: once or twice
|
||||||
|
she had peeped into the book her sister was reading, but it had no
|
||||||
|
pictures or conversations in it, "and what is the use of a book,"
|
||||||
|
thought Alice "without pictures or conversation?"
|
0
tests/fixtures/wc/dir with spaces/.keep
vendored
Normal file
0
tests/fixtures/wc/dir with spaces/.keep
vendored
Normal file
BIN
tests/fixtures/wc/files0 with nonexistent.txt
vendored
Normal file
BIN
tests/fixtures/wc/files0 with nonexistent.txt
vendored
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue