From 4f043ff57f2d4f689d9fced166dd1213cc80cf1c Mon Sep 17 00:00:00 2001 From: Gergely Kalas Date: Mon, 27 Jun 2022 17:35:28 +0200 Subject: [PATCH] Fix 'wc' gnu test-suite compatibility #3678 This change will extract a utility already present in ls to uucore. This utility is used by dir and vdir too, which are adjusted to look it up in uucode. No further changes to ls, dir or dirv intended. The change here largely fiddles with the output of uu_wc to match that of GNU wc. This is the case to the extent to make unit tests pass, however, there are differences remaining. One specific difference I did not tackle is that GNU wc will not align the output columns (compute_number_width() -> 1) in the specific case of the input for --files0-from=- being a named pipe, not real stdin. This difference can be triggered using the following two invocations. - wc --files0-from=- < files0 # use a named pipe, GNU does align - cat files0- | wc --files0-from=- # use real stdin, GNU does not align. --- src/uu/dir/src/dir.rs | 2 +- src/uu/ls/src/ls.rs | 6 +- src/uu/vdir/src/vdir.rs | 2 +- src/uu/wc/src/wc.rs | 73 ++++++++++++++----- src/uu/wc/src/word_count.rs | 7 +- src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/mods.rs | 2 + .../src/lib/mods}/quoting_style.rs | 5 +- 8 files changed, 66 insertions(+), 32 deletions(-) rename src/{uu/ls/src => uucore/src/lib/mods}/quoting_style.rs (99%) diff --git a/src/uu/dir/src/dir.rs b/src/uu/dir/src/dir.rs index 749074726..e34bd2de5 100644 --- a/src/uu/dir/src/dir.rs +++ b/src/uu/dir/src/dir.rs @@ -7,9 +7,9 @@ use clap::Command; use std::path::Path; -use uu_ls::quoting_style::{Quotes, QuotingStyle}; use uu_ls::{options, Config, Format}; use uucore::error::UResult; +use uucore::quoting_style::{Quotes, QuotingStyle}; #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 0fa748324..cd400ec7c 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -13,15 +13,11 @@ extern crate uucore; #[macro_use] extern crate lazy_static; -// dir and vdir also need access to the quoting_style module -pub mod quoting_style; - use clap::{crate_version, Arg, Command}; use glob::Pattern; use lscolors::LsColors; use number_prefix::NumberPrefix; use once_cell::unsync::OnceCell; -use quoting_style::{escape_name, QuotingStyle}; #[cfg(windows)] use std::os::windows::fs::MetadataExt; use std::{ @@ -44,6 +40,7 @@ use term_grid::{Cell, Direction, Filling, Grid, GridOptions}; use unicode_width::UnicodeWidthStr; #[cfg(unix)] use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR}; +use uucore::quoting_style::{escape_name, QuotingStyle}; use uucore::{ display::Quotable, error::{set_exit_code, UError, UResult}, @@ -2257,6 +2254,7 @@ fn get_inode(metadata: &Metadata) -> String { use std::sync::Mutex; #[cfg(unix)] use uucore::entries; +use uucore::quoting_style; #[cfg(unix)] fn cached_uid2usr(uid: u32) -> String { diff --git a/src/uu/vdir/src/vdir.rs b/src/uu/vdir/src/vdir.rs index b2af0b332..fed535926 100644 --- a/src/uu/vdir/src/vdir.rs +++ b/src/uu/vdir/src/vdir.rs @@ -7,9 +7,9 @@ use clap::Command; use std::path::Path; -use uu_ls::quoting_style::{Quotes, QuotingStyle}; use uu_ls::{options, Config, Format}; use uucore::error::UResult; +use uucore::quoting_style::{Quotes, QuotingStyle}; #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index ba7c4c1af..7afee92b2 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -5,6 +5,8 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. +// cSpell:ignore wc wc's + #[macro_use] extern crate uucore; @@ -26,10 +28,10 @@ use std::ffi::OsStr; use std::fmt::Display; use std::fs::{self, File}; use std::io::{self, Read, Write}; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; -use uucore::display::{Quotable, Quoted}; use uucore::error::{UError, UResult, USimpleError}; +use uucore::quoting_style::{escape_name, QuotingStyle}; /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; @@ -40,16 +42,31 @@ struct Settings { show_lines: bool, show_words: bool, show_max_line_length: bool, + files0_from_stdin_mode: bool, + title_quoting_style: QuotingStyle, } impl Settings { fn new(matches: &ArgMatches) -> Self { + let title_quoting_style = QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, + }; + + let files0_from_stdin_mode = match matches.value_of(options::FILES0_FROM) { + Some(files_0_from) => files_0_from == STDIN_REPR, + None => false, + }; + let settings = Self { show_bytes: matches.is_present(options::BYTES), show_chars: matches.is_present(options::CHAR), show_lines: matches.is_present(options::LINES), show_words: matches.is_present(options::WORDS), show_max_line_length: matches.is_present(options::MAX_LINE_LENGTH), + files0_from_stdin_mode, + title_quoting_style, }; if settings.show_bytes @@ -67,6 +84,8 @@ impl Settings { show_lines: true, show_words: true, show_max_line_length: false, + files0_from_stdin_mode, + title_quoting_style: settings.title_quoting_style, } } @@ -126,18 +145,20 @@ impl From<&OsStr> for Input { impl Input { /// Converts input to title that appears in stats. - fn to_title(&self) -> Option<&Path> { + fn to_title(&self, quoting_style: &QuotingStyle) -> Option { match self { - Input::Path(path) => Some(path), - Input::Stdin(StdinKind::Explicit) => Some(STDIN_REPR.as_ref()), + Input::Path(path) => Some(escape_name(&path.clone().into_os_string(), quoting_style)), + Input::Stdin(StdinKind::Explicit) => { + Some(escape_name(OsStr::new(STDIN_REPR), quoting_style)) + } Input::Stdin(StdinKind::Implicit) => None, } } - fn path_display(&self) -> Quoted<'_> { + fn path_display(&self, quoting_style: &QuotingStyle) -> String { match self { - Input::Path(path) => path.maybe_quote(), - Input::Stdin(_) => "standard input".maybe_quote(), + Input::Path(path) => escape_name(&path.clone().into_os_string(), quoting_style), + Input::Stdin(_) => escape_name(OsStr::new("standard input"), quoting_style), } } } @@ -417,8 +438,16 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult { /// /// Otherwise, the file sizes in the file metadata are summed and the number of /// digits in that total size is returned as the number width +/// +/// To mirror GNU wc's behavior a special case is added. If --files0-from is +/// used and input is read from stdin and there is only one calculation enabled +/// columns will not be aligned. This is not exactly GNU wc's behavior, but it +/// is close enough to pass the GNU test suite. fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize { - if inputs.is_empty() || (inputs.len() == 1 && settings.number_enabled() == 1) { + if inputs.is_empty() + || (inputs.len() == 1 && settings.number_enabled() == 1) + || (settings.files0_from_stdin_mode && settings.number_enabled() == 1) + { return 1; } @@ -458,29 +487,34 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { CountResult::Interrupted(word_count, error) => { show!(USimpleError::new( 1, - format!("{}: {}", input.path_display(), error) + format!( + "{}: {}", + input.path_display(&settings.title_quoting_style), + error + ) )); word_count } CountResult::Failure(error) => { show!(USimpleError::new( 1, - format!("{}: {}", input.path_display(), error) + format!( + "{}: {}", + input.path_display(&settings.title_quoting_style), + error + ) )); continue; } }; total_word_count += word_count; - let result = word_count.with_title(input.to_title()); + let result = word_count.with_title(input.to_title(&settings.title_quoting_style)); if let Err(err) = print_stats(settings, &result, number_width) { show!(USimpleError::new( 1, format!( "failed to print result for {}: {}", - result - .title - .unwrap_or_else(|| "".as_ref()) - .maybe_quote(), + &result.title.unwrap_or_else(|| String::from("")), err, ), )); @@ -488,7 +522,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { } if num_inputs > 1 { - let total_result = total_word_count.with_title(Some("total".as_ref())); + let total_result = total_word_count.with_title(Some(String::from("total"))); if let Err(err) = print_stats(settings, &total_result, number_width) { show!(USimpleError::new( 1, @@ -524,9 +558,8 @@ fn print_stats( if settings.show_max_line_length { columns.push(format!("{:1$}", result.count.max_line_length, number_width)); } - - if let Some(title) = result.title { - columns.push(title.maybe_quote().to_string()); + if let Some(title) = &result.title { + columns.push(title.clone()); } writeln!(io::stdout().lock(), "{}", columns.join(" ")) diff --git a/src/uu/wc/src/word_count.rs b/src/uu/wc/src/word_count.rs index c1dd0c3d3..b2dd70450 100644 --- a/src/uu/wc/src/word_count.rs +++ b/src/uu/wc/src/word_count.rs @@ -1,6 +1,5 @@ use std::cmp::max; use std::ops::{Add, AddAssign}; -use std::path::Path; #[derive(Debug, Default, Copy, Clone)] pub struct WordCount { @@ -32,7 +31,7 @@ impl AddAssign for WordCount { } impl WordCount { - pub fn with_title(self, title: Option<&Path>) -> TitledWordCount { + pub fn with_title(self, title: Option) -> TitledWordCount { TitledWordCount { title, count: self } } } @@ -42,7 +41,7 @@ impl WordCount { /// The reason we don't simply include title in the `WordCount` struct is that /// it would result in unnecessary copying of `String`. #[derive(Debug, Default, Clone)] -pub struct TitledWordCount<'a> { - pub title: Option<&'a Path>, +pub struct TitledWordCount { + pub title: Option, pub count: WordCount, } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index d5aa5e672..1c405ce98 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -24,6 +24,7 @@ pub use crate::mods::display; pub use crate::mods::error; pub use crate::mods::os; pub use crate::mods::panic; +pub use crate::mods::quoting_style; pub use crate::mods::ranges; pub use crate::mods::version_cmp; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index bbde696dc..4b6c53f95 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -7,3 +7,5 @@ pub mod os; pub mod panic; pub mod ranges; pub mod version_cmp; +// dir and vdir also need access to the quoting_style module +pub mod quoting_style; diff --git a/src/uu/ls/src/quoting_style.rs b/src/uucore/src/lib/mods/quoting_style.rs similarity index 99% rename from src/uu/ls/src/quoting_style.rs rename to src/uucore/src/lib/mods/quoting_style.rs index 9ff689273..38401a169 100644 --- a/src/uu/ls/src/quoting_style.rs +++ b/src/uucore/src/lib/mods/quoting_style.rs @@ -256,7 +256,7 @@ fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { (escaped_str, must_quote) } -pub(super) fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { +pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { match style { QuotingStyle::Literal { show_control } => { if !show_control { @@ -314,9 +314,10 @@ pub(super) fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { #[cfg(test)] mod tests { + use crate::quoting_style::{escape_name, Quotes, QuotingStyle}; + // spell-checker:ignore (tests/words) one\'two one'two - use crate::quoting_style::{escape_name, Quotes, QuotingStyle}; fn get_style(s: &str) -> QuotingStyle { match s { "literal" => QuotingStyle::Literal {