From d6562b2b5fee55d97b228eb92fb6e9265e1c8f0e Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Sun, 28 May 2023 17:24:13 -0700 Subject: [PATCH] uniq: non-UTF-8 file names and fewer copies uniq ought to be able to work with files whose names are not UTF-8. I've also taken the chance to remove some unnecessary String allocations. Additionally, both open_input_file and open_output_file were returning items with two implie levels of `Box` that were not necessary. In the case of stdin as input, it was not necessary to wrap stdin with a BufReader, StdinLock implements BufRead itself. --- src/uu/uniq/src/uniq.rs | 83 ++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs index 16b4ab2c4..89141f35f 100644 --- a/src/uu/uniq/src/uniq.rs +++ b/src/uu/uniq/src/uniq.rs @@ -5,10 +5,10 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; +use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command}; +use std::ffi::{OsStr, OsString}; use std::fs::File; -use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; -use std::path::Path; +use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Write}; use std::str::FromStr; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; @@ -64,11 +64,7 @@ macro_rules! write_line_terminator { } impl Uniq { - pub fn print_uniq( - &self, - reader: &mut BufReader, - writer: &mut BufWriter, - ) -> UResult<()> { + pub fn print_uniq(&self, reader: impl BufRead, mut writer: impl Write) -> UResult<()> { let mut first_line_printed = false; let mut group_count = 1; let line_terminator = self.get_line_terminator(); @@ -78,6 +74,8 @@ impl Uniq { None => return Ok(()), }; + let writer = &mut writer; + // compare current `line` with consecutive lines (`next_line`) of the input // and if needed, print `line` based on the command line options provided for next_line in lines { @@ -122,7 +120,6 @@ impl Uniq { } match char_indices.find(|(_, c)| c.is_whitespace()) { None => return "", - Some((next_field_i, _)) => i = next_field_i, } } @@ -195,9 +192,9 @@ impl Uniq { || self.delimiters == Delimiters::Both) } - fn print_line( + fn print_line( &self, - writer: &mut BufWriter, + writer: &mut impl Write, line: &str, count: usize, first_line_printed: bool, @@ -209,7 +206,7 @@ impl Uniq { } if self.show_counts { - writer.write_all(format!("{count:7} {line}").as_bytes()) + write!(writer, "{count:7} {line}") } else { writer.write_all(line.as_bytes()) } @@ -245,19 +242,12 @@ fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> UResult UResult<()> { let matches = uu_app().after_help(AFTER_HELP).try_get_matches_from(args)?; - let files: Vec = matches - .get_many::(ARG_FILES) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); + let files = matches.get_many::(ARG_FILES); - let (in_file_name, out_file_name) = match files.len() { - 0 => ("-".to_owned(), "-".to_owned()), - 1 => (files[0].clone(), "-".to_owned()), - 2 => (files[0].clone(), files[1].clone()), - _ => { - unreachable!() // Cannot happen as clap will fail earlier - } - }; + let (in_file_name, out_file_name) = files + .map(|fi| fi.map(AsRef::as_ref)) + .map(|mut fi| (fi.next(), fi.next())) + .unwrap_or_default(); let uniq = Uniq { repeats_only: matches.get_flag(options::REPEATED) @@ -282,8 +272,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } uniq.print_uniq( - &mut open_input_file(&in_file_name)?, - &mut open_output_file(&out_file_name)?, + open_input_file(in_file_name)?, + open_output_file(out_file_name)?, ) } @@ -387,6 +377,7 @@ pub fn uu_app() -> Command { .arg( Arg::new(ARG_FILES) .action(ArgAction::Append) + .value_parser(ValueParser::os_string()) .num_args(0..=2) .value_hint(clap::ValueHint::FilePath), ) @@ -412,26 +403,26 @@ fn get_delimiter(matches: &ArgMatches) -> Delimiters { } } -fn open_input_file(in_file_name: &str) -> UResult>> { - let in_file = if in_file_name == "-" { - Box::new(stdin()) as Box - } else { - let path = Path::new(in_file_name); - let in_file = File::open(path) - .map_err_context(|| format!("Could not open {}", in_file_name.maybe_quote()))?; - Box::new(in_file) as Box - }; - Ok(BufReader::new(in_file)) +// None or "-" means stdin. +fn open_input_file(in_file_name: Option<&OsStr>) -> UResult> { + Ok(match in_file_name { + Some(path) if path != "-" => { + let in_file = File::open(path) + .map_err_context(|| format!("Could not open {}", path.maybe_quote()))?; + Box::new(BufReader::new(in_file)) + } + _ => Box::new(stdin().lock()), + }) } -fn open_output_file(out_file_name: &str) -> UResult>> { - let out_file = if out_file_name == "-" { - Box::new(stdout()) as Box - } else { - let path = Path::new(out_file_name); - let out_file = File::create(path) - .map_err_context(|| format!("Could not create {}", out_file_name.maybe_quote()))?; - Box::new(out_file) as Box - }; - Ok(BufWriter::new(out_file)) +// None or "-" means stdout. +fn open_output_file(out_file_name: Option<&OsStr>) -> UResult> { + Ok(match out_file_name { + Some(path) if path != "-" => { + let out_file = File::create(path) + .map_err_context(|| format!("Could not open {}", path.maybe_quote()))?; + Box::new(BufWriter::new(out_file)) + } + _ => Box::new(stdout().lock()), + }) }