diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index 5d7945448..b833282d8 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -15,14 +15,12 @@ use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::num::ParseIntError; use uucore::display::Quotable; -use uucore::error::{FromIo, UError, UResult}; +use uucore::error::{FromIo, UError, UResult, UUsageError}; use uucore::{format_usage, help_about, help_usage}; const USAGE: &str = help_usage!("ptx.md"); const ABOUT: &str = help_about!("ptx.md"); -const REGEX_CHARCLASS: &str = "^-]\\"; - #[derive(Debug)] enum OutFormat { Dumb, @@ -71,8 +69,12 @@ fn read_word_filter_file( .get_one::(option) .expect("parsing options failed!") .to_string(); - let file = File::open(filename)?; - let reader = BufReader::new(file); + let reader: BufReader> = BufReader::new(if filename == "-" { + Box::new(stdin()) + } else { + let file = File::open(filename)?; + Box::new(file) + }); let mut words: HashSet = HashSet::new(); for word in reader.lines() { words.insert(word?); @@ -88,7 +90,12 @@ fn read_char_filter_file( let filename = matches .get_one::(option) .expect("parsing options failed!"); - let mut reader = File::open(filename)?; + let mut reader: Box = if filename == "-" { + Box::new(stdin()) + } else { + let file = File::open(filename)?; + Box::new(file) + }; let mut buffer = String::new(); reader.read_to_string(&mut buffer)?; Ok(buffer.chars().collect()) @@ -155,18 +162,10 @@ impl WordFilter { let reg = match arg_reg { Some(arg_reg) => arg_reg, None => { - if break_set.is_some() { + if let Some(break_set) = break_set { format!( "[^{}]+", - break_set - .unwrap() - .into_iter() - .map(|c| if REGEX_CHARCLASS.contains(c) { - format!("\\{c}") - } else { - c.to_string() - }) - .collect::() + regex::escape(&break_set.into_iter().collect::()) ) } else if config.gnu_ext { "\\w+".to_owned() @@ -260,10 +259,17 @@ fn get_config(matches: &clap::ArgMatches) -> UResult { .parse() .map_err(PtxError::ParseError)?; } - if matches.get_flag(options::FORMAT_ROFF) { + if let Some(format) = matches.get_one::(options::FORMAT) { + config.format = match format.as_str() { + "roff" => OutFormat::Roff, + "tex" => OutFormat::Tex, + _ => unreachable!("should be caught by clap"), + }; + } + if matches.get_flag(options::format::ROFF) { config.format = OutFormat::Roff; } - if matches.get_flag(options::FORMAT_TEX) { + if matches.get_flag(options::format::TEX) { config.format = OutFormat::Tex; } Ok(config) @@ -277,20 +283,10 @@ struct FileContent { type FileMap = HashMap; -fn read_input(input_files: &[String], config: &Config) -> std::io::Result { +fn read_input(input_files: &[String]) -> std::io::Result { let mut file_map: FileMap = HashMap::new(); - let mut files = Vec::new(); - if input_files.is_empty() { - files.push("-"); - } else if config.gnu_ext { - for file in input_files { - files.push(file); - } - } else { - files.push(&input_files[0]); - } let mut offset: usize = 0; - for filename in files { + for filename in input_files { let reader: BufReader> = BufReader::new(if filename == "-" { Box::new(stdin()) } else { @@ -344,7 +340,7 @@ fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) -> continue; } if config.ignore_case { - word = word.to_lowercase(); + word = word.to_uppercase(); } word_set.insert(WordRef { word, @@ -693,15 +689,19 @@ fn write_traditional_output( } mod options { + pub mod format { + pub static ROFF: &str = "roff"; + pub static TEX: &str = "tex"; + } + pub static FILE: &str = "file"; pub static AUTO_REFERENCE: &str = "auto-reference"; pub static TRADITIONAL: &str = "traditional"; pub static FLAG_TRUNCATION: &str = "flag-truncation"; pub static MACRO_NAME: &str = "macro-name"; - pub static FORMAT_ROFF: &str = "format=roff"; + pub static FORMAT: &str = "format"; pub static RIGHT_SIDE_REFS: &str = "right-side-refs"; pub static SENTENCE_REGEXP: &str = "sentence-regexp"; - pub static FORMAT_TEX: &str = "format=tex"; pub static WORD_REGEXP: &str = "word-regexp"; pub static BREAK_FILE: &str = "break-file"; pub static IGNORE_CASE: &str = "ignore-case"; @@ -715,21 +715,40 @@ mod options { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().try_get_matches_from(args)?; - - let mut input_files: Vec = match &matches.get_many::(options::FILE) { - Some(v) => v.clone().cloned().collect(), - None => vec!["-".to_string()], - }; - let config = get_config(&matches)?; - let word_filter = WordFilter::new(&matches, &config)?; - let file_map = read_input(&input_files, &config).map_err_context(String::new)?; - let word_set = create_word_set(&config, &word_filter, &file_map); - let output_file = if !config.gnu_ext && input_files.len() == 2 { - input_files.pop().unwrap() + + let input_files; + let output_file; + + let mut files = matches + .get_many::(options::FILE) + .into_iter() + .flatten() + .cloned(); + + if !config.gnu_ext { + input_files = vec![files.next().unwrap_or("-".to_string())]; + output_file = files.next().unwrap_or("-".to_string()); + if let Some(file) = files.next() { + return Err(UUsageError::new( + 1, + format!("extra operand {}", file.quote()), + )); + } } else { - "-".to_string() - }; + input_files = { + let mut files = files.collect::>(); + if files.is_empty() { + files.push("-".to_string()); + } + files + }; + output_file = "-".to_string(); + } + + let word_filter = WordFilter::new(&matches, &config)?; + let file_map = read_input(&input_files).map_err_context(String::new)?; + let word_set = create_word_set(&config, &word_filter, &file_map); write_traditional_output(&config, &file_map, &word_set, &output_file) } @@ -774,10 +793,24 @@ pub fn uu_app() -> Command { .value_name("STRING"), ) .arg( - Arg::new(options::FORMAT_ROFF) + Arg::new(options::FORMAT) + .long(options::FORMAT) + .hide(true) + .value_parser(["roff", "tex"]) + .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]), + ) + .arg( + Arg::new(options::format::ROFF) .short('O') - .long(options::FORMAT_ROFF) .help("generate output as roff directives") + .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(options::format::TEX) + .short('T') + .help("generate output as TeX directives") + .overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]) .action(ArgAction::SetTrue), ) .arg( @@ -794,13 +827,6 @@ pub fn uu_app() -> Command { .help("for end of lines or end of sentences") .value_name("REGEXP"), ) - .arg( - Arg::new(options::FORMAT_TEX) - .short('T') - .long(options::FORMAT_TEX) - .help("generate output as TeX directives") - .action(ArgAction::SetTrue), - ) .arg( Arg::new(options::WORD_REGEXP) .short('W') diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs index 4ae4fcba6..20d4a3280 100644 --- a/tests/by-util/test_ptx.rs +++ b/tests/by-util/test_ptx.rs @@ -2,6 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +// spell-checker:ignore roff use crate::common::util::TestScenario; #[test] @@ -112,3 +113,50 @@ fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() { .succeeds() .stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected"); } + +#[test] +fn test_reject_too_many_operands() { + new_ucmd!().args(&["-G", "-", "-", "-"]).fails_with_code(1); +} + +#[test] +fn test_break_file_regex_escaping() { + new_ucmd!() + .pipe_in("\\.+*?()|[]{}^$#&-~") + .args(&["-G", "-b", "-", "input"]) + .succeeds() + .stdout_only_fixture("break_file_regex_escaping.expected"); +} + +#[test] +fn test_ignore_case() { + new_ucmd!() + .args(&["-G", "-f"]) + .pipe_in("a _") + .succeeds() + .stdout_only(".xx \"\" \"\" \"a _\" \"\"\n.xx \"\" \"a\" \"_\" \"\"\n"); +} + +#[test] +fn test_format() { + new_ucmd!() + .args(&["-G", "-O"]) + .pipe_in("a") + .succeeds() + .stdout_only(".xx \"\" \"\" \"a\" \"\"\n"); + new_ucmd!() + .args(&["-G", "-T"]) + .pipe_in("a") + .succeeds() + .stdout_only("\\xx {}{}{a}{}{}\n"); + new_ucmd!() + .args(&["-G", "--format=roff"]) + .pipe_in("a") + .succeeds() + .stdout_only(".xx \"\" \"\" \"a\" \"\"\n"); + new_ucmd!() + .args(&["-G", "--format=tex"]) + .pipe_in("a") + .succeeds() + .stdout_only("\\xx {}{}{a}{}{}\n"); +} diff --git a/tests/fixtures/ptx/break_file_regex_escaping.expected b/tests/fixtures/ptx/break_file_regex_escaping.expected new file mode 100644 index 000000000..48e3b1519 --- /dev/null +++ b/tests/fixtures/ptx/break_file_regex_escaping.expected @@ -0,0 +1,28 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "and some other like %a, b#" ", c$c" "" +.xx "" "maybe" "also~or^" "" +.xx "" "" "and some other like %a, b#, c$c" "" +.xx "" "oh," "and back\slash" "" +.xx "" "and some other like %a," "b#, c$c" "" +.xx "" "oh, and" "back\slash" "" +.xx "" "{" "brackets} for tex" "" +.xx "" "and some other like %a, b#," "c$c" "" +.xx "" "and some other like %a, b#, c$" "c" "" +.xx "" "let's check special" "characters:" "" +.xx "" "let's" "check special characters:" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "" "" "let's check special characters:" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "" "maybe also~or^" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "maybe also~" "or^" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "" "oh, and back\" "slash" "" +.xx "" "and" "some other like %a, b#, c$c" "" +.xx "" "let's check" "special characters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "hello" "world!" ""