diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index 86a123530..2f253b580 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -5,7 +5,7 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset +// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS use clap::{crate_version, Arg, Command}; use regex::Regex; @@ -31,6 +31,8 @@ const ABOUT: &str = "\ Mandatory arguments to long options are mandatory for short options too.\n\ With no FILE, or when FILE is -, read standard input. Default is '-F /'."; +const REGEX_CHARCLASS: &str = "^-]\\"; + #[derive(Debug)] enum OutFormat { Dumb, @@ -88,6 +90,18 @@ fn read_word_filter_file( Ok(words) } +/// reads contents of file as unique set of characters to be used with the break-file option +fn read_char_filter_file( + matches: &clap::ArgMatches, + option: &str, +) -> std::io::Result> { + let filename = matches.value_of(option).expect("parsing options failed!"); + let mut reader = File::open(filename)?; + let mut buffer = String::new(); + reader.read_to_string(&mut buffer)?; + Ok(buffer.chars().collect()) +} + #[derive(Debug)] struct WordFilter { only_specified: bool, @@ -113,9 +127,23 @@ impl WordFilter { } else { (false, HashSet::new()) }; - if matches.is_present(options::BREAK_FILE) { - return Err(PtxError::NotImplemented("-b").into()); - } + let break_set: Option> = if matches.is_present(options::BREAK_FILE) + && !matches.is_present(options::WORD_REGEXP) + { + let chars = + read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?; + let mut hs: HashSet = if config.gnu_ext { + HashSet::new() // really only chars found in file + } else { + // GNU off means at least these are considered + [' ', '\t', '\n'].iter().cloned().collect() + }; + hs.extend(chars); + Some(hs) + } else { + // if -W takes precedence or default + None + }; // Ignore empty string regex from cmd-line-args let arg_reg: Option = if matches.is_present(options::WORD_REGEXP) { match matches.value_of(options::WORD_REGEXP) { @@ -134,7 +162,21 @@ impl WordFilter { let reg = match arg_reg { Some(arg_reg) => arg_reg, None => { - if config.gnu_ext { + if break_set.is_some() { + format!( + "[^{}]+", + break_set + .unwrap() + .into_iter() + .map(|c| if REGEX_CHARCLASS.contains(c) { + format!("\\{}", c) + } else { + c.to_string() + }) + .collect::>() + .join("") + ) + } else if config.gnu_ext { "\\w+".to_owned() } else { "[^ \t\n]+".to_owned() diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs index c17d473f5..75c96e42e 100644 --- a/tests/by-util/test_ptx.rs +++ b/tests/by-util/test_ptx.rs @@ -71,3 +71,35 @@ fn gnu_ext_disabled_ignore_and_only_file() { .succeeds() .stdout_only_fixture("gnu_ext_disabled_ignore_and_only_file.expected"); } + +#[test] +fn gnu_ext_disabled_output_width_50() { + new_ucmd!() + .args(&["-G", "-w", "50", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_output_width_50.expected"); +} + +#[test] +fn gnu_ext_disabled_output_width_70() { + new_ucmd!() + .args(&["-G", "-w", "70", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_output_width_70.expected"); +} + +#[test] +fn gnu_ext_disabled_break_file() { + new_ucmd!() + .args(&["-G", "-b", "break_file", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_break_file.expected"); +} + +#[test] +fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() { + new_ucmd!() + .args(&["-G", "-b", "break_file", "-R", "-W", "", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected"); +} diff --git a/tests/fixtures/ptx/break_file b/tests/fixtures/ptx/break_file new file mode 100644 index 000000000..499598c20 --- /dev/null +++ b/tests/fixtures/ptx/break_file @@ -0,0 +1 @@ +abc_e^-]\ diff --git a/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected b/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected new file mode 100644 index 000000000..7ea13471d --- /dev/null +++ b/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected @@ -0,0 +1,42 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like %a, b" "#, c$c" "" +.xx "" "and some other like %a, b#, c" "$c" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "and some other like %a" ", b#, c$c" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "" "let's c" "heck special characters:" "" +.xx "" "let's check special c" "haracters:" "" +.xx "" "let's check spec" "ial characters:" "" +.xx "" "let's chec" "k special characters:" "" +.xx "" "{brac" "kets} for tex" "" +.xx "" "oh, and bac" "k\slash" "" +.xx "" "" "let's check special characters:" "" +.xx "" "let's check specia" "l characters:" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "he" "llo world!" "" +.xx "" "maybe a" "lso~or^" "" +.xx "" "" "maybe also~or^" "" +.xx "" "a" "nd some other like %a, b#, c$c" "" +.xx "" "oh, a" "nd back\slash" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" "let's check special cha" "racters:" "" +.xx "" "{b" "rackets} for tex" "" +.xx "" "and some othe" "r like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "" "let's check special characte" "rs:" "" +.xx "" """quote" "s"", for roff" "" +.xx "" "oh, and back\sla" "sh" "" +.xx "" "oh, and back\" "slash" "" +.xx "" "and" "some other like %a, b#, c$c" "" +.xx "" "let's check" "special characters:" "" +.xx "" "let's check special charac" "ters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "le" "t's check special characters:" "" +.xx "" "{bracke" "ts} for tex" "" +.xx "" "hello" "world!" "" +.xx "" "{brackets} for te" "x" "" +.xx "" "ma" "ybe also~or^" "" +.xx "" "" "{brackets} for tex" "" diff --git a/tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected b/tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected new file mode 100644 index 000000000..c71b0508c --- /dev/null +++ b/tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected @@ -0,0 +1,24 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "maybe" "also~or^" "" +.xx "%a, b#, c$c" "" "and some other like" "" +.xx "" "oh," "and back\slash" "" +.xx "" "some other like %a," "b#, c$c" "and" +.xx "" "oh, and" "back\slash" "" +.xx "" "other like %a, b#," "c$c" "and some" +.xx "" "let's check special" "characters:" "" +.xx "characters:" "let's" "check special" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "characters:" "" "let's check special" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "" "maybe also~or^" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "b#, c$c" "and" "some other like %a," "" +.xx "" "let's check" "special characters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "hello" "world!" "" +.xx "" "" "{brackets} for tex" "" diff --git a/tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected b/tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected new file mode 100644 index 000000000..3886e087d --- /dev/null +++ b/tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected @@ -0,0 +1,24 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "maybe" "also~or^" "" +.xx "" "" "and some other like %a, b#, c$c" "" +.xx "" "oh," "and back\slash" "" +.xx "" "and some other like %a," "b#, c$c" "" +.xx "" "oh, and" "back\slash" "" +.xx "" "and some other like %a, b#," "c$c" "" +.xx "" "let's check special" "characters:" "" +.xx "" "let's" "check special characters:" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "" "" "let's check special characters:" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "" "maybe also~or^" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "" "and" "some other like %a, b#, c$c" "" +.xx "" "let's check" "special characters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "hello" "world!" "" +.xx "" "" "{brackets} for tex" ""