From c6ad2441424d546c6ade98d9f5b9f27926e4c30c Mon Sep 17 00:00:00 2001 From: Michael Kefeder Date: Thu, 28 Apr 2022 16:46:10 +0200 Subject: [PATCH 1/6] tests/ptx: verify output width is handled correctly --- tests/by-util/test_ptx.rs | 16 +++++++++++++ .../gnu_ext_disabled_output_width_50.expected | 24 +++++++++++++++++++ .../gnu_ext_disabled_output_width_70.expected | 24 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected create mode 100644 tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs index c17d473f5..e990cac73 100644 --- a/tests/by-util/test_ptx.rs +++ b/tests/by-util/test_ptx.rs @@ -71,3 +71,19 @@ fn gnu_ext_disabled_ignore_and_only_file() { .succeeds() .stdout_only_fixture("gnu_ext_disabled_ignore_and_only_file.expected"); } + +#[test] +fn gnu_ext_disabled_output_width_50() { + new_ucmd!() + .args(&["-G", "-w", "50", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_output_width_50.expected"); +} + +#[test] +fn gnu_ext_disabled_output_width_70() { + new_ucmd!() + .args(&["-G", "-w", "70", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_output_width_70.expected"); +} diff --git a/tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected b/tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected new file mode 100644 index 000000000..c71b0508c --- /dev/null +++ b/tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected @@ -0,0 +1,24 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "maybe" "also~or^" "" +.xx "%a, b#, c$c" "" "and some other like" "" +.xx "" "oh," "and back\slash" "" +.xx "" "some other like %a," "b#, c$c" "and" +.xx "" "oh, and" "back\slash" "" +.xx "" "other like %a, b#," "c$c" "and some" +.xx "" "let's check special" "characters:" "" +.xx "characters:" "let's" "check special" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "characters:" "" "let's check special" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "" "maybe also~or^" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "b#, c$c" "and" "some other like %a," "" +.xx "" "let's check" "special characters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "hello" "world!" "" +.xx "" "" "{brackets} for tex" "" diff --git a/tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected b/tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected new file mode 100644 index 000000000..3886e087d --- /dev/null +++ b/tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected @@ -0,0 +1,24 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "maybe" "also~or^" "" +.xx "" "" "and some other like %a, b#, c$c" "" +.xx "" "oh," "and back\slash" "" +.xx "" "and some other like %a," "b#, c$c" "" +.xx "" "oh, and" "back\slash" "" +.xx "" "and some other like %a, b#," "c$c" "" +.xx "" "let's check special" "characters:" "" +.xx "" "let's" "check special characters:" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "" "" "let's check special characters:" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "" "maybe also~or^" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "" "and" "some other like %a, b#, c$c" "" +.xx "" "let's check" "special characters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "hello" "world!" "" +.xx "" "" "{brackets} for tex" "" From b0567670d104c4b0595ec45f916c6235f5f6c087 Mon Sep 17 00:00:00 2001 From: Michael Kefeder Date: Thu, 28 Apr 2022 16:51:06 +0200 Subject: [PATCH 2/6] ptx: implement breakfile option --- src/uu/ptx/src/ptx.rs | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index 86a123530..64fe421ad 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -88,6 +88,17 @@ fn read_word_filter_file( Ok(words) } +fn read_char_filter_file( + matches: &clap::ArgMatches, + option: &str, +) -> std::io::Result> { + let filename = matches.value_of(option).expect("parsing options failed!"); + let mut reader = File::open(filename)?; + let mut buffer = String::new(); + reader.read_to_string(&mut buffer)?; + Ok(buffer.chars().collect()) +} + #[derive(Debug)] struct WordFilter { only_specified: bool, @@ -113,9 +124,23 @@ impl WordFilter { } else { (false, HashSet::new()) }; - if matches.is_present(options::BREAK_FILE) { - return Err(PtxError::NotImplemented("-b").into()); - } + let break_set: Option> = if matches.is_present(options::BREAK_FILE) + && !matches.is_present(options::WORD_REGEXP) + { + let chars = + read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?; + let mut hs: HashSet = if config.gnu_ext { + HashSet::new() // really only chars found in file + } else { + // GNU off means at least these are considered + [' ', '\t', '\n'].iter().cloned().collect() + }; + hs.extend(chars); + Some(hs) + } else { + // if -W takes precedence or default + None + }; // Ignore empty string regex from cmd-line-args let arg_reg: Option = if matches.is_present(options::WORD_REGEXP) { match matches.value_of(options::WORD_REGEXP) { @@ -134,7 +159,17 @@ impl WordFilter { let reg = match arg_reg { Some(arg_reg) => arg_reg, None => { - if config.gnu_ext { + if break_set.is_some() { + format!( + "[^{}]+", + break_set + .unwrap() + .into_iter() + .map(|c| c.to_string()) + .collect::>() + .join("") + ) + } else if config.gnu_ext { "\\w+".to_owned() } else { "[^ \t\n]+".to_owned() From 994dedd6d97ff0cedc9b190e355f1e612c497638 Mon Sep 17 00:00:00 2001 From: Michael Kefeder Date: Fri, 29 Apr 2022 10:15:06 +0200 Subject: [PATCH 3/6] tests/ptx: added breakfile option tests --- tests/by-util/test_ptx.rs | 16 ++++++++ tests/fixtures/ptx/break_file | 1 + .../ptx/gnu_ext_disabled_break_file.expected | 41 +++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 tests/fixtures/ptx/break_file create mode 100644 tests/fixtures/ptx/gnu_ext_disabled_break_file.expected diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs index e990cac73..75c96e42e 100644 --- a/tests/by-util/test_ptx.rs +++ b/tests/by-util/test_ptx.rs @@ -87,3 +87,19 @@ fn gnu_ext_disabled_output_width_70() { .succeeds() .stdout_only_fixture("gnu_ext_disabled_output_width_70.expected"); } + +#[test] +fn gnu_ext_disabled_break_file() { + new_ucmd!() + .args(&["-G", "-b", "break_file", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_break_file.expected"); +} + +#[test] +fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() { + new_ucmd!() + .args(&["-G", "-b", "break_file", "-R", "-W", "", "input"]) + .succeeds() + .stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected"); +} diff --git a/tests/fixtures/ptx/break_file b/tests/fixtures/ptx/break_file new file mode 100644 index 000000000..4c992d40a --- /dev/null +++ b/tests/fixtures/ptx/break_file @@ -0,0 +1 @@ +abc_e diff --git a/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected b/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected new file mode 100644 index 000000000..7afce1861 --- /dev/null +++ b/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected @@ -0,0 +1,41 @@ +.xx "" "" """quotes"", for roff" "" +.xx "" "and some other like %a, b" "#, c$c" "" +.xx "" "and some other like %a, b#, c" "$c" "" +.xx "" "and some other like" "%a, b#, c$c" "" +.xx "" "and some other like %a" ", b#, c$c" "" +.xx "" """quotes""," "for roff" "" +.xx "" "{brackets}" "for tex" "" +.xx "" "" "hello world!" "" +.xx "" "let's c" "heck special characters:" "" +.xx "" "let's check special c" "haracters:" "" +.xx "" "let's check spec" "ial characters:" "" +.xx "" "let's chec" "k special characters:" "" +.xx "" "{brac" "kets} for tex" "" +.xx "" "oh, and bac" "k\slash" "" +.xx "" "" "let's check special characters:" "" +.xx "" "let's check specia" "l characters:" "" +.xx "" "and some other" "like %a, b#, c$c" "" +.xx "" "he" "llo world!" "" +.xx "" "maybe a" "lso~or^" "" +.xx "" "" "maybe also~or^" "" +.xx "" "a" "nd some other like %a, b#, c$c" "" +.xx "" "oh, a" "nd back\slash" "" +.xx "" "" "oh, and back\slash" "" +.xx "" "and some" "other like %a, b#, c$c" "" +.xx "" "let's check special cha" "racters:" "" +.xx "" "{b" "rackets} for tex" "" +.xx "" "and some othe" "r like %a, b#, c$c" "" +.xx "" """quotes"", for" "roff" "" +.xx "" "let's check special characte" "rs:" "" +.xx "" """quote" "s"", for roff" "" +.xx "" "oh, and back\sla" "sh" "" +.xx "" "and" "some other like %a, b#, c$c" "" +.xx "" "let's check" "special characters:" "" +.xx "" "let's check special charac" "ters:" "" +.xx "" "{brackets} for" "tex" "" +.xx "" "le" "t's check special characters:" "" +.xx "" "{bracke" "ts} for tex" "" +.xx "" "hello" "world!" "" +.xx "" "{brackets} for te" "x" "" +.xx "" "ma" "ybe also~or^" "" +.xx "" "" "{brackets} for tex" "" From 4889128edefa9293a15e7c8febce35ede94ef1b0 Mon Sep 17 00:00:00 2001 From: Michael Kefeder Date: Fri, 29 Apr 2022 10:57:36 +0200 Subject: [PATCH 4/6] ptx: add documentation to read_char_filter_file function --- src/uu/ptx/src/ptx.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index 64fe421ad..c273b976c 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -88,6 +88,7 @@ fn read_word_filter_file( Ok(words) } +/// reads contents of file as unique set of characters to be used with the break-file option fn read_char_filter_file( matches: &clap::ArgMatches, option: &str, From 83a64f4afed34f4d0c885ea00d4971cdd9ffdaeb Mon Sep 17 00:00:00 2001 From: Michael Kefeder Date: Sat, 30 Apr 2022 10:01:11 +0200 Subject: [PATCH 5/6] ptx: escape regular expression character class special chars --- src/uu/ptx/src/ptx.rs | 8 +++++++- tests/fixtures/ptx/break_file | 2 +- tests/fixtures/ptx/gnu_ext_disabled_break_file.expected | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index c273b976c..c3bedb266 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -31,6 +31,8 @@ const ABOUT: &str = "\ Mandatory arguments to long options are mandatory for short options too.\n\ With no FILE, or when FILE is -, read standard input. Default is '-F /'."; +const REGEX_CHARCLASS: &str = "^-]\\"; + #[derive(Debug)] enum OutFormat { Dumb, @@ -166,7 +168,11 @@ impl WordFilter { break_set .unwrap() .into_iter() - .map(|c| c.to_string()) + .map(|c| if REGEX_CHARCLASS.contains(c) { + format!("\\{}", c) + } else { + c.to_string() + }) .collect::>() .join("") ) diff --git a/tests/fixtures/ptx/break_file b/tests/fixtures/ptx/break_file index 4c992d40a..499598c20 100644 --- a/tests/fixtures/ptx/break_file +++ b/tests/fixtures/ptx/break_file @@ -1 +1 @@ -abc_e +abc_e^-]\ diff --git a/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected b/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected index 7afce1861..7ea13471d 100644 --- a/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected +++ b/tests/fixtures/ptx/gnu_ext_disabled_break_file.expected @@ -29,6 +29,7 @@ .xx "" "let's check special characte" "rs:" "" .xx "" """quote" "s"", for roff" "" .xx "" "oh, and back\sla" "sh" "" +.xx "" "oh, and back\" "slash" "" .xx "" "and" "some other like %a, b#, c$c" "" .xx "" "let's check" "special characters:" "" .xx "" "let's check special charac" "ters:" "" From 3078ca8346fd15b89aab74916c048da877c33dfd Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 1 May 2022 20:18:33 +0200 Subject: [PATCH 6/6] Add CHARCLASS to the spell ignore --- src/uu/ptx/src/ptx.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index c3bedb266..2f253b580 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -5,7 +5,7 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset +// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS use clap::{crate_version, Arg, Command}; use regex::Regex;