mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 03:57:44 +00:00
Merge pull request #3455 from mike-kfed/ptx_breakfile
ptx: implement breakfile option
This commit is contained in:
commit
fe7829d2f9
6 changed files with 170 additions and 5 deletions
|
@ -5,7 +5,7 @@
|
|||
// * For the full copyright and license information, please view the LICENSE
|
||||
// * file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset
|
||||
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS
|
||||
|
||||
use clap::{crate_version, Arg, Command};
|
||||
use regex::Regex;
|
||||
|
@ -31,6 +31,8 @@ const ABOUT: &str = "\
|
|||
Mandatory arguments to long options are mandatory for short options too.\n\
|
||||
With no FILE, or when FILE is -, read standard input. Default is '-F /'.";
|
||||
|
||||
/// Characters that are special inside a regex bracket expression (`[...]`).
///
/// Each break-file character found in this list is prefixed with a backslash
/// before it is placed into the generated `[^...]+` word pattern, so that it
/// is matched literally:
/// * `^` would negate the class,
/// * `-` would form a range,
/// * `]` would close the class,
/// * `\` would start an escape sequence,
/// * `[` would open a nested class (the `regex` crate supports nested
///   classes, so a bare `[` leaves the pattern unclosed and invalid).
const REGEX_CHARCLASS: &str = "^-]\\[";
|
||||
|
||||
#[derive(Debug)]
|
||||
enum OutFormat {
|
||||
Dumb,
|
||||
|
@ -88,6 +90,18 @@ fn read_word_filter_file(
|
|||
Ok(words)
|
||||
}
|
||||
|
||||
/// reads contents of file as unique set of characters to be used with the break-file option
|
||||
fn read_char_filter_file(
|
||||
matches: &clap::ArgMatches,
|
||||
option: &str,
|
||||
) -> std::io::Result<HashSet<char>> {
|
||||
let filename = matches.value_of(option).expect("parsing options failed!");
|
||||
let mut reader = File::open(filename)?;
|
||||
let mut buffer = String::new();
|
||||
reader.read_to_string(&mut buffer)?;
|
||||
Ok(buffer.chars().collect())
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct WordFilter {
|
||||
only_specified: bool,
|
||||
|
@ -113,9 +127,23 @@ impl WordFilter {
|
|||
} else {
|
||||
(false, HashSet::new())
|
||||
};
|
||||
if matches.is_present(options::BREAK_FILE) {
|
||||
return Err(PtxError::NotImplemented("-b").into());
|
||||
}
|
||||
let break_set: Option<HashSet<char>> = if matches.is_present(options::BREAK_FILE)
|
||||
&& !matches.is_present(options::WORD_REGEXP)
|
||||
{
|
||||
let chars =
|
||||
read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?;
|
||||
let mut hs: HashSet<char> = if config.gnu_ext {
|
||||
HashSet::new() // really only chars found in file
|
||||
} else {
|
||||
// GNU off means at least these are considered
|
||||
[' ', '\t', '\n'].iter().cloned().collect()
|
||||
};
|
||||
hs.extend(chars);
|
||||
Some(hs)
|
||||
} else {
|
||||
// if -W takes precedence or default
|
||||
None
|
||||
};
|
||||
// Ignore empty string regex from cmd-line-args
|
||||
let arg_reg: Option<String> = if matches.is_present(options::WORD_REGEXP) {
|
||||
match matches.value_of(options::WORD_REGEXP) {
|
||||
|
@ -134,7 +162,21 @@ impl WordFilter {
|
|||
let reg = match arg_reg {
|
||||
Some(arg_reg) => arg_reg,
|
||||
None => {
|
||||
if config.gnu_ext {
|
||||
if break_set.is_some() {
|
||||
format!(
|
||||
"[^{}]+",
|
||||
break_set
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(|c| if REGEX_CHARCLASS.contains(c) {
|
||||
format!("\\{}", c)
|
||||
} else {
|
||||
c.to_string()
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join("")
|
||||
)
|
||||
} else if config.gnu_ext {
|
||||
"\\w+".to_owned()
|
||||
} else {
|
||||
"[^ \t\n]+".to_owned()
|
||||
|
|
|
@ -71,3 +71,35 @@ fn gnu_ext_disabled_ignore_and_only_file() {
|
|||
.succeeds()
|
||||
.stdout_only_fixture("gnu_ext_disabled_ignore_and_only_file.expected");
|
||||
}
|
||||
|
||||
/// With GNU extensions disabled (`-G`) and an output width of 50 (`-w 50`),
/// the output for `input` must match the stored fixture exactly.
#[test]
fn gnu_ext_disabled_output_width_50() {
    let cli_args = ["-G", "-w", "50", "input"];
    let expected_fixture = "gnu_ext_disabled_output_width_50.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
|
||||
|
||||
/// With GNU extensions disabled (`-G`) and an output width of 70 (`-w 70`),
/// the output for `input` must match the stored fixture exactly.
#[test]
fn gnu_ext_disabled_output_width_70() {
    let cli_args = ["-G", "-w", "70", "input"];
    let expected_fixture = "gnu_ext_disabled_output_width_70.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
|
||||
|
||||
/// With GNU extensions disabled (`-G`), passing a break file via `-b` must
/// produce the output stored in the break-file fixture.
#[test]
fn gnu_ext_disabled_break_file() {
    let cli_args = ["-G", "-b", "break_file", "input"];
    let expected_fixture = "gnu_ext_disabled_break_file.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
|
||||
|
||||
/// When `-W` is given (even with an empty pattern) together with `-b`, the
/// break file is not used: the output must equal the fixture recorded for the
/// plain `-R` run without a break file.
#[test]
fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() {
    let cli_args = ["-G", "-b", "break_file", "-R", "-W", "", "input"];
    let expected_fixture = "gnu_ext_disabled_rightward_no_ref.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
|
||||
|
|
1
tests/fixtures/ptx/break_file
vendored
Normal file
1
tests/fixtures/ptx/break_file
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
abc_e^-]\
|
42
tests/fixtures/ptx/gnu_ext_disabled_break_file.expected
vendored
Normal file
42
tests/fixtures/ptx/gnu_ext_disabled_break_file.expected
vendored
Normal file
|
@ -0,0 +1,42 @@
|
|||
.xx "" "" """quotes"", for roff" ""
|
||||
.xx "" "and some other like %a, b" "#, c$c" ""
|
||||
.xx "" "and some other like %a, b#, c" "$c" ""
|
||||
.xx "" "and some other like" "%a, b#, c$c" ""
|
||||
.xx "" "and some other like %a" ", b#, c$c" ""
|
||||
.xx "" """quotes""," "for roff" ""
|
||||
.xx "" "{brackets}" "for tex" ""
|
||||
.xx "" "" "hello world!" ""
|
||||
.xx "" "let's c" "heck special characters:" ""
|
||||
.xx "" "let's check special c" "haracters:" ""
|
||||
.xx "" "let's check spec" "ial characters:" ""
|
||||
.xx "" "let's chec" "k special characters:" ""
|
||||
.xx "" "{brac" "kets} for tex" ""
|
||||
.xx "" "oh, and bac" "k\slash" ""
|
||||
.xx "" "" "let's check special characters:" ""
|
||||
.xx "" "let's check specia" "l characters:" ""
|
||||
.xx "" "and some other" "like %a, b#, c$c" ""
|
||||
.xx "" "he" "llo world!" ""
|
||||
.xx "" "maybe a" "lso~or^" ""
|
||||
.xx "" "" "maybe also~or^" ""
|
||||
.xx "" "a" "nd some other like %a, b#, c$c" ""
|
||||
.xx "" "oh, a" "nd back\slash" ""
|
||||
.xx "" "" "oh, and back\slash" ""
|
||||
.xx "" "and some" "other like %a, b#, c$c" ""
|
||||
.xx "" "let's check special cha" "racters:" ""
|
||||
.xx "" "{b" "rackets} for tex" ""
|
||||
.xx "" "and some othe" "r like %a, b#, c$c" ""
|
||||
.xx "" """quotes"", for" "roff" ""
|
||||
.xx "" "let's check special characte" "rs:" ""
|
||||
.xx "" """quote" "s"", for roff" ""
|
||||
.xx "" "oh, and back\sla" "sh" ""
|
||||
.xx "" "oh, and back\" "slash" ""
|
||||
.xx "" "and" "some other like %a, b#, c$c" ""
|
||||
.xx "" "let's check" "special characters:" ""
|
||||
.xx "" "let's check special charac" "ters:" ""
|
||||
.xx "" "{brackets} for" "tex" ""
|
||||
.xx "" "le" "t's check special characters:" ""
|
||||
.xx "" "{bracke" "ts} for tex" ""
|
||||
.xx "" "hello" "world!" ""
|
||||
.xx "" "{brackets} for te" "x" ""
|
||||
.xx "" "ma" "ybe also~or^" ""
|
||||
.xx "" "" "{brackets} for tex" ""
|
24
tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected
vendored
Normal file
24
tests/fixtures/ptx/gnu_ext_disabled_output_width_50.expected
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
.xx "" "" """quotes"", for roff" ""
|
||||
.xx "" "and some other like" "%a, b#, c$c" ""
|
||||
.xx "" "maybe" "also~or^" ""
|
||||
.xx "%a, b#, c$c" "" "and some other like" ""
|
||||
.xx "" "oh," "and back\slash" ""
|
||||
.xx "" "some other like %a," "b#, c$c" "and"
|
||||
.xx "" "oh, and" "back\slash" ""
|
||||
.xx "" "other like %a, b#," "c$c" "and some"
|
||||
.xx "" "let's check special" "characters:" ""
|
||||
.xx "characters:" "let's" "check special" ""
|
||||
.xx "" """quotes""," "for roff" ""
|
||||
.xx "" "{brackets}" "for tex" ""
|
||||
.xx "" "" "hello world!" ""
|
||||
.xx "characters:" "" "let's check special" ""
|
||||
.xx "" "and some other" "like %a, b#, c$c" ""
|
||||
.xx "" "" "maybe also~or^" ""
|
||||
.xx "" "" "oh, and back\slash" ""
|
||||
.xx "" "and some" "other like %a, b#, c$c" ""
|
||||
.xx "" """quotes"", for" "roff" ""
|
||||
.xx "b#, c$c" "and" "some other like %a," ""
|
||||
.xx "" "let's check" "special characters:" ""
|
||||
.xx "" "{brackets} for" "tex" ""
|
||||
.xx "" "hello" "world!" ""
|
||||
.xx "" "" "{brackets} for tex" ""
|
24
tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected
vendored
Normal file
24
tests/fixtures/ptx/gnu_ext_disabled_output_width_70.expected
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
.xx "" "" """quotes"", for roff" ""
|
||||
.xx "" "and some other like" "%a, b#, c$c" ""
|
||||
.xx "" "maybe" "also~or^" ""
|
||||
.xx "" "" "and some other like %a, b#, c$c" ""
|
||||
.xx "" "oh," "and back\slash" ""
|
||||
.xx "" "and some other like %a," "b#, c$c" ""
|
||||
.xx "" "oh, and" "back\slash" ""
|
||||
.xx "" "and some other like %a, b#," "c$c" ""
|
||||
.xx "" "let's check special" "characters:" ""
|
||||
.xx "" "let's" "check special characters:" ""
|
||||
.xx "" """quotes""," "for roff" ""
|
||||
.xx "" "{brackets}" "for tex" ""
|
||||
.xx "" "" "hello world!" ""
|
||||
.xx "" "" "let's check special characters:" ""
|
||||
.xx "" "and some other" "like %a, b#, c$c" ""
|
||||
.xx "" "" "maybe also~or^" ""
|
||||
.xx "" "" "oh, and back\slash" ""
|
||||
.xx "" "and some" "other like %a, b#, c$c" ""
|
||||
.xx "" """quotes"", for" "roff" ""
|
||||
.xx "" "and" "some other like %a, b#, c$c" ""
|
||||
.xx "" "let's check" "special characters:" ""
|
||||
.xx "" "{brackets} for" "tex" ""
|
||||
.xx "" "hello" "world!" ""
|
||||
.xx "" "" "{brackets} for tex" ""
|
Loading…
Add table
Add a link
Reference in a new issue