1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 03:57:44 +00:00

Merge pull request #3455 from mike-kfed/ptx_breakfile

ptx: implement breakfile option
This commit is contained in:
Sylvestre Ledru 2022-05-02 08:39:14 +02:00 committed by GitHub
commit fe7829d2f9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 170 additions and 5 deletions

View file

@ -5,7 +5,7 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS
use clap::{crate_version, Arg, Command};
use regex::Regex;
@ -31,6 +31,8 @@ const ABOUT: &str = "\
Mandatory arguments to long options are mandatory for short options too.\n\
With no FILE, or when FILE is -, read standard input. Default is '-F /'.";
/// Characters that get a backslash prefix when they are written into the
/// `[^...]+` regex character class built from the break-file character set.
const REGEX_CHARCLASS: &str = "^-]\\";
#[derive(Debug)]
enum OutFormat {
Dumb,
@ -88,6 +90,18 @@ fn read_word_filter_file(
Ok(words)
}
/// Loads the file named by the command-line `option` and returns the distinct
/// characters it contains (used for the break-file option).
///
/// # Errors
///
/// Returns the underlying `std::io::Error` if the file cannot be opened or
/// its contents cannot be read into a `String`.
///
/// # Panics
///
/// Panics if `option` has no value in `matches`.
fn read_char_filter_file(
    matches: &clap::ArgMatches,
    option: &str,
) -> std::io::Result<HashSet<char>> {
    let path = matches.value_of(option).expect("parsing options failed!");
    let mut contents = String::new();
    File::open(path)?.read_to_string(&mut contents)?;
    // Collecting into a set drops duplicate characters.
    let unique_chars: HashSet<char> = contents.chars().collect();
    Ok(unique_chars)
}
#[derive(Debug)]
struct WordFilter {
only_specified: bool,
@ -113,9 +127,23 @@ impl WordFilter {
} else {
(false, HashSet::new())
};
if matches.is_present(options::BREAK_FILE) {
return Err(PtxError::NotImplemented("-b").into());
}
let break_set: Option<HashSet<char>> = if matches.is_present(options::BREAK_FILE)
&& !matches.is_present(options::WORD_REGEXP)
{
let chars =
read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?;
let mut hs: HashSet<char> = if config.gnu_ext {
HashSet::new() // really only chars found in file
} else {
// GNU off means at least these are considered
[' ', '\t', '\n'].iter().cloned().collect()
};
hs.extend(chars);
Some(hs)
} else {
// if -W takes precedence or default
None
};
// Ignore empty string regex from cmd-line-args
let arg_reg: Option<String> = if matches.is_present(options::WORD_REGEXP) {
match matches.value_of(options::WORD_REGEXP) {
@ -134,7 +162,21 @@ impl WordFilter {
let reg = match arg_reg {
Some(arg_reg) => arg_reg,
None => {
if config.gnu_ext {
if break_set.is_some() {
format!(
"[^{}]+",
break_set
.unwrap()
.into_iter()
.map(|c| if REGEX_CHARCLASS.contains(c) {
format!("\\{}", c)
} else {
c.to_string()
})
.collect::<Vec<String>>()
.join("")
)
} else if config.gnu_ext {
"\\w+".to_owned()
} else {
"[^ \t\n]+".to_owned()

View file

@ -71,3 +71,35 @@ fn gnu_ext_disabled_ignore_and_only_file() {
.succeeds()
.stdout_only_fixture("gnu_ext_disabled_ignore_and_only_file.expected");
}
#[test]
fn gnu_ext_disabled_output_width_50() {
    // Run with `-G` and an output width of 50 (`-w 50`) on the `input` fixture.
    let cli_args = ["-G", "-w", "50", "input"];
    let expected_fixture = "gnu_ext_disabled_output_width_50.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
#[test]
fn gnu_ext_disabled_output_width_70() {
    // Run with `-G` and an output width of 70 (`-w 70`) on the `input` fixture.
    let cli_args = ["-G", "-w", "70", "input"];
    let expected_fixture = "gnu_ext_disabled_output_width_70.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
#[test]
fn gnu_ext_disabled_break_file() {
    // Run with `-G` and take the break characters from the `break_file` fixture (`-b`).
    let cli_args = ["-G", "-b", "break_file", "input"];
    let expected_fixture = "gnu_ext_disabled_break_file.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
#[test]
fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() {
    // `-b break_file` is passed together with an empty `-W` regex; the output
    // is checked against the fixture named for the plain `-R` run, i.e. the
    // break file is expected to have no effect here.
    let cli_args = ["-G", "-b", "break_file", "-R", "-W", "", "input"];
    let expected_fixture = "gnu_ext_disabled_rightward_no_ref.expected";
    new_ucmd!()
        .args(&cli_args)
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}

1
tests/fixtures/ptx/break_file vendored Normal file
View file

@ -0,0 +1 @@
abc_e^-]\

View file

@ -0,0 +1,42 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like %a, b" "#, c$c" ""
.xx "" "and some other like %a, b#, c" "$c" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "and some other like %a" ", b#, c$c" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "" "let's c" "heck special characters:" ""
.xx "" "let's check special c" "haracters:" ""
.xx "" "let's check spec" "ial characters:" ""
.xx "" "let's chec" "k special characters:" ""
.xx "" "{brac" "kets} for tex" ""
.xx "" "oh, and bac" "k\slash" ""
.xx "" "" "let's check special characters:" ""
.xx "" "let's check specia" "l characters:" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "he" "llo world!" ""
.xx "" "maybe a" "lso~or^" ""
.xx "" "" "maybe also~or^" ""
.xx "" "a" "nd some other like %a, b#, c$c" ""
.xx "" "oh, a" "nd back\slash" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" "let's check special cha" "racters:" ""
.xx "" "{b" "rackets} for tex" ""
.xx "" "and some othe" "r like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "" "let's check special characte" "rs:" ""
.xx "" """quote" "s"", for roff" ""
.xx "" "oh, and back\sla" "sh" ""
.xx "" "oh, and back\" "slash" ""
.xx "" "and" "some other like %a, b#, c$c" ""
.xx "" "let's check" "special characters:" ""
.xx "" "let's check special charac" "ters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "le" "t's check special characters:" ""
.xx "" "{bracke" "ts} for tex" ""
.xx "" "hello" "world!" ""
.xx "" "{brackets} for te" "x" ""
.xx "" "ma" "ybe also~or^" ""
.xx "" "" "{brackets} for tex" ""

View file

@ -0,0 +1,24 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "maybe" "also~or^" ""
.xx "%a, b#, c$c" "" "and some other like" ""
.xx "" "oh," "and back\slash" ""
.xx "" "some other like %a," "b#, c$c" "and"
.xx "" "oh, and" "back\slash" ""
.xx "" "other like %a, b#," "c$c" "and some"
.xx "" "let's check special" "characters:" ""
.xx "characters:" "let's" "check special" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "characters:" "" "let's check special" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "" "maybe also~or^" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "b#, c$c" "and" "some other like %a," ""
.xx "" "let's check" "special characters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "hello" "world!" ""
.xx "" "" "{brackets} for tex" ""

View file

@ -0,0 +1,24 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "maybe" "also~or^" ""
.xx "" "" "and some other like %a, b#, c$c" ""
.xx "" "oh," "and back\slash" ""
.xx "" "and some other like %a," "b#, c$c" ""
.xx "" "oh, and" "back\slash" ""
.xx "" "and some other like %a, b#," "c$c" ""
.xx "" "let's check special" "characters:" ""
.xx "" "let's" "check special characters:" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "" "" "let's check special characters:" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "" "maybe also~or^" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "" "and" "some other like %a, b#, c$c" ""
.xx "" "let's check" "special characters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "hello" "world!" ""
.xx "" "" "{brackets} for tex" ""