mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
ptx: fixes
This commit is contained in:
parent
a64fce8286
commit
412d2b3b1f
3 changed files with 157 additions and 55 deletions
|
@ -15,14 +15,12 @@ use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
||||||
use std::num::ParseIntError;
|
use std::num::ParseIntError;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{FromIo, UError, UResult};
|
use uucore::error::{FromIo, UError, UResult, UUsageError};
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{format_usage, help_about, help_usage};
|
||||||
|
|
||||||
const USAGE: &str = help_usage!("ptx.md");
|
const USAGE: &str = help_usage!("ptx.md");
|
||||||
const ABOUT: &str = help_about!("ptx.md");
|
const ABOUT: &str = help_about!("ptx.md");
|
||||||
|
|
||||||
const REGEX_CHARCLASS: &str = "^-]\\";
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum OutFormat {
|
enum OutFormat {
|
||||||
Dumb,
|
Dumb,
|
||||||
|
@ -71,8 +69,12 @@ fn read_word_filter_file(
|
||||||
.get_one::<String>(option)
|
.get_one::<String>(option)
|
||||||
.expect("parsing options failed!")
|
.expect("parsing options failed!")
|
||||||
.to_string();
|
.to_string();
|
||||||
let file = File::open(filename)?;
|
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
|
||||||
let reader = BufReader::new(file);
|
Box::new(stdin())
|
||||||
|
} else {
|
||||||
|
let file = File::open(filename)?;
|
||||||
|
Box::new(file)
|
||||||
|
});
|
||||||
let mut words: HashSet<String> = HashSet::new();
|
let mut words: HashSet<String> = HashSet::new();
|
||||||
for word in reader.lines() {
|
for word in reader.lines() {
|
||||||
words.insert(word?);
|
words.insert(word?);
|
||||||
|
@ -88,7 +90,12 @@ fn read_char_filter_file(
|
||||||
let filename = matches
|
let filename = matches
|
||||||
.get_one::<String>(option)
|
.get_one::<String>(option)
|
||||||
.expect("parsing options failed!");
|
.expect("parsing options failed!");
|
||||||
let mut reader = File::open(filename)?;
|
let mut reader: Box<dyn Read> = if filename == "-" {
|
||||||
|
Box::new(stdin())
|
||||||
|
} else {
|
||||||
|
let file = File::open(filename)?;
|
||||||
|
Box::new(file)
|
||||||
|
};
|
||||||
let mut buffer = String::new();
|
let mut buffer = String::new();
|
||||||
reader.read_to_string(&mut buffer)?;
|
reader.read_to_string(&mut buffer)?;
|
||||||
Ok(buffer.chars().collect())
|
Ok(buffer.chars().collect())
|
||||||
|
@ -155,18 +162,10 @@ impl WordFilter {
|
||||||
let reg = match arg_reg {
|
let reg = match arg_reg {
|
||||||
Some(arg_reg) => arg_reg,
|
Some(arg_reg) => arg_reg,
|
||||||
None => {
|
None => {
|
||||||
if break_set.is_some() {
|
if let Some(break_set) = break_set {
|
||||||
format!(
|
format!(
|
||||||
"[^{}]+",
|
"[^{}]+",
|
||||||
break_set
|
regex::escape(&break_set.into_iter().collect::<String>())
|
||||||
.unwrap()
|
|
||||||
.into_iter()
|
|
||||||
.map(|c| if REGEX_CHARCLASS.contains(c) {
|
|
||||||
format!("\\{c}")
|
|
||||||
} else {
|
|
||||||
c.to_string()
|
|
||||||
})
|
|
||||||
.collect::<String>()
|
|
||||||
)
|
)
|
||||||
} else if config.gnu_ext {
|
} else if config.gnu_ext {
|
||||||
"\\w+".to_owned()
|
"\\w+".to_owned()
|
||||||
|
@ -260,10 +259,17 @@ fn get_config(matches: &clap::ArgMatches) -> UResult<Config> {
|
||||||
.parse()
|
.parse()
|
||||||
.map_err(PtxError::ParseError)?;
|
.map_err(PtxError::ParseError)?;
|
||||||
}
|
}
|
||||||
if matches.get_flag(options::FORMAT_ROFF) {
|
if let Some(format) = matches.get_one::<String>(options::FORMAT) {
|
||||||
|
config.format = match format.as_str() {
|
||||||
|
"roff" => OutFormat::Roff,
|
||||||
|
"tex" => OutFormat::Tex,
|
||||||
|
_ => unreachable!("should be caught by clap"),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if matches.get_flag(options::format::ROFF) {
|
||||||
config.format = OutFormat::Roff;
|
config.format = OutFormat::Roff;
|
||||||
}
|
}
|
||||||
if matches.get_flag(options::FORMAT_TEX) {
|
if matches.get_flag(options::format::TEX) {
|
||||||
config.format = OutFormat::Tex;
|
config.format = OutFormat::Tex;
|
||||||
}
|
}
|
||||||
Ok(config)
|
Ok(config)
|
||||||
|
@ -277,20 +283,10 @@ struct FileContent {
|
||||||
|
|
||||||
type FileMap = HashMap<String, FileContent>;
|
type FileMap = HashMap<String, FileContent>;
|
||||||
|
|
||||||
fn read_input(input_files: &[String], config: &Config) -> std::io::Result<FileMap> {
|
fn read_input(input_files: &[String]) -> std::io::Result<FileMap> {
|
||||||
let mut file_map: FileMap = HashMap::new();
|
let mut file_map: FileMap = HashMap::new();
|
||||||
let mut files = Vec::new();
|
|
||||||
if input_files.is_empty() {
|
|
||||||
files.push("-");
|
|
||||||
} else if config.gnu_ext {
|
|
||||||
for file in input_files {
|
|
||||||
files.push(file);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
files.push(&input_files[0]);
|
|
||||||
}
|
|
||||||
let mut offset: usize = 0;
|
let mut offset: usize = 0;
|
||||||
for filename in files {
|
for filename in input_files {
|
||||||
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
|
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
|
||||||
Box::new(stdin())
|
Box::new(stdin())
|
||||||
} else {
|
} else {
|
||||||
|
@ -344,7 +340,7 @@ fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) ->
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if config.ignore_case {
|
if config.ignore_case {
|
||||||
word = word.to_lowercase();
|
word = word.to_uppercase();
|
||||||
}
|
}
|
||||||
word_set.insert(WordRef {
|
word_set.insert(WordRef {
|
||||||
word,
|
word,
|
||||||
|
@ -693,15 +689,19 @@ fn write_traditional_output(
|
||||||
}
|
}
|
||||||
|
|
||||||
mod options {
|
mod options {
|
||||||
|
pub mod format {
|
||||||
|
pub static ROFF: &str = "roff";
|
||||||
|
pub static TEX: &str = "tex";
|
||||||
|
}
|
||||||
|
|
||||||
pub static FILE: &str = "file";
|
pub static FILE: &str = "file";
|
||||||
pub static AUTO_REFERENCE: &str = "auto-reference";
|
pub static AUTO_REFERENCE: &str = "auto-reference";
|
||||||
pub static TRADITIONAL: &str = "traditional";
|
pub static TRADITIONAL: &str = "traditional";
|
||||||
pub static FLAG_TRUNCATION: &str = "flag-truncation";
|
pub static FLAG_TRUNCATION: &str = "flag-truncation";
|
||||||
pub static MACRO_NAME: &str = "macro-name";
|
pub static MACRO_NAME: &str = "macro-name";
|
||||||
pub static FORMAT_ROFF: &str = "format=roff";
|
pub static FORMAT: &str = "format";
|
||||||
pub static RIGHT_SIDE_REFS: &str = "right-side-refs";
|
pub static RIGHT_SIDE_REFS: &str = "right-side-refs";
|
||||||
pub static SENTENCE_REGEXP: &str = "sentence-regexp";
|
pub static SENTENCE_REGEXP: &str = "sentence-regexp";
|
||||||
pub static FORMAT_TEX: &str = "format=tex";
|
|
||||||
pub static WORD_REGEXP: &str = "word-regexp";
|
pub static WORD_REGEXP: &str = "word-regexp";
|
||||||
pub static BREAK_FILE: &str = "break-file";
|
pub static BREAK_FILE: &str = "break-file";
|
||||||
pub static IGNORE_CASE: &str = "ignore-case";
|
pub static IGNORE_CASE: &str = "ignore-case";
|
||||||
|
@ -715,21 +715,40 @@ mod options {
|
||||||
#[uucore::main]
|
#[uucore::main]
|
||||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
let matches = uu_app().try_get_matches_from(args)?;
|
let matches = uu_app().try_get_matches_from(args)?;
|
||||||
|
|
||||||
let mut input_files: Vec<String> = match &matches.get_many::<String>(options::FILE) {
|
|
||||||
Some(v) => v.clone().cloned().collect(),
|
|
||||||
None => vec!["-".to_string()],
|
|
||||||
};
|
|
||||||
|
|
||||||
let config = get_config(&matches)?;
|
let config = get_config(&matches)?;
|
||||||
let word_filter = WordFilter::new(&matches, &config)?;
|
|
||||||
let file_map = read_input(&input_files, &config).map_err_context(String::new)?;
|
let input_files;
|
||||||
let word_set = create_word_set(&config, &word_filter, &file_map);
|
let output_file;
|
||||||
let output_file = if !config.gnu_ext && input_files.len() == 2 {
|
|
||||||
input_files.pop().unwrap()
|
let mut files = matches
|
||||||
|
.get_many::<String>(options::FILE)
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.cloned();
|
||||||
|
|
||||||
|
if !config.gnu_ext {
|
||||||
|
input_files = vec![files.next().unwrap_or("-".to_string())];
|
||||||
|
output_file = files.next().unwrap_or("-".to_string());
|
||||||
|
if let Some(file) = files.next() {
|
||||||
|
return Err(UUsageError::new(
|
||||||
|
1,
|
||||||
|
format!("extra operand {}", file.quote()),
|
||||||
|
));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
"-".to_string()
|
input_files = {
|
||||||
};
|
let mut files = files.collect::<Vec<_>>();
|
||||||
|
if files.is_empty() {
|
||||||
|
files.push("-".to_string());
|
||||||
|
}
|
||||||
|
files
|
||||||
|
};
|
||||||
|
output_file = "-".to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
let word_filter = WordFilter::new(&matches, &config)?;
|
||||||
|
let file_map = read_input(&input_files).map_err_context(String::new)?;
|
||||||
|
let word_set = create_word_set(&config, &word_filter, &file_map);
|
||||||
write_traditional_output(&config, &file_map, &word_set, &output_file)
|
write_traditional_output(&config, &file_map, &word_set, &output_file)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -774,10 +793,24 @@ pub fn uu_app() -> Command {
|
||||||
.value_name("STRING"),
|
.value_name("STRING"),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::new(options::FORMAT_ROFF)
|
Arg::new(options::FORMAT)
|
||||||
|
.long(options::FORMAT)
|
||||||
|
.hide(true)
|
||||||
|
.value_parser(["roff", "tex"])
|
||||||
|
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new(options::format::ROFF)
|
||||||
.short('O')
|
.short('O')
|
||||||
.long(options::FORMAT_ROFF)
|
|
||||||
.help("generate output as roff directives")
|
.help("generate output as roff directives")
|
||||||
|
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
|
||||||
|
.action(ArgAction::SetTrue),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new(options::format::TEX)
|
||||||
|
.short('T')
|
||||||
|
.help("generate output as TeX directives")
|
||||||
|
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
|
||||||
.action(ArgAction::SetTrue),
|
.action(ArgAction::SetTrue),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -794,13 +827,6 @@ pub fn uu_app() -> Command {
|
||||||
.help("for end of lines or end of sentences")
|
.help("for end of lines or end of sentences")
|
||||||
.value_name("REGEXP"),
|
.value_name("REGEXP"),
|
||||||
)
|
)
|
||||||
.arg(
|
|
||||||
Arg::new(options::FORMAT_TEX)
|
|
||||||
.short('T')
|
|
||||||
.long(options::FORMAT_TEX)
|
|
||||||
.help("generate output as TeX directives")
|
|
||||||
.action(ArgAction::SetTrue),
|
|
||||||
)
|
|
||||||
.arg(
|
.arg(
|
||||||
Arg::new(options::WORD_REGEXP)
|
Arg::new(options::WORD_REGEXP)
|
||||||
.short('W')
|
.short('W')
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
// spell-checker:ignore roff
|
||||||
use crate::common::util::TestScenario;
|
use crate::common::util::TestScenario;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -112,3 +113,50 @@ fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected");
|
.stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_reject_too_many_operands() {
|
||||||
|
new_ucmd!().args(&["-G", "-", "-", "-"]).fails_with_code(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_break_file_regex_escaping() {
|
||||||
|
new_ucmd!()
|
||||||
|
.pipe_in("\\.+*?()|[]{}^$#&-~")
|
||||||
|
.args(&["-G", "-b", "-", "input"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_fixture("break_file_regex_escaping.expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ignore_case() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-G", "-f"])
|
||||||
|
.pipe_in("a _")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(".xx \"\" \"\" \"a _\" \"\"\n.xx \"\" \"a\" \"_\" \"\"\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_format() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-G", "-O"])
|
||||||
|
.pipe_in("a")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(".xx \"\" \"\" \"a\" \"\"\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-G", "-T"])
|
||||||
|
.pipe_in("a")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("\\xx {}{}{a}{}{}\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-G", "--format=roff"])
|
||||||
|
.pipe_in("a")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only(".xx \"\" \"\" \"a\" \"\"\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-G", "--format=tex"])
|
||||||
|
.pipe_in("a")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("\\xx {}{}{a}{}{}\n");
|
||||||
|
}
|
||||||
|
|
28
tests/fixtures/ptx/break_file_regex_escaping.expected
vendored
Normal file
28
tests/fixtures/ptx/break_file_regex_escaping.expected
vendored
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
.xx "" "" """quotes"", for roff" ""
|
||||||
|
.xx "" "and some other like" "%a, b#, c$c" ""
|
||||||
|
.xx "" "and some other like %a, b#" ", c$c" ""
|
||||||
|
.xx "" "maybe" "also~or^" ""
|
||||||
|
.xx "" "" "and some other like %a, b#, c$c" ""
|
||||||
|
.xx "" "oh," "and back\slash" ""
|
||||||
|
.xx "" "and some other like %a," "b#, c$c" ""
|
||||||
|
.xx "" "oh, and" "back\slash" ""
|
||||||
|
.xx "" "{" "brackets} for tex" ""
|
||||||
|
.xx "" "and some other like %a, b#," "c$c" ""
|
||||||
|
.xx "" "and some other like %a, b#, c$" "c" ""
|
||||||
|
.xx "" "let's check special" "characters:" ""
|
||||||
|
.xx "" "let's" "check special characters:" ""
|
||||||
|
.xx "" """quotes""," "for roff" ""
|
||||||
|
.xx "" "{brackets}" "for tex" ""
|
||||||
|
.xx "" "" "hello world!" ""
|
||||||
|
.xx "" "" "let's check special characters:" ""
|
||||||
|
.xx "" "and some other" "like %a, b#, c$c" ""
|
||||||
|
.xx "" "" "maybe also~or^" ""
|
||||||
|
.xx "" "" "oh, and back\slash" ""
|
||||||
|
.xx "" "maybe also~" "or^" ""
|
||||||
|
.xx "" "and some" "other like %a, b#, c$c" ""
|
||||||
|
.xx "" """quotes"", for" "roff" ""
|
||||||
|
.xx "" "oh, and back\" "slash" ""
|
||||||
|
.xx "" "and" "some other like %a, b#, c$c" ""
|
||||||
|
.xx "" "let's check" "special characters:" ""
|
||||||
|
.xx "" "{brackets} for" "tex" ""
|
||||||
|
.xx "" "hello" "world!" ""
|
Loading…
Add table
Add a link
Reference in a new issue