1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

ptx: fixes

This commit is contained in:
usamoi 2025-03-22 01:05:52 +08:00
parent a64fce8286
commit 412d2b3b1f
3 changed files with 157 additions and 55 deletions

View file

@ -15,14 +15,12 @@ use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::num::ParseIntError; use std::num::ParseIntError;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult}; use uucore::error::{FromIo, UError, UResult, UUsageError};
use uucore::{format_usage, help_about, help_usage}; use uucore::{format_usage, help_about, help_usage};
const USAGE: &str = help_usage!("ptx.md"); const USAGE: &str = help_usage!("ptx.md");
const ABOUT: &str = help_about!("ptx.md"); const ABOUT: &str = help_about!("ptx.md");
const REGEX_CHARCLASS: &str = "^-]\\";
#[derive(Debug)] #[derive(Debug)]
enum OutFormat { enum OutFormat {
Dumb, Dumb,
@ -71,8 +69,12 @@ fn read_word_filter_file(
.get_one::<String>(option) .get_one::<String>(option)
.expect("parsing options failed!") .expect("parsing options failed!")
.to_string(); .to_string();
let file = File::open(filename)?; let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
let reader = BufReader::new(file); Box::new(stdin())
} else {
let file = File::open(filename)?;
Box::new(file)
});
let mut words: HashSet<String> = HashSet::new(); let mut words: HashSet<String> = HashSet::new();
for word in reader.lines() { for word in reader.lines() {
words.insert(word?); words.insert(word?);
@ -88,7 +90,12 @@ fn read_char_filter_file(
let filename = matches let filename = matches
.get_one::<String>(option) .get_one::<String>(option)
.expect("parsing options failed!"); .expect("parsing options failed!");
let mut reader = File::open(filename)?; let mut reader: Box<dyn Read> = if filename == "-" {
Box::new(stdin())
} else {
let file = File::open(filename)?;
Box::new(file)
};
let mut buffer = String::new(); let mut buffer = String::new();
reader.read_to_string(&mut buffer)?; reader.read_to_string(&mut buffer)?;
Ok(buffer.chars().collect()) Ok(buffer.chars().collect())
@ -155,18 +162,10 @@ impl WordFilter {
let reg = match arg_reg { let reg = match arg_reg {
Some(arg_reg) => arg_reg, Some(arg_reg) => arg_reg,
None => { None => {
if break_set.is_some() { if let Some(break_set) = break_set {
format!( format!(
"[^{}]+", "[^{}]+",
break_set regex::escape(&break_set.into_iter().collect::<String>())
.unwrap()
.into_iter()
.map(|c| if REGEX_CHARCLASS.contains(c) {
format!("\\{c}")
} else {
c.to_string()
})
.collect::<String>()
) )
} else if config.gnu_ext { } else if config.gnu_ext {
"\\w+".to_owned() "\\w+".to_owned()
@ -260,10 +259,17 @@ fn get_config(matches: &clap::ArgMatches) -> UResult<Config> {
.parse() .parse()
.map_err(PtxError::ParseError)?; .map_err(PtxError::ParseError)?;
} }
if matches.get_flag(options::FORMAT_ROFF) { if let Some(format) = matches.get_one::<String>(options::FORMAT) {
config.format = match format.as_str() {
"roff" => OutFormat::Roff,
"tex" => OutFormat::Tex,
_ => unreachable!("should be caught by clap"),
};
}
if matches.get_flag(options::format::ROFF) {
config.format = OutFormat::Roff; config.format = OutFormat::Roff;
} }
if matches.get_flag(options::FORMAT_TEX) { if matches.get_flag(options::format::TEX) {
config.format = OutFormat::Tex; config.format = OutFormat::Tex;
} }
Ok(config) Ok(config)
@ -277,20 +283,10 @@ struct FileContent {
type FileMap = HashMap<String, FileContent>; type FileMap = HashMap<String, FileContent>;
fn read_input(input_files: &[String], config: &Config) -> std::io::Result<FileMap> { fn read_input(input_files: &[String]) -> std::io::Result<FileMap> {
let mut file_map: FileMap = HashMap::new(); let mut file_map: FileMap = HashMap::new();
let mut files = Vec::new();
if input_files.is_empty() {
files.push("-");
} else if config.gnu_ext {
for file in input_files {
files.push(file);
}
} else {
files.push(&input_files[0]);
}
let mut offset: usize = 0; let mut offset: usize = 0;
for filename in files { for filename in input_files {
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" { let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
Box::new(stdin()) Box::new(stdin())
} else { } else {
@ -344,7 +340,7 @@ fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) ->
continue; continue;
} }
if config.ignore_case { if config.ignore_case {
word = word.to_lowercase(); word = word.to_uppercase();
} }
word_set.insert(WordRef { word_set.insert(WordRef {
word, word,
@ -693,15 +689,19 @@ fn write_traditional_output(
} }
mod options { mod options {
pub mod format {
pub static ROFF: &str = "roff";
pub static TEX: &str = "tex";
}
pub static FILE: &str = "file"; pub static FILE: &str = "file";
pub static AUTO_REFERENCE: &str = "auto-reference"; pub static AUTO_REFERENCE: &str = "auto-reference";
pub static TRADITIONAL: &str = "traditional"; pub static TRADITIONAL: &str = "traditional";
pub static FLAG_TRUNCATION: &str = "flag-truncation"; pub static FLAG_TRUNCATION: &str = "flag-truncation";
pub static MACRO_NAME: &str = "macro-name"; pub static MACRO_NAME: &str = "macro-name";
pub static FORMAT_ROFF: &str = "format=roff"; pub static FORMAT: &str = "format";
pub static RIGHT_SIDE_REFS: &str = "right-side-refs"; pub static RIGHT_SIDE_REFS: &str = "right-side-refs";
pub static SENTENCE_REGEXP: &str = "sentence-regexp"; pub static SENTENCE_REGEXP: &str = "sentence-regexp";
pub static FORMAT_TEX: &str = "format=tex";
pub static WORD_REGEXP: &str = "word-regexp"; pub static WORD_REGEXP: &str = "word-regexp";
pub static BREAK_FILE: &str = "break-file"; pub static BREAK_FILE: &str = "break-file";
pub static IGNORE_CASE: &str = "ignore-case"; pub static IGNORE_CASE: &str = "ignore-case";
@ -715,21 +715,40 @@ mod options {
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().try_get_matches_from(args)?; let matches = uu_app().try_get_matches_from(args)?;
let mut input_files: Vec<String> = match &matches.get_many::<String>(options::FILE) {
Some(v) => v.clone().cloned().collect(),
None => vec!["-".to_string()],
};
let config = get_config(&matches)?; let config = get_config(&matches)?;
let word_filter = WordFilter::new(&matches, &config)?;
let file_map = read_input(&input_files, &config).map_err_context(String::new)?; let input_files;
let word_set = create_word_set(&config, &word_filter, &file_map); let output_file;
let output_file = if !config.gnu_ext && input_files.len() == 2 {
input_files.pop().unwrap() let mut files = matches
.get_many::<String>(options::FILE)
.into_iter()
.flatten()
.cloned();
if !config.gnu_ext {
input_files = vec![files.next().unwrap_or("-".to_string())];
output_file = files.next().unwrap_or("-".to_string());
if let Some(file) = files.next() {
return Err(UUsageError::new(
1,
format!("extra operand {}", file.quote()),
));
}
} else { } else {
"-".to_string() input_files = {
}; let mut files = files.collect::<Vec<_>>();
if files.is_empty() {
files.push("-".to_string());
}
files
};
output_file = "-".to_string();
}
let word_filter = WordFilter::new(&matches, &config)?;
let file_map = read_input(&input_files).map_err_context(String::new)?;
let word_set = create_word_set(&config, &word_filter, &file_map);
write_traditional_output(&config, &file_map, &word_set, &output_file) write_traditional_output(&config, &file_map, &word_set, &output_file)
} }
@ -774,10 +793,24 @@ pub fn uu_app() -> Command {
.value_name("STRING"), .value_name("STRING"),
) )
.arg( .arg(
Arg::new(options::FORMAT_ROFF) Arg::new(options::FORMAT)
.long(options::FORMAT)
.hide(true)
.value_parser(["roff", "tex"])
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]),
)
.arg(
Arg::new(options::format::ROFF)
.short('O') .short('O')
.long(options::FORMAT_ROFF)
.help("generate output as roff directives") .help("generate output as roff directives")
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::format::TEX)
.short('T')
.help("generate output as TeX directives")
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
@ -794,13 +827,6 @@ pub fn uu_app() -> Command {
.help("for end of lines or end of sentences") .help("for end of lines or end of sentences")
.value_name("REGEXP"), .value_name("REGEXP"),
) )
.arg(
Arg::new(options::FORMAT_TEX)
.short('T')
.long(options::FORMAT_TEX)
.help("generate output as TeX directives")
.action(ArgAction::SetTrue),
)
.arg( .arg(
Arg::new(options::WORD_REGEXP) Arg::new(options::WORD_REGEXP)
.short('W') .short('W')

View file

@ -2,6 +2,7 @@
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore roff
use crate::common::util::TestScenario; use crate::common::util::TestScenario;
#[test] #[test]
@ -112,3 +113,50 @@ fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() {
.succeeds() .succeeds()
.stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected"); .stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected");
} }
/// Passing three operands together with `-G` must make the command fail
/// with exit code 1.
#[test]
fn test_reject_too_many_operands() {
    // One flag followed by three `-` operands.
    let operands = ["-G", "-", "-", "-"];
    new_ucmd!().args(&operands).fails_with_code(1);
}
/// Pipes a string of regex metacharacters to stdin, passes `-` as the value
/// of `-b`, and compares the output produced for the `input` fixture with
/// `break_file_regex_escaping.expected`.
#[test]
fn test_break_file_regex_escaping() {
    // Characters that carry special meaning in regex syntax.
    let piped = "\\.+*?()|[]{}^$#&-~";
    let argv = ["-G", "-b", "-", "input"];
    new_ucmd!()
        .pipe_in(piped)
        .args(&argv)
        .succeeds()
        .stdout_only_fixture("break_file_regex_escaping.expected");
}
/// Runs with `-G -f` on the input `a _` and checks the exact two-line output,
/// in which the entry keyed on `a` precedes the entry keyed on `_`.
#[test]
fn test_ignore_case() {
    // One expected output line per string piece.
    let expected = concat!(
        ".xx \"\" \"\" \"a _\" \"\"\n",
        ".xx \"\" \"a\" \"_\" \"\"\n",
    );
    new_ucmd!()
        .args(&["-G", "-f"])
        .pipe_in("a _")
        .succeeds()
        .stdout_only(expected);
}
/// Checks that `-O` and `--format=roff` produce the same roff line, and that
/// `-T` and `--format=tex` produce the same TeX line, for the input `a`.
#[test]
fn test_format() {
    const ROFF_LINE: &str = ".xx \"\" \"\" \"a\" \"\"\n";
    const TEX_LINE: &str = "\\xx {}{}{a}{}{}\n";

    // (format selector, expected stdout), run in this order.
    let cases = [
        ("-O", ROFF_LINE),
        ("-T", TEX_LINE),
        ("--format=roff", ROFF_LINE),
        ("--format=tex", TEX_LINE),
    ];

    for (selector, expected) in cases {
        new_ucmd!()
            .args(&["-G", selector])
            .pipe_in("a")
            .succeeds()
            .stdout_only(expected);
    }
}

View file

@ -0,0 +1,28 @@
.xx "" "" """quotes"", for roff" ""
.xx "" "and some other like" "%a, b#, c$c" ""
.xx "" "and some other like %a, b#" ", c$c" ""
.xx "" "maybe" "also~or^" ""
.xx "" "" "and some other like %a, b#, c$c" ""
.xx "" "oh," "and back\slash" ""
.xx "" "and some other like %a," "b#, c$c" ""
.xx "" "oh, and" "back\slash" ""
.xx "" "{" "brackets} for tex" ""
.xx "" "and some other like %a, b#," "c$c" ""
.xx "" "and some other like %a, b#, c$" "c" ""
.xx "" "let's check special" "characters:" ""
.xx "" "let's" "check special characters:" ""
.xx "" """quotes""," "for roff" ""
.xx "" "{brackets}" "for tex" ""
.xx "" "" "hello world!" ""
.xx "" "" "let's check special characters:" ""
.xx "" "and some other" "like %a, b#, c$c" ""
.xx "" "" "maybe also~or^" ""
.xx "" "" "oh, and back\slash" ""
.xx "" "maybe also~" "or^" ""
.xx "" "and some" "other like %a, b#, c$c" ""
.xx "" """quotes"", for" "roff" ""
.xx "" "oh, and back\" "slash" ""
.xx "" "and" "some other like %a, b#, c$c" ""
.xx "" "let's check" "special characters:" ""
.xx "" "{brackets} for" "tex" ""
.xx "" "hello" "world!" ""