1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #679 from jbcrail/stabilize-5

Remove unstable features from ptx.
This commit is contained in:
Heather 2015-08-12 08:12:24 +03:00
commit f98a704604

View file

@ -1,5 +1,4 @@
#![crate_name = "ptx"]
#![feature(convert, vec_push_all)]
/*
* This file is part of the uutils coreutils package.
@ -15,7 +14,7 @@ extern crate getopts;
extern crate memchr;
extern crate regex_syntax;
extern crate regex;
use std::collections::{HashMap, HashSet, BTreeSet};
use std::default::Default;
use std::fs::File;
@ -34,8 +33,8 @@ static VERSION: &'static str = "1.0.0";
// Output format used when rendering each index line. The value is stored in
// `config.format` and matched on when the output lines are produced:
// `Roff` is selected when the `-O` option is present, and `Tex` lines are
// built by `format_tex_line`.
// NOTE(review): this listing is a rendered diff, so the `Dumb` and `Roff`
// lines below appear twice (old and new text of the same lines).
#[derive(Debug)]
enum OutFormat {
Dumb,
Roff,
Dumb,
Roff,
Tex,
}
@ -93,13 +92,13 @@ struct WordFilter {
impl WordFilter {
fn new(matches: &Matches, config: &Config) -> WordFilter {
let (o, oset): (bool, HashSet<String>) =
let (o, oset): (bool, HashSet<String>) =
if matches.opt_present("o") {
(true, read_word_filter_file(matches, "o"))
} else {
(false, HashSet::new())
};
let (i, iset): (bool, HashSet<String>) =
let (i, iset): (bool, HashSet<String>) =
if matches.opt_present("i") {
(true, read_word_filter_file(matches, "i"))
} else {
@ -108,7 +107,7 @@ impl WordFilter {
if matches.opt_present("b") {
crash!(1, "-b not implemented yet");
}
let reg =
let reg =
if matches.opt_present("W") {
matches.opt_str("W").expect("parsing options failed!")
} else if config.gnu_ext {
@ -116,10 +115,10 @@ impl WordFilter {
} else {
"[^ \t\n]+".to_string()
};
WordFilter {
only_specified: o,
ignore_specified: i,
only_set: oset,
WordFilter {
only_specified: o,
ignore_specified: i,
only_set: oset,
ignore_set: iset,
word_regex: reg
}
@ -150,7 +149,6 @@ fn print_usage(opts: &Options) {
println!("{}\n{}", opts.usage(&brief), explaination);
}
fn get_config(matches: &Matches) -> Config {
let mut config: Config = Default::default();
let err_msg = "parsing options failed";
@ -167,24 +165,24 @@ fn get_config(matches: &Matches) -> Config {
config.auto_ref = matches.opt_present("A");
config.input_ref = matches.opt_present("r");
config.right_ref &= matches.opt_present("R");
config.ignore_case = matches.opt_present("f");
config.ignore_case = matches.opt_present("f");
if matches.opt_present("M") {
config.macro_name =
config.macro_name =
matches.opt_str("M").expect(err_msg).to_string();
}
if matches.opt_present("F") {
config.trunc_str =
config.trunc_str =
matches.opt_str("F").expect(err_msg).to_string();
}
if matches.opt_present("w") {
let width_str = matches.opt_str("w").expect(err_msg);
config.line_width = crash_if_err!(
1, usize::from_str_radix(width_str.as_str(), 10));
1, usize::from_str_radix(&width_str, 10));
}
if matches.opt_present("g") {
let gap_str = matches.opt_str("g").expect(err_msg);
config.gap_size = crash_if_err!(
1, usize::from_str_radix(gap_str.as_str(), 10));
1, usize::from_str_radix(&gap_str, 10));
}
if matches.opt_present("O") {
config.format = OutFormat::Roff;
@ -195,18 +193,20 @@ fn get_config(matches: &Matches) -> Config {
config
}
fn read_input(input_files: Vec<&str>, config: &Config) ->
fn read_input(input_files: &Vec<String>, config: &Config) ->
HashMap<String, (Vec<String>, usize)> {
let mut file_map : HashMap<String, (Vec<String>, usize)> =
let mut file_map : HashMap<String, (Vec<String>, usize)> =
HashMap::new();
let mut files = Vec::new();
if input_files.is_empty() {
files.push("-");
} else {
if config.gnu_ext {
files.push_all(input_files.as_slice());
for file in input_files {
files.push(&file);
}
} else {
files.push(input_files[0]);
files.push(&input_files[0]);
}
}
let mut lines_so_far: usize = 0;
@ -227,11 +227,11 @@ fn read_input(input_files: Vec<&str>, config: &Config) ->
file_map
}
fn create_word_set(config: &Config, filter: &WordFilter,
file_map: &HashMap<String, (Vec<String>, usize)>)->
fn create_word_set(config: &Config, filter: &WordFilter,
file_map: &HashMap<String, (Vec<String>, usize)>)->
BTreeSet<WordRef> {
let reg = Regex::new(filter.word_regex.as_str()).unwrap();
let ref_reg = Regex::new(config.context_regex.as_str()).unwrap();
let reg = Regex::new(&filter.word_regex).unwrap();
let ref_reg = Regex::new(&config.context_regex).unwrap();
let mut word_set: BTreeSet<WordRef> = BTreeSet::new();
for (file, lines) in file_map.iter() {
let mut count: usize = 0;
@ -248,11 +248,11 @@ fn create_word_set(config: &Config, filter: &WordFilter,
continue;
}
let mut word = line[beg .. end].to_string();
if filter.only_specified &&
if filter.only_specified &&
!(filter.only_set.contains(&word)) {
continue;
}
if filter.ignore_specified &&
if filter.ignore_specified &&
filter.ignore_set.contains(&word) {
continue;
}
@ -261,7 +261,7 @@ fn create_word_set(config: &Config, filter: &WordFilter,
}
word_set.insert(WordRef{
word: word,
filename: String::from(file.as_str()),
filename: String::from(file.clone()),
global_line_nr: offs + count,
local_line_nr: count,
position: beg,
@ -274,12 +274,12 @@ fn create_word_set(config: &Config, filter: &WordFilter,
word_set
}
fn get_reference(config: &Config, word_ref: &WordRef, line: &String) ->
fn get_reference(config: &Config, word_ref: &WordRef, line: &String) ->
String {
if config.auto_ref {
format!("{}:{}", word_ref.filename, word_ref.local_line_nr + 1)
} else if config.input_ref {
let reg = Regex::new(config.context_regex.as_str()).unwrap();
let reg = Regex::new(&config.context_regex).unwrap();
let (beg, end) = match reg.find(line) {
Some(x) => x,
None => (0,0)
@ -297,7 +297,7 @@ fn assert_str_integrity(s: &Vec<char>, beg: usize, end: usize) {
fn trim_broken_word_left(s: &Vec<char>, beg: usize, end: usize) -> usize {
assert_str_integrity(s, beg, end);
if beg == end || beg == 0 || s[beg].is_whitespace() ||
if beg == end || beg == 0 || s[beg].is_whitespace() ||
s[beg-1].is_whitespace() {
return beg;
}
@ -310,7 +310,7 @@ fn trim_broken_word_left(s: &Vec<char>, beg: usize, end: usize) -> usize {
fn trim_broken_word_right(s: &Vec<char>, beg: usize, end: usize) -> usize {
assert_str_integrity(s, beg, end);
if beg == end || end == s.len() || s[end-1].is_whitespace() ||
if beg == end || end == s.len() || s[end-1].is_whitespace() ||
s[end].is_whitespace() {
return end;
}
@ -334,77 +334,76 @@ fn trim_idx(s: &Vec<char>, beg: usize, end: usize) -> (usize, usize) {
(b,e)
}
// Splits the context around a keyword into the four chunks of one output
// line and returns them as `(tail, before, after, head)`.
//
// * `all_before`, `all_after` - context text on each side of the keyword
//   (presumably the line text before/after it; the call sites are only
//   partly visible here).
// * `keyword` - the keyword itself; only its length is used.
// * `config` - supplies `line_width` and `trunc_str`.
//
// Panics if any of the three strings is not already trimmed, or if one of
// the size assertions below fails.
//
// NOTE(review): this listing is a rendered diff, so several statements in
// the body appear twice (old and new text of the same line).
fn get_output_chunks(all_before: &String, keyword: &String, all_after: &String,
config: &Config) -> (String, String, String, String) {
// Precondition: callers must pass trimmed strings.
assert!(all_before.trim() == all_before.as_str());
assert!(keyword.trim() == keyword.as_str());
assert!(all_after.trim() == all_after.as_str());
assert_eq!(all_before.trim().to_string(), *all_before);
assert_eq!(keyword.trim().to_string(), *keyword);
assert_eq!(all_after.trim().to_string(), *all_after);
// The four output pieces, filled in below.
let mut head = String::new();
let mut before = String::new();
let mut after = String::new();
let mut tail = String::new();
// Size budgets: each half of the line gets half of `line_width` minus room
// for two truncation strings, clamped at zero. The right half additionally
// gives up the keyword length plus one (presumably for the space that is
// prepended to `after` at the end of this function).
let half_line_size = cmp::max((config.line_width/2) as isize -
(2*config.trunc_str.len()) as isize, 0) as usize;
let max_after_size = cmp::max(half_line_size as isize -
let half_line_size = cmp::max((config.line_width/2) as isize -
(2*config.trunc_str.len()) as isize, 0) as usize;
let max_after_size = cmp::max(half_line_size as isize -
keyword.len() as isize - 1, 0) as usize;
let max_before_size = half_line_size;
let max_before_size = half_line_size;
// Char vectors handed to the trimming helpers (defined elsewhere in this file).
// NOTE(review): the indices computed below come from `String::len`, which
// counts bytes, but they are used against these `Vec<char>` values and for
// slicing; the two only agree for ASCII text -- confirm behavior for
// multi-byte input.
let all_before_vec: Vec<char> = all_before.chars().collect();
let all_after_vec: Vec<char> = all_after.chars().collect();
// get before
// Start `max_before_size` from the end of `all_before` (or at 0), then let
// the helpers adjust the bounds.
let mut bb_tmp =
let mut bb_tmp =
cmp::max(all_before.len() as isize - max_before_size as isize, 0) as usize;
bb_tmp = trim_broken_word_left(&all_before_vec, bb_tmp, all_before.len());
let (before_beg, before_end) =
let (before_beg, before_end) =
trim_idx(&all_before_vec, bb_tmp, all_before.len());
before.push_str(&all_before[before_beg .. before_end]);
assert!(max_before_size >= before.len());
// get after
// Take at most `max_after_size` from the start of `all_after`.
let mut ae_tmp = cmp::min(max_after_size, all_after.len());
ae_tmp = trim_broken_word_right(&all_after_vec, 0, ae_tmp);
let (after_beg, after_end) = trim_idx(&all_after_vec, 0, ae_tmp);
after.push_str(&all_after[after_beg .. after_end]);
assert!(max_after_size >= after.len());
assert!(max_after_size >= after.len());
// get tail
// The part of the left budget that `before` did not use goes to `tail`,
// which is taken from `all_after` past the end of `after`.
let max_tail_size = max_before_size - before.len();
let (tb, _) = trim_idx(&all_after_vec, after_end, all_after.len());
let mut te_tmp = cmp::min(tb + max_tail_size, all_after.len());
let mut te_tmp = cmp::min(tb + max_tail_size, all_after.len());
te_tmp = trim_broken_word_right(&all_after_vec, tb, te_tmp);
let (tail_beg, tail_end) = trim_idx(&all_after_vec, tb, te_tmp);
tail.push_str(&all_after[tail_beg .. tail_end]);
// get head
// Likewise, the right budget that `after` did not use goes to `head`,
// which is taken from `all_before` ahead of where `before` starts.
let max_head_size = max_after_size - after.len();
let (_, he) = trim_idx(&all_before_vec, 0, before_beg);
let mut hb_tmp =
let mut hb_tmp =
cmp::max(he as isize - max_head_size as isize, 0) as usize;
hb_tmp = trim_broken_word_left(&all_before_vec, hb_tmp, he);
let (head_beg, head_end) = trim_idx(&all_before_vec, hb_tmp, he);
head.push_str(&all_before[head_beg .. head_end]);
// put right context truncation string if needed
// `after` is flagged when it stops short of `all_after` and the tail is
// empty; otherwise the tail is flagged when it does not reach the end.
if after_end != all_after.len() && tail_beg == tail_end {
after.push_str(config.trunc_str.as_str());
after.push_str(&config.trunc_str);
} else if after_end != all_after.len() && tail_end != all_after.len() {
tail.push_str(config.trunc_str.as_str());
tail.push_str(&config.trunc_str);
}
// put left context truncation string if needed
// Mirror image of the above: flag `before` when there is no head, else
// flag the head when it does not start at the beginning of `all_before`.
if before_beg != 0 && head_beg == head_end {
before = format!("{}{}", config.trunc_str, before);
} else if before_beg != 0 && head_beg != 0 {
head = format!("{}{}", config.trunc_str, head);
}
// add space before "after" if needed
if after.len() > 0 {
after = format!(" {}", after);
}
// Order matches the `(tail, before, after, head)` destructuring at the call sites.
(tail, before, after, head)
}
@ -421,11 +420,13 @@ fn adjust_tex_str(context: &str) -> String {
let ws_reg = Regex::new(r"[\t\n\v\f\r ]").unwrap();
let mut fix: String = ws_reg.replace_all(context, " ").trim().to_string();
let mapped_chunks: Vec<String> = fix.chars().map(tex_mapper).collect();
fix = mapped_chunks.join("");
// NB: Using deprecated connect() until Rust 1.3 becomes stable.
// When 1.3 is released, replace connect() with join().
fix = mapped_chunks.connect("");
fix
}
fn format_tex_line(config: &Config, word_ref: &WordRef, line: &String,
fn format_tex_line(config: &Config, word_ref: &WordRef, line: &String,
reference: &String) -> String {
let mut output = String::new();
output.push_str(&format!("\\{} ", config.macro_name));
@ -439,10 +440,10 @@ fn format_tex_line(config: &Config, word_ref: &WordRef, line: &String,
&line[word_ref.position .. word_ref.position_end]);
let all_after = adjust_tex_str(
&line[word_ref.position_end .. line.len()]);
let (tail, before, after, head) =
let (tail, before, after, head) =
get_output_chunks(&all_before, &keyword, &all_after, &config);
output.push_str(format!("{5}{0}{6}{5}{1}{6}{5}{2}{6}{5}{3}{6}{5}{4}{6}",
tail, before, keyword, after, head, "{", "}").as_str());
output.push_str(&format!("{5}{0}{6}{5}{1}{6}{5}{2}{6}{5}{3}{6}{5}{4}{6}",
tail, before, keyword, after, head, "{", "}"));
if config.auto_ref || config.input_ref {
output.push_str(
&format!("{}{}{}", "{", adjust_tex_str(&reference), "}"));
@ -455,7 +456,7 @@ fn adjust_roff_str(context: &str) -> String {
ws_reg.replace_all(context, " ").replace("\"", "\"\"").trim().to_string()
}
fn format_roff_line(config: &Config, word_ref: &WordRef, line: &str,
fn format_roff_line(config: &Config, word_ref: &WordRef, line: &str,
reference: &str) -> String {
let mut output = String::new();
output.push_str(&format!(".{}", config.macro_name));
@ -469,19 +470,19 @@ fn format_roff_line(config: &Config, word_ref: &WordRef, line: &str,
&line[word_ref.position .. word_ref.position_end]);
let all_after = adjust_roff_str(
&line[word_ref.position_end .. line.len()]);
let (tail, before, after, head) =
let (tail, before, after, head) =
get_output_chunks(&all_before, &keyword, &all_after, &config);
output.push_str(format!(" \"{}\" \"{}\" \"{}{}\" \"{}\"",
tail, before, keyword, after, head).as_str());
output.push_str(&format!(" \"{}\" \"{}\" \"{}{}\" \"{}\"",
tail, before, keyword, after, head));
if config.auto_ref || config.input_ref {
output.push_str(&format!(" \"{}\"", adjust_roff_str(&reference)));
}
output
}
fn write_traditional_output(config: &Config,
file_map: &HashMap<String, (Vec<String>,usize)>,
words: &BTreeSet<WordRef>, output_filename: &str) {
fn write_traditional_output(config: &Config,
file_map: &HashMap<String, (Vec<String>,usize)>,
words: &BTreeSet<WordRef>, output_filename: &String) {
let mut writer: BufWriter<Box<Write>> = BufWriter::new(
if output_filename == "-" {
Box::new(stdout())
@ -494,7 +495,7 @@ fn write_traditional_output(config: &Config,
file_map.get(&(word_ref.filename))
.expect("Missing file in file map");
let (ref lines, _) = *(file_map_value);
let reference =
let reference =
get_reference(config, word_ref, &lines[word_ref.local_line_nr]);
let output_line: String = match config.format {
OutFormat::Tex => format_tex_line(
@ -510,38 +511,38 @@ fn write_traditional_output(config: &Config,
pub fn uumain(args: Vec<String>) -> i32 {
let mut opts = Options::new();
opts.optflag("A", "auto-reference",
opts.optflag("A", "auto-reference",
"output automatically generated references");
opts.optflag("G", "traditional", "behave more like System V 'ptx'");
opts.optopt("F", "flag-truncation",
opts.optopt("F", "flag-truncation",
"use STRING for flagging line truncations", "STRING");
opts.optopt("M", "macro-name", "macro name to use instead of 'xx'",
opts.optopt("M", "macro-name", "macro name to use instead of 'xx'",
"STRING");
opts.optflag("O", "format=roff", "generate output as roff directives");
opts.optflag("R", "right-side-refs",
opts.optflag("R", "right-side-refs",
"put references at right, not counted in -w");
opts.optopt("S", "sentence-regexp", "for end of lines or end of sentences",
"REGEXP");
opts.optflag("T", "format=tex", "generate output as TeX directives");
opts.optopt("W", "word-regexp", "use REGEXP to match each keyword",
opts.optopt("W", "word-regexp", "use REGEXP to match each keyword",
"REGEXP");
opts.optopt("b", "break-file", "word break characters in this FILE",
opts.optopt("b", "break-file", "word break characters in this FILE",
"FILE");
opts.optflag("f", "ignore-case",
opts.optflag("f", "ignore-case",
"fold lower case to upper case for sorting");
opts.optopt("g", "gap-size", "gap size in columns between output fields",
opts.optopt("g", "gap-size", "gap size in columns between output fields",
"NUMBER");
opts.optopt("i", "ignore-file", "read ignore word list from FILE", "FILE");
opts.optopt("o", "only-file", "read only word list from this FILE",
opts.optopt("o", "only-file", "read only word list from this FILE",
"FILE");
opts.optflag("r", "references", "first field of each line is a reference");
opts.optopt("w", "width", "output width in columns, reference excluded",
"NUMBER");
opts.optflag("", "help", "display this help and exit");
opts.optflag("", "version", "output version information and exit");
let matches = return_if_err!(1, opts.parse(&args[1..]));
if matches.opt_present("help") {
print_usage(&opts);
return 0;
@ -552,14 +553,14 @@ pub fn uumain(args: Vec<String>) -> i32 {
}
let config = get_config(&matches);
let word_filter = WordFilter::new(&matches, &config);
let file_map =
read_input(matches.free.iter().map(|x| x.as_str()).collect(), &config);
let file_map =
read_input(&matches.free, &config);
let word_set = create_word_set(&config, &word_filter, &file_map);
let output_file = if !config.gnu_ext && matches.free.len() == 2 {
matches.free[1].as_str()
matches.free[1].clone()
} else {
"-"
"-".to_string()
};
write_traditional_output(&config, &file_map, &word_set, output_file);
write_traditional_output(&config, &file_map, &word_set, &output_file);
0
}