From ea7df03c0aa4db5f5355b0217e2c4c67a854ada2 Mon Sep 17 00:00:00 2001
From: dokaptur <dokaptur@gmail.com>
Date: Tue, 27 Jan 2015 16:37:07 +0100
Subject: [PATCH] initial ptx commit

---
 Makefile        |   1 +
 src/ptx/deps.mk |   1 +
 src/ptx/ptx.rs  | 561 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 563 insertions(+)
 create mode 100644 src/ptx/deps.mk
 create mode 100644 src/ptx/ptx.rs
diff --git a/Makefile b/Makefile
index b8a16495a..7ab1b0921 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,7 @@ PROGS       := \
   od \
   paste \
   printenv \
+  ptx \
   pwd \
   readlink \
   realpath \
diff --git a/src/ptx/deps.mk b/src/ptx/deps.mk
new file mode 100644
index 000000000..3115648f6
--- /dev/null
+++ b/src/ptx/deps.mk
@@ -0,0 +1 @@
+DEPLIBS += regex
\ No newline at end of file
diff --git a/src/ptx/ptx.rs b/src/ptx/ptx.rs
new file mode 100644
index 000000000..a029bb144
--- /dev/null
+++ b/src/ptx/ptx.rs
@@ -0,0 +1,561 @@
+#![crate_name = "ptx"]
+#![feature(convert, collections)]
+
+/*
+ * This file is part of the uutils coreutils package.
+ *
+ * (c) Dorota Kapturkiewicz <dokaptur@gmail.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+extern crate getopts;
+extern crate regex;
+ 
+use std::collections::{HashMap, HashSet, BTreeSet};
+use std::default::Default;
+use std::fs::File;
+use getopts::{Options, Matches};
+use std::io::{stdin, stdout, BufReader, BufWriter, BufRead, Read, Write};
+use regex::Regex;
+use std::cmp;
+
+
+#[path = "../common/util.rs"]
+#[macro_use]
+mod util;
+
+static NAME: &'static str = "ptx";
+static VERSION: &'static str = "1.0.0";
+
+#[derive(Debug)]
+enum OutFormat {
+    Dumb, 
+    Roff, 
+    Tex,
+}
+
+#[derive(Debug)]
+struct Config {
+    format : OutFormat,
+    gnu_ext : bool,
+    auto_ref : bool,
+    input_ref : bool,
+    right_ref : bool,
+    ignore_case : bool,
+    macro_name : String,
+    trunc_str : String,
+    context_regex : String,
+    line_width : usize,
+    gap_size : usize,
+}
+
+impl Default for Config {
+    fn default() -> Config {
+        Config {
+            format : OutFormat::Dumb,
+            gnu_ext : true,
+            auto_ref : false,
+            input_ref : false,
+            right_ref : false,
+            ignore_case : false,
+            macro_name : "xx".to_string(),
+            trunc_str : "/".to_string(),
+            context_regex : "\\w+".to_string(),
+            line_width : 72,
+            gap_size : 3
+        }
+    }
+}
+
+fn read_word_filter_file(matches: &Matches, option: &str) -> HashSet<String> {
+    let filename = matches.opt_str(option).expect("parsing options failed!");
+    let reader = BufReader::new(crash_if_err!(1, File::open(filename)));
+    let mut words: HashSet<String> = HashSet::new();
+    for word in reader.lines() {
+        words.insert(crash_if_err!(1, word));
+    }
+    words
+}
+
+#[derive(Debug)]
+struct WordFilter {
+    only_specified: bool,
+    ignore_specified: bool,
+    only_set: HashSet<String>,
+    ignore_set: HashSet<String>,
+    word_regex: String,
+}
+
+impl WordFilter {
+   fn new(matches: &Matches, config: &Config) -> WordFilter {
+        let (o, oset): (bool, HashSet<String>) = 
+            if matches.opt_present("o") {
+                (true, read_word_filter_file(matches, "o"))
+            } else {
+                (false, HashSet::new())
+            };
+        let (i, iset): (bool, HashSet<String>) = 
+            if matches.opt_present("i") {
+                (true, read_word_filter_file(matches, "i"))
+            } else {
+                (false, HashSet::new())
+            };
+        if matches.opt_present("b") {
+            crash!(1, "-b not implemented yet");
+        }
+        let reg = 
+            if matches.opt_present("W") {
+                matches.opt_str("W").expect("parsing options failed!")
+            } else if config.gnu_ext {
+                "\\w+".to_string()
+            } else {
+                "[^ \t\n]+".to_string()
+            };
+        WordFilter { 
+            only_specified: o, 
+            ignore_specified: i, 
+            only_set: oset, 
+            ignore_set: iset,
+            word_regex: reg
+        }
+    }
+}
+
+#[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
+struct WordRef {
+    word: String,
+    global_line_nr: usize,
+    local_line_nr: usize,
+    position: usize,
+    position_end: usize,
+    filename: String,
+}
+
+fn print_version() {
+    println!("{} version {}", NAME, VERSION);
+}
+
+fn print_usage(opts: &Options) {
+    let brief = "Usage: ptx [OPTION]... [INPUT]...   (without -G) or: \
+        ptx -G [OPTION]... [INPUT [OUTPUT]] \n Output a permuted index, \
+        including context, of the words in the input files. \n\n Mandatory \
+        arguments to long options are mandatory for short options too.";
+    let explaination = "With no FILE, or when FILE is -, read standard input. \
+        Default is '-F /'.";
+    println!("{}\n{}", opts.usage(&brief), explaination);
+}
+
+
+fn get_config(matches: &Matches) -> Config {
+    let mut config: Config = Default::default();
+    let err_msg = "parsing options failed";
+    if matches.opt_present("G") {
+        config.gnu_ext = false;
+        config.format = OutFormat::Roff;
+        config.context_regex = "[^ \t\n]+".to_string();
+    } else {
+        crash!(1, "GNU extensions not implemented yet");
+    }
+    if matches.opt_present("S") {
+        crash!(1, "-S not implemented yet");
+    }
+    config.auto_ref = matches.opt_present("A");
+    config.input_ref = matches.opt_present("r");
+    config.right_ref &= matches.opt_present("R");
+    config.ignore_case = matches.opt_present("f"); 
+    if matches.opt_present("M") {
+        config.macro_name = 
+            matches.opt_str("M").expect(err_msg).to_string();
+    }
+    if matches.opt_present("F") {
+        config.trunc_str = 
+            matches.opt_str("F").expect(err_msg).to_string();
+    }
+    if matches.opt_present("w") {
+        let width_str = matches.opt_str("w").expect(err_msg);
+        config.line_width = crash_if_err!(
+            1, usize::from_str_radix(width_str.as_str(), 10));
+    }
+    if matches.opt_present("g") {
+        let gap_str = matches.opt_str("g").expect(err_msg);
+        config.gap_size = crash_if_err!(
+            1, usize::from_str_radix(gap_str.as_str(), 10));
+    }
+    if matches.opt_present("O") {
+        config.format = OutFormat::Roff;
+    }
+    if matches.opt_present("T") {
+        config.format = OutFormat::Tex;
+    }
+    config
+}
+
+fn read_input(input_files: Vec<&str>, config: &Config) ->
+             HashMap<String, (Vec<String>, usize)> {
+    let mut file_map : HashMap<String, (Vec<String>, usize)> = 
+        HashMap::new();
+    let mut files = Vec::new();
+    if input_files.is_empty() {
+        files.push("-");
+    } else {
+        if config.gnu_ext {
+            files.push_all(input_files.as_slice());
+        } else {
+            files.push(input_files[0]);
+        }
+    }
+    let mut lines_so_far: usize = 0;
+    for filename in files {
+        let reader: BufReader<Box<Read>> = BufReader::new(
+            if filename == "-" {
+                Box::new(stdin())
+            } else {
+                let file = crash_if_err!(1, File::open(filename));
+                Box::new(file)
+            });
+        let lines: Vec<String> = reader.lines().map(|x| crash_if_err!(1, x))
+            .collect();
+        let size = lines.len();
+        file_map.insert(filename.to_string(), (lines, lines_so_far));
+        lines_so_far += size
+    }
+    file_map
+}
+
+fn create_word_set(config: &Config, filter: &WordFilter, 
+                  file_map: &HashMap<String, (Vec<String>, usize)>)-> 
+                  BTreeSet<WordRef> {
+    let reg = Regex::new(filter.word_regex.as_str()).unwrap();
+    let ref_reg = Regex::new(config.context_regex.as_str()).unwrap();
+    let mut word_set: BTreeSet<WordRef> = BTreeSet::new();
+    for (file, lines) in file_map.iter() {
+        let mut count: usize = 0;
+        let offs = lines.1;
+        for line in (lines.0).iter() {
+            // if -r, exclude reference from word set
+            let (ref_beg, ref_end) = match ref_reg.find(line) {
+                Some(x) => x,
+                None => (0,0)
+            };
+            // match words with given regex
+            for (beg, end) in reg.find_iter(line) {
+                if config.input_ref && ((beg, end) == (ref_beg, ref_end)) {
+                    continue;
+                }
+                let mut word = line.slice_chars(beg, end).to_string();
+                if filter.only_specified && 
+                   !(filter.only_set.contains(&word)) {
+                    continue;
+                }
+                if filter.ignore_specified && 
+                   filter.ignore_set.contains(&word) {
+                    continue;
+                }
+                if config.ignore_case {
+                    word = word.to_lowercase();
+                }
+                word_set.insert(WordRef{
+                    word: word,
+                    filename: String::from(file.as_str()),
+                    global_line_nr: offs + count,
+                    local_line_nr: count,
+                    position: beg,
+                    position_end: end
+                });
+            }
+            count += 1;
+        }
+    }
+    word_set
+}
+
+fn get_reference(config: &Config, word_ref: &WordRef, line: &String) -> 
+                String {
+    if config.auto_ref {
+        format!("{}:{}", word_ref.filename, word_ref.local_line_nr + 1)
+    } else if config.input_ref {
+        let reg = Regex::new(config.context_regex.as_str()).unwrap();
+        let (beg, end) = match reg.find(line) {
+            Some(x) => x,
+            None => (0,0)
+        };
+        format!("{}", line.slice_chars(beg, end))
+    } else {
+        String::new()
+    }
+}
+
+fn assert_str_integrity(s: &Vec<char>, beg: usize, end: usize) {
+    assert!(beg <= end);
+    assert!(end <= s.len());
+}
+
+fn trim_broken_word_left(s: &Vec<char>, beg: usize, end: usize) -> usize {
+    assert_str_integrity(s, beg, end);
+    if beg == end || beg == 0 || s[beg].is_whitespace() || 
+       s[beg-1].is_whitespace() {
+        return beg;
+    }
+    let mut b = beg;
+    while b < end && !s[b].is_whitespace() {
+        b += 1;
+    }
+    b
+}
+
+fn trim_broken_word_right(s: &Vec<char>, beg: usize, end: usize) -> usize {
+    assert_str_integrity(s, beg, end);
+    if beg == end || end == s.len() || s[end-1].is_whitespace() || 
+       s[end].is_whitespace() {
+        return end;
+    }
+    let mut e = end;
+    while beg < e && !s[e-1].is_whitespace() {
+        e -= 1;
+    }
+    e
+}
+
+fn trim_idx(s: &Vec<char>, beg: usize, end: usize) -> (usize, usize) {
+    assert_str_integrity(s, beg, end);
+    let mut b = beg;
+    let mut e = end;
+    while b < e && s[b].is_whitespace() {
+        b += 1;
+    }
+    while b < e && s[e-1].is_whitespace() {
+        e -= 1;
+    }
+    (b,e)
+}
+
+
+fn get_output_chunks(all_before: &String, keyword: &String, all_after: &String,
+                    config: &Config) -> (String, String, String, String) {
+    assert!(all_before.trim() == all_before.as_str());
+    assert!(keyword.trim() == keyword.as_str());
+    assert!(all_after.trim() == all_after.as_str());
+    let mut head = String::new();
+    let mut before = String::new();
+    let mut after = String::new();
+    let mut tail = String::new();
+    
+    let half_line_size = cmp::max((config.line_width/2) as isize - 
+        (2*config.trunc_str.len()) as isize, 0) as usize; 
+    let max_after_size = cmp::max(half_line_size as isize - 
+        keyword.len() as isize - 1, 0) as usize;
+    let max_before_size = half_line_size; 
+    let all_before_vec: Vec<char> = all_before.chars().collect();
+    let all_after_vec: Vec<char> = all_after.chars().collect();
+    
+    // get before
+    let mut bb_tmp = 
+        cmp::max(all_before.len() as isize - max_before_size as isize, 0) as usize;
+    bb_tmp = trim_broken_word_left(&all_before_vec, bb_tmp, all_before.len());
+    let (before_beg, before_end) = 
+        trim_idx(&all_before_vec, bb_tmp, all_before.len());
+    before.push_str(all_before.slice_chars(before_beg, before_end));
+    assert!(max_before_size >= before.len());
+    
+    // get after
+    let mut ae_tmp = cmp::min(max_after_size, all_after.len());
+    ae_tmp = trim_broken_word_right(&all_after_vec, 0, ae_tmp);
+    let (after_beg, after_end) = trim_idx(&all_after_vec, 0, ae_tmp);
+    after.push_str(all_after.slice_chars(after_beg, after_end));
+    assert!(max_after_size >= after.len()); 
+    
+    // get tail
+    let max_tail_size = max_before_size - before.len();
+    let (tb, _) = trim_idx(&all_after_vec, after_end, all_after.len());
+    let mut te_tmp = cmp::min(tb + max_tail_size, all_after.len()); 
+    te_tmp = trim_broken_word_right(&all_after_vec, tb, te_tmp);
+    let (tail_beg, tail_end) = trim_idx(&all_after_vec, tb, te_tmp);
+    tail.push_str(all_after.slice_chars(tail_beg, tail_end));
+    
+    // get head
+    let max_head_size = max_after_size - after.len();
+    let (_, he) = trim_idx(&all_before_vec, 0, before_beg);
+    let mut hb_tmp = 
+        cmp::max(he as isize - max_head_size as isize, 0) as usize;
+    hb_tmp = trim_broken_word_left(&all_before_vec, hb_tmp, he);
+    let (head_beg, head_end) = trim_idx(&all_before_vec, hb_tmp, he);
+    head.push_str(all_before.slice_chars(head_beg, head_end));
+    
+    // put right context truncation string if needed
+    if after_end != all_after.len() && tail_beg == tail_end {
+        after.push_str(config.trunc_str.as_str());
+    } else if after_end != all_after.len() && tail_end != all_after.len() {
+        tail.push_str(config.trunc_str.as_str());
+    }
+    
+    // put left context truncation string if needed
+    if before_beg != 0 && head_beg == head_end {
+        before = format!("{}{}", config.trunc_str, before);
+    } else if before_beg != 0 && head_beg != 0 {
+        head = format!("{}{}", config.trunc_str, head);
+    }
+    
+    // add space before "after" if needed
+    if after.len() > 0 {
+        after = format!(" {}", after);
+    }
+    
+    (tail, before, after, head)
+}
+
+fn tex_mapper(x: char) -> String {
+    match x {
+        '\\' => "\\backslash{}".to_string(),
+        '$' | '%' | '#' | '&' | '_' => format!("\\{}", x),
+        '^' | '~' => format!("{}\\{}{}", x, "{", "}"),
+        '}' | '{' => format!("$\\{}$", x),
+        _ => x.to_string()
+    }
+}
+
+fn adjust_tex_str(context: &str) -> String {
+    let ws_reg = Regex::new(r"[\t\n\v\f\r ]").unwrap();
+    let mut fix: String = ws_reg.replace_all(context, " ").trim().to_string();
+    let mapped_chunks: Vec<String> = fix.chars().map(tex_mapper).collect();
+    fix = mapped_chunks.connect("");
+    fix
+}
+
+fn format_tex_line(config: &Config, word_ref: &WordRef, line: &String, 
+                  reference: &String) -> String {
+    let mut output = String::new();
+    output.push_str(&format!("\\{} ", config.macro_name));
+    let all_before = if config.input_ref {
+        let before = line.slice_chars(0, word_ref.position);
+        adjust_tex_str(before.trim().trim_left_matches(reference))
+    } else {
+        adjust_tex_str(line.slice_chars(0, word_ref.position))
+    };
+    let keyword = adjust_tex_str(
+        line.slice_chars(word_ref.position, word_ref.position_end));
+    let all_after = adjust_tex_str(
+        line.slice_chars(word_ref.position_end, line.len()));
+    let (tail, before, after, head) = 
+        get_output_chunks(&all_before, &keyword, &all_after, &config);
+    output.push_str(format!("{5}{0}{6}{5}{1}{6}{5}{2}{6}{5}{3}{6}{5}{4}{6}", 
+        tail, before, keyword, after, head, "{", "}").as_str());
+    if config.auto_ref || config.input_ref {
+        output.push_str(&format!("{}{}{}", "{", reference, "}"));
+    }
+    output
+}
+
+fn adjust_roff_str(context: &str) -> String {
+    let ws_reg = Regex::new(r"[\t\n\v\f\r]").unwrap();
+    ws_reg.replace_all(context, " ").replace("\"", "\"\"").trim().to_string()
+}
+
+fn format_roff_line(config: &Config, word_ref: &WordRef, line: &str, 
+                   reference: &str) -> String {
+    let mut output = String::new();
+    output.push_str(&format!(".{}", config.macro_name));
+    let all_before = if config.input_ref {
+        let before = line.slice_chars(0, word_ref.position);
+        adjust_roff_str(before.trim().trim_left_matches(reference))
+    } else {
+        adjust_roff_str(line.slice_chars(0, word_ref.position))
+    };
+    let keyword = adjust_roff_str(
+        line.slice_chars(word_ref.position, word_ref.position_end));
+    let all_after = adjust_roff_str(
+        line.slice_chars(word_ref.position_end, line.len()));
+    let (tail, before, after, head) = 
+        get_output_chunks(&all_before, &keyword, &all_after, &config);
+    output.push_str(format!(" \"{}\" \"{}\" \"{}{}\" \"{}\"", 
+        tail, before, keyword, after, head).as_str());
+    if config.auto_ref || config.input_ref {
+        output.push_str(&format!(" \"{}\"", reference));
+    }
+    output
+}
+
+fn write_traditional_output(config: &Config, 
+                            file_map: &HashMap<String, (Vec<String>,usize)>, 
+                            words: &BTreeSet<WordRef>, output_filename: &str) {
+    let mut writer: BufWriter<Box<Write>> = BufWriter::new(
+    if output_filename == "-" {
+        Box::new(stdout())
+    } else {
+        let file = crash_if_err!(1, File::create(output_filename));
+        Box::new(file)
+    });
+    for word_ref in words.iter() {
+        let file_map_value : &(Vec<String>, usize) =
+            file_map.get(&(word_ref.filename))
+                .expect("Missing file in file map");
+        let (ref lines, _) = *(file_map_value);
+        let reference = 
+            get_reference(config, word_ref, &lines[word_ref.local_line_nr]);
+        let output_line: String = match config.format {
+            OutFormat::Tex => format_tex_line(
+                config, word_ref, &lines[word_ref.local_line_nr], &reference),
+            OutFormat::Roff => format_roff_line(
+                config, word_ref, &lines[word_ref.local_line_nr], &reference),
+            OutFormat::Dumb => crash!(
+                1, "There is no dumb format with GNU extensions disabled")
+        };
+        crash_if_err!(1, writeln!(writer, "{}", output_line));
+    }
+}
+
+pub fn uumain(args: Vec<String>) -> i32 {
+    let mut opts = Options::new();
+    opts.optflag("A", "auto-reference", 
+        "output automatically generated references");
+    opts.optflag("G", "traditional", "behave more like System V 'ptx'");
+    opts.optopt("F", "flag-truncation", 
+        "use STRING for flagging line truncations", "STRING");
+    opts.optopt("M", "macro-name", "macro name to use instead of 'xx'", 
+        "STRING");
+    opts.optflag("O", "format=roff", "generate output as roff directives");
+    opts.optflag("R", "right-side-refs", 
+        "put references at right, not counted in -w");
+    opts.optopt("S", "sentence-regexp", "for end of lines or end of sentences",
+        "REGEXP");
+    opts.optflag("T", "format=tex", "generate output as TeX directives");
+    opts.optopt("W", "word-regexp", "use REGEXP to match each keyword", 
+        "REGEXP");
+    opts.optopt("b", "break-file", "word break characters in this FILE", 
+        "FILE");
+    opts.optflag("f", "ignore-case", 
+        "fold lower case to upper case for sorting");
+    opts.optopt("g", "gap-size", "gap size in columns between output fields", 
+        "NUMBER");
+    opts.optopt("i", "ignore-file", "read ignore word list from FILE", "FILE");
+    opts.optopt("o", "only-file", "read only word list from this FILE", 
+        "FILE");
+    opts.optflag("r", "references", "first field of each line is a reference");
+    opts.optopt("w", "width", "output width in columns, reference excluded",
+        "NUMBER");
+    opts.optflag("", "help", "display this help and exit");
+    opts.optflag("", "version", "output version information and exit");
+    
+    let matches = return_if_err!(1, opts.parse(&args[1..]));
+    
+    if matches.opt_present("help") {
+        print_usage(&opts);
+        return 0;
+    }
+    if matches.opt_present("version") {
+        print_version();
+        return 0;
+    }
+    let config = get_config(&matches);
+    let word_filter = WordFilter::new(&matches, &config);
+    let file_map = 
+        read_input(matches.free.iter().map(|x| x.as_str()).collect(), &config);
+    let word_set = create_word_set(&config, &word_filter, &file_map);
+    let output_file = if !config.gnu_ext && matches.free.len() == 2 {
+        matches.free[1].as_str()
+    } else {
+        "-"
+    };
+    write_traditional_output(&config, &file_map, &word_set, output_file);
+    0
+}