mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
commit
ac6c8e2794
2 changed files with 166 additions and 58 deletions
177
src/tr/tr.rs
177
src/tr/tr.rs
|
@ -32,66 +32,131 @@ static NAME: &'static str = "tr";
|
|||
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
const BUFFER_LEN: usize = 1024;
|
||||
|
||||
fn delete(set: ExpandSet, complement: bool) {
|
||||
let mut bset = BitSet::new();
|
||||
let stdin = stdin();
|
||||
let mut locked_stdin = stdin.lock();
|
||||
let mut buffered_stdout = BufWriter::new(stdout());
|
||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
||||
let mut char_output_buffer: [u8; 4] = [0;4];
|
||||
|
||||
for c in set {
|
||||
bset.insert(c as usize);
|
||||
/// A per-character transformation applied to the input stream.
///
/// Implementors decide, one character at a time, what (if anything) is
/// emitted for the current input character.
trait SymbolTranslator {
    /// Decides what to emit for `c`.
    ///
    /// * `c` - the character currently being processed.
    /// * `prev_c` - the previous character supplied by the caller (the driver
    ///   in this file passes the last input character that was kept).
    ///
    /// Returns `Some(ch)` to emit `ch`, or `None` to emit nothing for `c`.
    fn translate(&self, c: &char, prev_c: &char) -> Option<char>;
}
|
||||
|
||||
let is_allowed = |c : char| {
|
||||
if complement {
|
||||
bset.contains(c as usize)
|
||||
struct DeleteOperation {
|
||||
bset: BitSet,
|
||||
complement: bool,
|
||||
}
|
||||
|
||||
impl DeleteOperation {
|
||||
fn new(set: ExpandSet, complement: bool) -> DeleteOperation {
|
||||
DeleteOperation {
|
||||
bset: set.map(|c| c as usize).collect(),
|
||||
complement: complement
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for DeleteOperation {
    /// Keeps `c` when its set membership equals the `complement` flag.
    ///
    /// Without `complement`, characters in the set are dropped and everything
    /// else passes through; with `complement`, only characters in the set
    /// pass through. The previous character is irrelevant for deletion.
    fn translate(&self, c: &char, _prev_c: &char) -> Option<char> {
        let in_set = self.bset.contains(*c as usize);
        if in_set == self.complement {
            Some(*c)
        } else {
            None
        }
    }
}
|
||||
|
||||
fn tr<'a>(set1: ExpandSet<'a>, mut set2: ExpandSet<'a>) {
|
||||
struct SqueezeOperation {
|
||||
squeeze_set: BitSet,
|
||||
complement: bool,
|
||||
}
|
||||
|
||||
impl SqueezeOperation {
|
||||
fn new(squeeze_set: ExpandSet, complement: bool) -> SqueezeOperation {
|
||||
SqueezeOperation {
|
||||
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
||||
complement: complement
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for SqueezeOperation {
    /// Drops `c` when it repeats `prev_c` and is subject to squeezing;
    /// otherwise passes it through unchanged.
    ///
    /// A character is subject to squeezing when its membership in
    /// `squeeze_set` differs from the `complement` flag.
    fn translate(&self, c: &char, prev_c: &char) -> Option<char> {
        let is_repeat = *prev_c == *c;
        let squeezable = self.squeeze_set.contains(*c as usize) != self.complement;
        if is_repeat && squeezable {
            None
        } else {
            Some(*c)
        }
    }
}
|
||||
|
||||
struct DeleteAndSqueezeOperation {
|
||||
delete_set: BitSet,
|
||||
squeeze_set: BitSet,
|
||||
complement: bool,
|
||||
}
|
||||
|
||||
impl DeleteAndSqueezeOperation {
|
||||
fn new(delete_set: ExpandSet, squeeze_set: ExpandSet, complement: bool) -> DeleteAndSqueezeOperation {
|
||||
DeleteAndSqueezeOperation {
|
||||
delete_set: delete_set.map(|c| c as usize).collect(),
|
||||
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
||||
complement: complement
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for DeleteAndSqueezeOperation {
    /// Drops `c` if it is deleted, or if it repeats `prev_c` and is in the
    /// squeeze set; otherwise passes it through unchanged.
    ///
    /// `c` is deleted when its membership in `delete_set` differs from the
    /// `complement` flag.
    fn translate(&self, c: &char, prev_c: &char) -> Option<char> {
        let code = *c as usize;
        let deleted = self.delete_set.contains(code) != self.complement;
        // Parenthesised explicitly: `&&` binds tighter than `||`, so this is
        // the grouping the unparenthesised form already had.
        let squeezed = (*prev_c == *c) && self.squeeze_set.contains(code);
        if deleted || squeezed {
            None
        } else {
            Some(*c)
        }
    }
}
|
||||
|
||||
struct TranslateOperation {
|
||||
translate_map: FnvHashMap<usize, char>,
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
fn new(set1: ExpandSet, set2: &mut ExpandSet) -> TranslateOperation {
|
||||
let mut map = FnvHashMap::default();
|
||||
let stdin = stdin();
|
||||
let mut locked_stdin = stdin.lock();
|
||||
let mut buffered_stdout = BufWriter::new(stdout());
|
||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
||||
let mut char_output_buffer: [u8; 4] = [0;4];
|
||||
|
||||
let mut s2_prev = '_';
|
||||
for i in set1 {
|
||||
s2_prev = set2.next().unwrap_or(s2_prev);
|
||||
|
||||
map.insert(i as usize, s2_prev);
|
||||
}
|
||||
TranslateOperation {
|
||||
translate_map: map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while let Ok(length) = locked_stdin.read_line(&mut buf) {
|
||||
impl SymbolTranslator for TranslateOperation {
    /// Returns the replacement for `c` from the lookup table, or `c` itself
    /// when it has no entry. Never returns `None`, so no input is dropped.
    fn translate(&self, c: &char, _prev_c: &char) -> Option<char> {
        let key = *c as usize;
        Some(*self.translate_map.get(&key).unwrap_or(c))
    }
}
|
||||
|
||||
fn translate_input<T: SymbolTranslator>(input: &mut BufRead, output: &mut Write, translator: T) {
|
||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
||||
let mut output_buf = String::with_capacity(BUFFER_LEN + 4);
|
||||
|
||||
while let Ok(length) = input.read_line(&mut buf) {
|
||||
let mut prev_c = 0 as char;
|
||||
if length == 0 { break }
|
||||
|
||||
{ // isolation to make borrow checker happy
|
||||
let output_stream = buf.chars().map(|c| *map.get(&(c as usize)).unwrap_or(&c));
|
||||
for c in output_stream {
|
||||
let char_as_bytes = c.encode_utf8(&mut char_output_buffer);
|
||||
buffered_stdout.write_all(char_as_bytes.as_bytes()).unwrap();
|
||||
}
|
||||
let filtered = buf.chars().filter_map(|c| {
|
||||
let res = translator.translate(&c, &prev_c);
|
||||
if res.is_some() {
|
||||
prev_c = c;
|
||||
}
|
||||
res
|
||||
});
|
||||
|
||||
output_buf.extend(filtered);
|
||||
output.write_all(output_buf.as_bytes()).unwrap();
|
||||
}
|
||||
buf.clear();
|
||||
output_buf.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,6 +176,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
opts.optflag("C", "", "same as -c");
|
||||
opts.optflag("d", "delete", "delete characters in SET1");
|
||||
opts.optflag("h", "help", "display this help and exit");
|
||||
opts.optflag("s", "squeeze", "replace each sequence of a repeated character that is listed in the last specified SET, with a single occurrence of that character");
|
||||
opts.optflag("V", "version", "output version information and exit");
|
||||
|
||||
let matches = match opts.parse(&args[1..]) {
|
||||
|
@ -138,20 +204,37 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
|
||||
let dflag = matches.opt_present("d");
|
||||
let cflag = matches.opts_present(&["c".to_owned(), "C".to_owned()]);
|
||||
let sflag = matches.opt_present("s");
|
||||
let sets = matches.free;
|
||||
|
||||
if cflag && !dflag {
|
||||
show_error!("-c is only supported with -d");
|
||||
if cflag && !dflag && !sflag {
|
||||
show_error!("-c is only supported with -d or -s");
|
||||
return 1;
|
||||
}
|
||||
|
||||
let stdin = stdin();
|
||||
let mut locked_stdin = stdin.lock();
|
||||
let stdout = stdout();
|
||||
let locked_stdout = stdout.lock();
|
||||
let mut buffered_stdout = BufWriter::new(locked_stdout);
|
||||
|
||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
||||
if dflag {
|
||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
||||
delete(set1, cflag);
|
||||
} else {
|
||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
||||
if sflag {
|
||||
let set2 = ExpandSet::new(sets[1].as_ref());
|
||||
tr(set1, set2);
|
||||
let op = DeleteAndSqueezeOperation::new(set1, set2, cflag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let op = DeleteOperation::new(set1, cflag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
} else if sflag {
|
||||
let op = SqueezeOperation::new(set1, cflag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let mut set2 = ExpandSet::new(sets[1].as_ref());
|
||||
let op = TranslateOperation::new(set1, &mut set2);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op)
|
||||
}
|
||||
|
||||
0
|
||||
|
|
|
@ -32,3 +32,28 @@ fn test_delete_complement() {
|
|||
new_ucmd!()
|
||||
.args(&["-d", "-c", "a-z"]).pipe_in("aBcD").run().stdout_is("ac");
|
||||
}
|
||||
|
||||
// `-s a-z`: runs of a repeated lowercase letter collapse to one occurrence
// ("aa" -> "a", "cc" -> "c"); the uppercase run "BB" is outside the set and
// is left alone.
#[test]
fn test_squeeze() {
    new_ucmd!()
        .args(&["-s", "a-z"])
        .pipe_in("aaBBcDcc")
        .run()
        .stdout_is("aBBcDc");
}
|
||||
|
||||
|
||||
// `-sc a-z`: with the set complemented, only runs of characters outside
// a-z collapse ("BB" -> "B"); the lowercase runs are left alone.
#[test]
fn test_squeeze_complement() {
    new_ucmd!()
        .args(&["-sc", "a-z"])
        .pipe_in("aaBBcDcc")
        .run()
        .stdout_is("aaBcDcc");
}
|
||||
|
||||
// `-ds a-z A-Z`: lowercase letters are deleted, which leaves "BB"; the run
// of uppercase letters is then squeezed to a single "B".
#[test]
fn test_delete_and_squeeze() {
    new_ucmd!()
        .args(&["-ds", "a-z", "A-Z"])
        .pipe_in("abBcB")
        .run()
        .stdout_is("B");
}
|
||||
|
||||
// `-dsc a-z A-Z`: with the delete set complemented, everything outside a-z
// is deleted, so both "B"s go and "abc" remains; nothing is left to squeeze.
#[test]
fn test_delete_and_squeeze_complement() {
    new_ucmd!()
        .args(&["-dsc", "a-z", "A-Z"])
        .pipe_in("abBcB")
        .run()
        .stdout_is("abc");
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue