mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
commit
ac6c8e2794
2 changed files with 166 additions and 58 deletions
179
src/tr/tr.rs
179
src/tr/tr.rs
|
@ -32,66 +32,131 @@ static NAME: &'static str = "tr";
|
||||||
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||||
const BUFFER_LEN: usize = 1024;
|
const BUFFER_LEN: usize = 1024;
|
||||||
|
|
||||||
fn delete(set: ExpandSet, complement: bool) {
|
trait SymbolTranslator {
|
||||||
let mut bset = BitSet::new();
|
fn translate(&self, c: &char, prev_c: &char) -> Option<char>;
|
||||||
let stdin = stdin();
|
}
|
||||||
let mut locked_stdin = stdin.lock();
|
|
||||||
let mut buffered_stdout = BufWriter::new(stdout());
|
|
||||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
|
||||||
let mut char_output_buffer: [u8; 4] = [0;4];
|
|
||||||
|
|
||||||
for c in set {
|
struct DeleteOperation {
|
||||||
bset.insert(c as usize);
|
bset: BitSet,
|
||||||
}
|
complement: bool,
|
||||||
|
}
|
||||||
|
|
||||||
let is_allowed = |c : char| {
|
impl DeleteOperation {
|
||||||
if complement {
|
fn new(set: ExpandSet, complement: bool) -> DeleteOperation {
|
||||||
bset.contains(c as usize)
|
DeleteOperation {
|
||||||
} else {
|
bset: set.map(|c| c as usize).collect(),
|
||||||
!bset.contains(c as usize)
|
complement: complement
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
while let Ok(length) = locked_stdin.read_line(&mut buf) {
|
|
||||||
if length == 0 { break }
|
|
||||||
{ // isolation to make borrow checker happy
|
|
||||||
let filtered = buf.chars().filter(|c| is_allowed(*c));
|
|
||||||
for c in filtered {
|
|
||||||
let char_as_bytes = c.encode_utf8(&mut char_output_buffer);
|
|
||||||
buffered_stdout.write_all(char_as_bytes.as_bytes()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buf.clear();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tr<'a>(set1: ExpandSet<'a>, mut set2: ExpandSet<'a>) {
|
impl SymbolTranslator for DeleteOperation {
|
||||||
let mut map = FnvHashMap::default();
|
fn translate(&self, c: &char, _prev_c: &char) -> Option<char> {
|
||||||
let stdin = stdin();
|
let uc = *c as usize;
|
||||||
let mut locked_stdin = stdin.lock();
|
if self.complement == self.bset.contains(uc) {
|
||||||
let mut buffered_stdout = BufWriter::new(stdout());
|
Some(*c)
|
||||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
} else {
|
||||||
let mut char_output_buffer: [u8; 4] = [0;4];
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SqueezeOperation {
|
||||||
|
squeeze_set: BitSet,
|
||||||
|
complement: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SqueezeOperation {
|
||||||
|
fn new(squeeze_set: ExpandSet, complement: bool) -> SqueezeOperation {
|
||||||
|
SqueezeOperation {
|
||||||
|
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
||||||
|
complement: complement
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SymbolTranslator for SqueezeOperation {
    /// Returns `None` when `c` equals `prev_c` and is subject to squeezing,
    /// otherwise returns `c` unchanged.
    ///
    /// A character is subject to squeezing when its membership in `squeeze_set`
    /// differs from the `complement` flag.
    fn translate(&self, c: &char, prev_c: &char) -> Option<char> {
        let symbol = *c;
        let repeats_previous = symbol == *prev_c;
        let listed = self.squeeze_set.contains(symbol as usize);
        let squeezable = listed != self.complement;

        if repeats_previous && squeezable {
            return None;
        }
        Some(symbol)
    }
}
|
||||||
|
|
||||||
|
struct DeleteAndSqueezeOperation {
|
||||||
|
delete_set: BitSet,
|
||||||
|
squeeze_set: BitSet,
|
||||||
|
complement: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DeleteAndSqueezeOperation {
|
||||||
|
fn new(delete_set: ExpandSet, squeeze_set: ExpandSet, complement: bool) -> DeleteAndSqueezeOperation {
|
||||||
|
DeleteAndSqueezeOperation {
|
||||||
|
delete_set: delete_set.map(|c| c as usize).collect(),
|
||||||
|
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
||||||
|
complement: complement
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SymbolTranslator for DeleteAndSqueezeOperation {
    /// Returns `None` when `c` is deleted or squeezed, otherwise returns `c` unchanged.
    ///
    /// * deleted: membership of `c` in `delete_set` differs from the `complement` flag;
    /// * squeezed: `c` equals `prev_c` and is a member of `squeeze_set`.
    fn translate(&self, c: &char, prev_c: &char) -> Option<char> {
        let code = *c as usize;

        let deleted = self.delete_set.contains(code) != self.complement;
        let squeezed = *c == *prev_c && self.squeeze_set.contains(code);

        match (deleted, squeezed) {
            (false, false) => Some(*c),
            _ => None,
        }
    }
}
|
||||||
|
|
||||||
|
struct TranslateOperation {
|
||||||
|
translate_map: FnvHashMap<usize, char>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TranslateOperation {
|
||||||
|
fn new(set1: ExpandSet, set2: &mut ExpandSet) -> TranslateOperation {
|
||||||
|
let mut map = FnvHashMap::default();
|
||||||
let mut s2_prev = '_';
|
let mut s2_prev = '_';
|
||||||
for i in set1 {
|
for i in set1 {
|
||||||
s2_prev = set2.next().unwrap_or(s2_prev);
|
s2_prev = set2.next().unwrap_or(s2_prev);
|
||||||
|
|
||||||
map.insert(i as usize, s2_prev);
|
map.insert(i as usize, s2_prev);
|
||||||
}
|
}
|
||||||
|
TranslateOperation {
|
||||||
|
translate_map: map,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while let Ok(length) = locked_stdin.read_line(&mut buf) {
|
impl SymbolTranslator for TranslateOperation {
    /// Returns the replacement registered for `c`, or `c` itself when the map has no entry.
    /// The previous character is not consulted, and the result is never `None`.
    fn translate(&self, c: &char, _prev_c: &char) -> Option<char> {
        let key = *c as usize;
        match self.translate_map.get(&key) {
            Some(mapped) => Some(*mapped),
            None => Some(*c),
        }
    }
}
|
||||||
|
|
||||||
|
fn translate_input<T: SymbolTranslator>(input: &mut BufRead, output: &mut Write, translator: T) {
|
||||||
|
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
||||||
|
let mut output_buf = String::with_capacity(BUFFER_LEN + 4);
|
||||||
|
|
||||||
|
while let Ok(length) = input.read_line(&mut buf) {
|
||||||
|
let mut prev_c = 0 as char;
|
||||||
if length == 0 { break }
|
if length == 0 { break }
|
||||||
|
|
||||||
{ // isolation to make borrow checker happy
|
{ // isolation to make borrow checker happy
|
||||||
let output_stream = buf.chars().map(|c| *map.get(&(c as usize)).unwrap_or(&c));
|
let filtered = buf.chars().filter_map(|c| {
|
||||||
for c in output_stream {
|
let res = translator.translate(&c, &prev_c);
|
||||||
let char_as_bytes = c.encode_utf8(&mut char_output_buffer);
|
if res.is_some() {
|
||||||
buffered_stdout.write_all(char_as_bytes.as_bytes()).unwrap();
|
prev_c = c;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
res
|
||||||
|
});
|
||||||
|
|
||||||
|
output_buf.extend(filtered);
|
||||||
|
output.write_all(output_buf.as_bytes()).unwrap();
|
||||||
|
}
|
||||||
buf.clear();
|
buf.clear();
|
||||||
|
output_buf.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,6 +176,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
||||||
opts.optflag("C", "", "same as -c");
|
opts.optflag("C", "", "same as -c");
|
||||||
opts.optflag("d", "delete", "delete characters in SET1");
|
opts.optflag("d", "delete", "delete characters in SET1");
|
||||||
opts.optflag("h", "help", "display this help and exit");
|
opts.optflag("h", "help", "display this help and exit");
|
||||||
|
opts.optflag("s", "squeeze", "replace each sequence of a repeated character that is listed in the last specified SET, with a single occurrence of that character");
|
||||||
opts.optflag("V", "version", "output version information and exit");
|
opts.optflag("V", "version", "output version information and exit");
|
||||||
|
|
||||||
let matches = match opts.parse(&args[1..]) {
|
let matches = match opts.parse(&args[1..]) {
|
||||||
|
@ -138,20 +204,37 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
||||||
|
|
||||||
let dflag = matches.opt_present("d");
|
let dflag = matches.opt_present("d");
|
||||||
let cflag = matches.opts_present(&["c".to_owned(), "C".to_owned()]);
|
let cflag = matches.opts_present(&["c".to_owned(), "C".to_owned()]);
|
||||||
|
let sflag = matches.opt_present("s");
|
||||||
let sets = matches.free;
|
let sets = matches.free;
|
||||||
|
|
||||||
if cflag && !dflag {
|
if cflag && !dflag && !sflag {
|
||||||
show_error!("-c is only supported with -d");
|
show_error!("-c is only supported with -d or -s");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let stdin = stdin();
|
||||||
|
let mut locked_stdin = stdin.lock();
|
||||||
|
let stdout = stdout();
|
||||||
|
let locked_stdout = stdout.lock();
|
||||||
|
let mut buffered_stdout = BufWriter::new(locked_stdout);
|
||||||
|
|
||||||
|
let set1 = ExpandSet::new(sets[0].as_ref());
|
||||||
if dflag {
|
if dflag {
|
||||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
if sflag {
|
||||||
delete(set1, cflag);
|
|
||||||
} else {
|
|
||||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
|
||||||
let set2 = ExpandSet::new(sets[1].as_ref());
|
let set2 = ExpandSet::new(sets[1].as_ref());
|
||||||
tr(set1, set2);
|
let op = DeleteAndSqueezeOperation::new(set1, set2, cflag);
|
||||||
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
|
} else {
|
||||||
|
let op = DeleteOperation::new(set1, cflag);
|
||||||
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
|
}
|
||||||
|
} else if sflag {
|
||||||
|
let op = SqueezeOperation::new(set1, cflag);
|
||||||
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
|
} else {
|
||||||
|
let mut set2 = ExpandSet::new(sets[1].as_ref());
|
||||||
|
let op = TranslateOperation::new(set1, &mut set2);
|
||||||
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op)
|
||||||
}
|
}
|
||||||
|
|
||||||
0
|
0
|
||||||
|
|
|
@ -32,3 +32,28 @@ fn test_delete_complement() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-d", "-c", "a-z"]).pipe_in("aBcD").run().stdout_is("ac");
|
.args(&["-d", "-c", "a-z"]).pipe_in("aBcD").run().stdout_is("ac");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_squeeze() {
    // Runs of lowercase letters collapse to one character; "BB" is outside the set and stays.
    new_ucmd!()
        .args(&["-s", "a-z"])
        .pipe_in("aaBBcDcc")
        .run()
        .stdout_is("aBBcDc");
}
|
||||||
|
|
||||||
|
|
||||||
|
#[test]
fn test_squeeze_complement() {
    // With -c only characters outside a-z are squeezed: "BB" collapses, "aa" and "cc" stay.
    new_ucmd!()
        .args(&["-sc", "a-z"])
        .pipe_in("aaBBcDcc")
        .run()
        .stdout_is("aaBcDcc");
}
|
||||||
|
|
||||||
|
#[test]
fn test_delete_and_squeeze() {
    // Lowercase letters are deleted, then the two remaining "B"s are squeezed into one.
    new_ucmd!()
        .args(&["-ds", "a-z", "A-Z"])
        .pipe_in("abBcB")
        .run()
        .stdout_is("B");
}
|
||||||
|
|
||||||
|
#[test]
fn test_delete_and_squeeze_complement() {
    // With -c everything outside a-z is deleted, so both "B"s vanish and nothing repeats.
    new_ucmd!()
        .args(&["-dsc", "a-z", "A-Z"])
        .pipe_in("abBcB")
        .run()
        .stdout_is("abc");
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue