From 840c6e7b91d7ed43f774c41b83eb86f06d6d50c5 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Thu, 15 Jul 2021 00:04:43 +0800 Subject: [PATCH 01/50] `tr`: Reimplementing set expansion Hopefully will be feature parity with GNU `tr`. Signed-off-by: Hanif Bin Ariffin Implemented a bit of new expansion module Signed-off-by: Hanif Bin Ariffin Implemented delete operation Signed-off-by: Hanif Bin Ariffin Partially implemented delete operation Will go through translate next. Signed-off-by: Hanif Bin Ariffin Fix formatting... Signed-off-by: Hanif Bin Ariffin Implemented translation feature Signed-off-by: Hanif Bin Ariffin --- Cargo.lock | 47 ++++- src/uu/hashsum/Cargo.toml | 2 +- src/uu/tr/Cargo.toml | 1 + src/uu/tr/src/operation.rs | 409 +++++++++++++++++++++++++++++++++++++ src/uu/tr/src/tr.rs | 39 ++-- tests/by-util/test_tr.rs | 55 +++++ 6 files changed, 527 insertions(+), 26 deletions(-) create mode 100644 src/uu/tr/src/operation.rs diff --git a/Cargo.lock b/Cargo.lock index 8cf7cddcb..aebe260c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,9 +119,9 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +checksum = "2da1976d75adbe5fbc88130ecd119529cf1cc6a93ae1546d8696ee66f0d21af1" [[package]] name = "bitvec" @@ -200,7 +200,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db507a7679252d2276ed0dd8113c6875ec56d3089f9225b2b42c30cc1f8e5c89" dependencies = [ - "nom", + "nom 6.1.2", ] [[package]] @@ -645,9 +645,9 @@ checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" [[package]] name = "digest" -version = "0.6.2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e5b29bf156f3f4b3c4f610a25ff69370616ae6e0657d416de22645483e72af0a" +checksum = "ecae1c064e29fcabb6c2e9939e53dc7da72ed90234ae36ebfe03a478742efbd1" dependencies = [ "generic-array", ] @@ -937,6 +937,19 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if 1.0.0", + "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.85" @@ -1084,6 +1097,17 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "lexical-core", + "memchr 2.4.0", + "version_check", +] + [[package]] name = "nom" version = "6.1.2" @@ -1614,6 +1638,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + [[package]] name = "same-file" version = "1.0.6" @@ -1754,6 +1784,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.8.0" @@ -2910,6 +2946,7 @@ dependencies = [ "bit-set", "clap", "fnv", + "nom 5.1.2", "uucore", "uucore_procs", ] diff --git a/src/uu/hashsum/Cargo.toml b/src/uu/hashsum/Cargo.toml index 43d78119b..4b541ec81 100644 --- a/src/uu/hashsum/Cargo.toml +++ b/src/uu/hashsum/Cargo.toml @@ -15,7 +15,7 @@ edition = "2018" path = "src/hashsum.rs" [dependencies] -digest = "0.6.2" +digest = "0.6.1" clap = { version = "2.33", features = ["wrap_help"] } hex = "0.2.0" libc = "0.2.42" diff --git a/src/uu/tr/Cargo.toml b/src/uu/tr/Cargo.toml index f75a540ee..13a1616d4 100644 --- a/src/uu/tr/Cargo.toml +++ b/src/uu/tr/Cargo.toml @@ -20,6 +20,7 @@ fnv = "1.0.5" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } +nom = "5.1.2" [[bin]] name = "tr" diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs new file mode 100644 index 000000000..d8ed30c54 --- /dev/null +++ b/src/uu/tr/src/operation.rs @@ -0,0 +1,409 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, take, take_until}, + character::complete::one_of, + multi::many0, + sequence::{separated_pair, tuple}, + IResult, +}; +use std::{ + collections::HashMap, + io::{BufRead, Write}, +}; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Sequence { + Char(char), + CharRange(Vec), +} + +impl Sequence { + pub fn parse_set_string(input: &str) -> Vec { + many0(alt(( + alt(( + Sequence::parse_octal, + Sequence::parse_backslash, + Sequence::parse_audible_bel, + Sequence::parse_backspace, + Sequence::parse_form_feed, + Sequence::parse_newline, + Sequence::parse_return, + Sequence::parse_horizontal_tab, + Sequence::parse_vertical_tab, + )), + alt(( + Sequence::parse_char_range, + Sequence::parse_char_star, + Sequence::parse_char_repeat, + )), + alt(( + 
Sequence::parse_alnum, + Sequence::parse_alpha, + Sequence::parse_blank, + Sequence::parse_control, + Sequence::parse_digit, + Sequence::parse_graph, + Sequence::parse_lower, + Sequence::parse_print, + Sequence::parse_punct, + Sequence::parse_space, + Sequence::parse_space, + Sequence::parse_upper, + Sequence::parse_xdigit, + Sequence::parse_char_equal, + Sequence::parse_char, + )), + )))(input) + .map(|(_, r)| r) + .unwrap() + } + + pub fn dissolve(self) -> Vec { + match self { + Sequence::Char(c) => vec![c], + Sequence::CharRange(r) => r, + } + } + + /// Sequence parsers + + fn parse_char(input: &str) -> IResult<&str, Sequence> { + take(1usize)(input).map(|(l, r)| (l, Sequence::Char(r.chars().next().unwrap()))) + } + + fn parse_octal(input: &str) -> IResult<&str, Sequence> { + tuple(( + tag("\\"), + one_of("01234567"), + one_of("01234567"), + one_of("01234567"), + ))(input) + .map(|(l, (_, a, b, c))| { + ( + l, + Sequence::Char( + // SAFETY: All the values from \000 to \777 is valid based on a test below... 
+ std::char::from_u32( + a.to_digit(8).unwrap() * 8 * 8 + + b.to_digit(8).unwrap() * 8 + + c.to_digit(8).unwrap(), + ) + .unwrap(), + ), + ) + }) + } + + fn parse_backslash(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("\\")))(input).map(|(l, _)| (l, Sequence::Char('\\'))) + } + + fn parse_audible_bel(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("a")))(input).map(|(l, _)| (l, Sequence::Char('\u{0007}'))) + } + + fn parse_backspace(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("b")))(input).map(|(l, _)| (l, Sequence::Char('\u{0008}'))) + } + + fn parse_form_feed(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("f")))(input).map(|(l, _)| (l, Sequence::Char('\u{000C}'))) + } + + fn parse_newline(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("n")))(input).map(|(l, _)| (l, Sequence::Char('\u{000A}'))) + } + + fn parse_return(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("r")))(input).map(|(l, _)| (l, Sequence::Char('\u{000D}'))) + } + + fn parse_horizontal_tab(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("t")))(input).map(|(l, _)| (l, Sequence::Char('\u{0009}'))) + } + + fn parse_vertical_tab(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), tag("v")))(input).map(|(l, _)| (l, Sequence::Char('\u{000B}'))) + } + + fn parse_char_range(input: &str) -> IResult<&str, Sequence> { + separated_pair(take(1usize), tag("-"), take(1usize))(input).map(|(l, (a, b))| { + (l, { + let (start, end) = ( + u32::from(a.chars().next().unwrap()), + u32::from(b.chars().next().unwrap()), + ); + if (start >= 97 && start <= 122 && end >= 97 && end <= 122 && end > start) + || (start >= 65 && start <= 90 && end >= 65 && end <= 90 && end > start) + || (start >= 48 && start <= 57 && end >= 48 && end <= 57 && end > start) + { + Sequence::CharRange( + (start..=end) + .map(|c| std::char::from_u32(c).unwrap()) + .collect(), + ) + } else { + // This part is 
unchecked...not all `u32` => `char` is valid + Sequence::CharRange( + (start..=end) + .map(|c| std::char::from_u32(c).unwrap()) + .collect(), + ) + } + }) + }) + } + + fn parse_char_star(input: &str) -> IResult<&str, Sequence> { + tuple((tag("["), take(1usize), tag("*"), tag("]")))(input).map(|(_, (_, _, _, _))| todo!()) + } + + fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> { + tuple((tag("["), take(1usize), tag("*"), take_until("]"), tag("]")))(input).map( + |(l, (_, c, _, n, _))| { + ( + l, + Sequence::CharRange( + std::iter::repeat(c.chars().next().unwrap()) + .take(n.parse().unwrap()) + .collect(), + ), + ) + }, + ) + } + + fn parse_alnum(input: &str) -> IResult<&str, Sequence> { + tag("[:alnum:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(('a'..='z').chain('A'..'Z').chain('0'..'9').collect()), + ) + }) + } + + fn parse_alpha(input: &str) -> IResult<&str, Sequence> { + tag("[:alpha:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(('a'..='z').chain('A'..'Z').collect()), + ) + }) + } + + fn parse_blank(input: &str) -> IResult<&str, Sequence> { + tag("[:blank:]")(input).map(|(_, _)| todo!()) + } + + fn parse_control(input: &str) -> IResult<&str, Sequence> { + tag("[:cntrl:]")(input).map(|(_, _)| todo!()) + } + + fn parse_digit(input: &str) -> IResult<&str, Sequence> { + tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(('0'..='9').collect()))) + } + + fn parse_graph(input: &str) -> IResult<&str, Sequence> { + tag("[:graph:]")(input).map(|(_, _)| todo!()) + } + + fn parse_lower(input: &str) -> IResult<&str, Sequence> { + tag("[:lower:]")(input).map(|(_, _)| todo!()) + } + + fn parse_print(input: &str) -> IResult<&str, Sequence> { + tag("[:print:]")(input).map(|(_, _)| todo!()) + } + + fn parse_punct(input: &str) -> IResult<&str, Sequence> { + tag("[:punct:]")(input).map(|(_, _)| todo!()) + } + + fn parse_space(input: &str) -> IResult<&str, Sequence> { + tag("[:space:]")(input).map(|(_, _)| todo!()) + } + + fn 
parse_upper(input: &str) -> IResult<&str, Sequence> { + tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(('A'..='Z').collect()))) + } + + fn parse_xdigit(input: &str) -> IResult<&str, Sequence> { + tag("[:xdigit:]")(input).map(|(_, _)| todo!()) + } + + fn parse_char_equal(input: &str) -> IResult<&str, Sequence> { + tuple((tag("[="), take(1usize), tag("=]")))(input).map(|(_, (_, _, _))| todo!()) + } +} + +pub trait SymbolTranslatorNew { + fn translate(&mut self, current: char) -> Option; +} + +#[derive(Debug, Clone)] +pub struct DeleteOperationNew { + set: Vec, + complement_flag: bool, +} + +impl DeleteOperationNew { + pub fn new(set: Vec, complement_flag: bool) -> DeleteOperationNew { + DeleteOperationNew { + set, + complement_flag, + } + } +} + +impl SymbolTranslatorNew for DeleteOperationNew { + fn translate(&mut self, current: char) -> Option { + let found = self.set.iter().any(|sequence| match sequence { + Sequence::Char(c) => c.eq(¤t), + Sequence::CharRange(r) => r.iter().any(|c| c.eq(¤t)), + }); + (self.complement_flag == found).then(|| current) + } +} + +#[derive(Debug, Clone)] +pub enum TranslateOperationNew { + Standard(HashMap), + Complement(Vec, Vec, HashMap, char), +} + +impl TranslateOperationNew { + pub fn new( + set1: Vec, + mut set2: Vec, + truncate_set2: bool, + complement: bool, + ) -> TranslateOperationNew { + let fallback = set2.last().cloned().unwrap(); + if truncate_set2 { + set2.truncate(set1.len()); + } + if complement { + TranslateOperationNew::Complement( + set1.into_iter() + .flat_map(Sequence::dissolve) + .rev() + .collect(), + set2.into_iter() + .flat_map(Sequence::dissolve) + .rev() + .collect(), + HashMap::new(), + // TODO: Check how `tr` actually handles this + fallback.dissolve().first().cloned().unwrap(), + ) + } else { + TranslateOperationNew::Standard( + set1.into_iter() + .flat_map(Sequence::dissolve) + .zip( + set2.into_iter() + .chain(std::iter::repeat(fallback)) + .flat_map(Sequence::dissolve), + ) + 
.collect::>(), + ) + } + } +} + +impl SymbolTranslatorNew for TranslateOperationNew { + fn translate(&mut self, current: char) -> Option { + match self { + TranslateOperationNew::Standard(map) => Some( + map.iter() + .find_map(|(l, r)| l.eq(¤t).then(|| *r)) + .unwrap_or(current), + ), + TranslateOperationNew::Complement(set1, set2, mapped_characters, fallback) => { + // First, see if we have already mapped this character. + // If so, return it. + // Else, check if current character is part of set1 + // If so, return it. + // Else, consume from set2, create the translation pair, and return the mapped character + match mapped_characters.get(¤t) { + Some(k) => Some(*k), + None => match set1.iter().any(|c| c.eq(&¤t)) { + true => Some(current), + false => { + let popped = set2.pop().unwrap_or(*fallback); + mapped_characters.insert(current, popped); + Some(popped) + } + }, + } + } + } + } +} + +pub fn translate_input_new(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T) +where + T: SymbolTranslatorNew, +{ + let mut buf = String::new(); + let mut output_buf = String::new(); + while let Ok(length) = input.read_line(&mut buf) { + if length == 0 { + break; + } else { + let filtered = buf.chars().filter_map(|c| translator.translate(c)); + output_buf.extend(filtered); + output.write_all(output_buf.as_bytes()).unwrap(); + } + buf.clear(); + output_buf.clear(); + } +} + +#[test] +fn test_parse_char_range() { + assert_eq!(Sequence::parse_set_string(""), vec![]); + assert_eq!( + Sequence::parse_set_string("a-z"), + vec![Sequence::CharRange(vec![ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', + 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + ])] + ); + assert_eq!( + Sequence::parse_set_string("a-zA-Z"), + vec![ + Sequence::CharRange(vec![ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', + 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + ]), + Sequence::CharRange(vec![ + 'A', 'B', 'C', 'D', 
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + ]) + ] + ); + assert_eq!( + Sequence::parse_set_string(", ┬─┬"), + vec![ + Sequence::Char(','), + Sequence::Char(' '), + Sequence::Char('┬'), + Sequence::Char('─'), + Sequence::Char('┬') + ] + ); +} + +#[test] +fn test_parse_octal() { + for a in '0'..='7' { + for b in '0'..='7' { + for c in '0'..='7' { + assert!( + Sequence::parse_set_string(format!("\\{}{}{}", a, b, c).as_str()).len() == 1 + ); + } + } + } +} diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 6dd81badf..1e7236d6e 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -12,15 +12,18 @@ #[macro_use] extern crate uucore; +extern crate nom; mod expand; +mod operation; use bit_set::BitSet; use clap::{crate_version, App, Arg}; use fnv::FnvHashMap; +use operation::{translate_input_new, Sequence, TranslateOperationNew}; use std::io::{stdin, stdout, BufRead, BufWriter, Write}; -use crate::expand::ExpandSet; +use crate::{expand::ExpandSet, operation::DeleteOperationNew}; use uucore::InvalidEncodingHandling; static ABOUT: &str = "translate or delete characters"; @@ -31,7 +34,7 @@ mod options { pub const COMPLEMENT: &str = "complement"; pub const DELETE: &str = "delete"; pub const SQUEEZE: &str = "squeeze-repeats"; - pub const TRUNCATE: &str = "truncate"; + pub const TRUNCATE_SET1: &str = "truncate-set1"; pub const SETS: &str = "sets"; } @@ -44,15 +47,6 @@ struct DeleteOperation { complement: bool, } -impl DeleteOperation { - fn new(set: ExpandSet, complement: bool) -> DeleteOperation { - DeleteOperation { - bset: set.map(|c| c as usize).collect(), - complement, - } - } -} - impl SymbolTranslator for DeleteOperation { fn translate(&self, c: char, _prev_c: char) -> Option { let uc = c as usize; @@ -254,7 +248,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let delete_flag = matches.is_present(options::DELETE); let complement_flag = matches.is_present(options::COMPLEMENT) || 
matches.is_present("C"); let squeeze_flag = matches.is_present(options::SQUEEZE); - let truncate_flag = matches.is_present(options::TRUNCATE); + let truncate_set1_flag = matches.is_present(options::TRUNCATE_SET1); let sets = matches .values_of(options::SETS) @@ -291,21 +285,26 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let op = DeleteAndSqueezeOperation::new(set1, set2, complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { - let op = DeleteOperation::new(set1, complement_flag); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { if sets.len() < 2 { let op = SqueezeOperation::new(set1, complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { - let op = TranslateAndSqueezeOperation::new(sets, truncate_flag, complement_flag); + let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else { - let mut set2 = ExpandSet::new(sets[1].as_ref()); - let op = TranslateOperation::new(set1, &mut set2, truncate_flag, complement_flag); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let op = TranslateOperationNew::new( + Sequence::parse_set_string(&sets[0]), + Sequence::parse_set_string(&sets[1]), + truncate_set1_flag, + complement_flag, + ); + println!("op:{:#?}", op); + translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } 0 @@ -344,8 +343,8 @@ pub fn uu_app() -> App<'static, 'static> { ), ) .arg( - Arg::with_name(options::TRUNCATE) - .long(options::TRUNCATE) + Arg::with_name(options::TRUNCATE_SET1) + .long(options::TRUNCATE_SET1) .short("t") .help("first truncate SET1 to length of SET2"), ) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 8a3e36625..cffb7153f 
100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -292,3 +292,58 @@ fn test_more_than_2_sets() { .pipe_in("hello world") .fails(); } + +#[test] +fn test_basic_translation() { + new_ucmd!() + .args(&["dabcdef", "xyz"]) + .pipe_in("abcdefabcdef") + .succeeds() + .stdout_is("yzzzzzyzzzzz"); +} + +#[test] +fn test_basic_translation_with_alnum_1() { + new_ucmd!() + .args(&["dabcdef[:alnum:]", "xyz"]) + .pipe_in("abcdefabcdef") + .succeeds() + .stdout_is("zzzzzzzzzzzz"); +} + +#[test] +fn test_basic_translation_with_alnum_2() { + new_ucmd!() + .args(&["[:alnum:]abc", "xyz"]) + .pipe_in("abcdefabcdef") + .succeeds() + .stdout_is("zzzzzzzzzzzz"); +} + +#[test] +fn test_translation_override_pair() { + new_ucmd!() + .args(&["aaa", "xyz"]) + .pipe_in("aaa") + .succeeds() + .stdout_is("zzz"); +} + +#[test] +fn test_translation_case_conversion_works() { + new_ucmd!() + .args(&["abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]) + .pipe_in("abcdefghijklmnopqrstuvwxyz") + .succeeds() + .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + new_ucmd!() + .args(&["a-z", "A-Z"]) + .pipe_in("abcdefghijklmnopqrstuvwxyz") + .succeeds() + .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + new_ucmd!() + .args(&["[:lower:]", "[:upper:]"]) + .pipe_in("abcdefghijklmnopqrstuvwxyz") + .succeeds() + .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); +} From 572cbc6ba2e70e5581ed14689f13a50cc7668d06 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 10:14:21 +0800 Subject: [PATCH 02/50] Some small cleanup to translation module Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 56 +++++++++++++++++++++++--------------- src/uu/tr/src/tr.rs | 1 - 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index d8ed30c54..b08f0ac74 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -146,7 +146,7 @@ impl Sequence { // This part is unchecked...not all `u32` => `char` is valid 
Sequence::CharRange( (start..=end) - .map(|c| std::char::from_u32(c).unwrap()) + .filter_map(|c| std::char::from_u32(c)) .collect(), ) } @@ -268,7 +268,16 @@ impl SymbolTranslatorNew for DeleteOperationNew { #[derive(Debug, Clone)] pub enum TranslateOperationNew { Standard(HashMap), - Complement(Vec, Vec, HashMap, char), + Complement(u32, Vec, Vec, char, HashMap), +} + +impl TranslateOperationNew { + fn next_complement_char(mut iter: u32) -> (u32, char) { + while let None = char::from_u32(iter) { + iter = iter.saturating_add(1) + } + (iter, char::from_u32(iter).unwrap()) + } } impl TranslateOperationNew { @@ -279,22 +288,21 @@ impl TranslateOperationNew { complement: bool, ) -> TranslateOperationNew { let fallback = set2.last().cloned().unwrap(); + println!("fallback:{:#?}", fallback); if truncate_set2 { set2.truncate(set1.len()); } if complement { TranslateOperationNew::Complement( - set1.into_iter() - .flat_map(Sequence::dissolve) - .rev() - .collect(), + 0, + set1.into_iter().flat_map(Sequence::dissolve).collect(), set2.into_iter() .flat_map(Sequence::dissolve) .rev() .collect(), - HashMap::new(), // TODO: Check how `tr` actually handles this fallback.dissolve().first().cloned().unwrap(), + HashMap::new(), ) } else { TranslateOperationNew::Standard( @@ -319,22 +327,26 @@ impl SymbolTranslatorNew for TranslateOperationNew { .find_map(|(l, r)| l.eq(¤t).then(|| *r)) .unwrap_or(current), ), - TranslateOperationNew::Complement(set1, set2, mapped_characters, fallback) => { - // First, see if we have already mapped this character. - // If so, return it. - // Else, check if current character is part of set1 - // If so, return it. 
- // Else, consume from set2, create the translation pair, and return the mapped character - match mapped_characters.get(¤t) { - Some(k) => Some(*k), - None => match set1.iter().any(|c| c.eq(&¤t)) { - true => Some(current), - false => { - let popped = set2.pop().unwrap_or(*fallback); - mapped_characters.insert(current, popped); - Some(popped) + TranslateOperationNew::Complement(iter, set1, set2, fallback, mapped_characters) => { + // First, try to see if current char is already mapped + // If so, return the mapped char + // Else, pop from set2 + // If we popped something, map the next complement character to this value + // If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily + if let Some(c) = set1.iter().find(|c| c.eq(&¤t)) { + Some(*c) + } else { + while let None = mapped_characters.get(¤t) { + if let Some(p) = set2.pop() { + let (next_index, next_value) = + TranslateOperationNew::next_complement_char(*iter); + *iter = next_index; + mapped_characters.insert(next_value, p); + } else { + mapped_characters.insert(current, *fallback); } - }, + } + Some(*mapped_characters.get(¤t).unwrap()) } } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 1e7236d6e..a5b6d04b7 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -303,7 +303,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { truncate_set1_flag, complement_flag, ); - println!("op:{:#?}", op); translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } From 50167a33a81cf32ef65fd48c800105911ab381d3 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 12:59:04 +0800 Subject: [PATCH 03/50] Now all tr tests passes with the new translation impl! 
Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 91 ++++++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 28 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index b08f0ac74..96884c3cd 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,7 +1,7 @@ use nom::{ branch::alt, bytes::complete::{tag, take, take_until}, - character::complete::one_of, + character::complete::{none_of, one_of}, multi::many0, sequence::{separated_pair, tuple}, IResult, @@ -21,7 +21,10 @@ impl Sequence { pub fn parse_set_string(input: &str) -> Vec { many0(alt(( alt(( - Sequence::parse_octal, + Sequence::parse_3_octal, + Sequence::parse_2_octal, + Sequence::parse_1_octal, + Sequence::parse_unrecognized_backslash, Sequence::parse_backslash, Sequence::parse_audible_bel, Sequence::parse_backspace, @@ -71,7 +74,44 @@ impl Sequence { take(1usize)(input).map(|(l, r)| (l, Sequence::Char(r.chars().next().unwrap()))) } - fn parse_octal(input: &str) -> IResult<&str, Sequence> { + fn parse_unrecognized_backslash(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), none_of("01234567")))(input).map(|(l, (_, a))| { + let c = match a { + 'a' => Sequence::Char('\u{0007}'), + 'b' => Sequence::Char('\u{0008}'), + 'f' => Sequence::Char('\u{000C}'), + 'n' => Sequence::Char('\u{000A}'), + 'r' => Sequence::Char('\u{000D}'), + 't' => Sequence::Char('\u{0009}'), + 'v' => Sequence::Char('\u{000B}'), + _ => Sequence::Char(a), + }; + (l, c) + }) + } + + fn parse_1_octal(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), one_of("01234567")))(input).map(|(l, (_, a))| { + ( + l, + Sequence::Char(std::char::from_u32(a.to_digit(8).unwrap()).unwrap()), + ) + }) + } + + fn parse_2_octal(input: &str) -> IResult<&str, Sequence> { + tuple((tag("\\"), one_of("01234567"), one_of("01234567")))(input).map(|(l, (_, a, b))| { + ( + l, + Sequence::Char( + std::char::from_u32(a.to_digit(8).unwrap() * 8 + 
b.to_digit(8).unwrap()) + .unwrap(), + ), + ) + }) + } + + fn parse_3_octal(input: &str) -> IResult<&str, Sequence> { tuple(( tag("\\"), one_of("01234567"), @@ -133,17 +173,13 @@ impl Sequence { u32::from(a.chars().next().unwrap()), u32::from(b.chars().next().unwrap()), ); - if (start >= 97 && start <= 122 && end >= 97 && end <= 122 && end > start) - || (start >= 65 && start <= 90 && end >= 65 && end <= 90 && end > start) - || (start >= 48 && start <= 57 && end >= 48 && end <= 57 && end > start) - { + if start >= 48 && start <= 90 && end >= 48 && end <= 90 && end > start { Sequence::CharRange( (start..=end) .map(|c| std::char::from_u32(c).unwrap()) .collect(), ) } else { - // This part is unchecked...not all `u32` => `char` is valid Sequence::CharRange( (start..=end) .filter_map(|c| std::char::from_u32(c)) @@ -208,7 +244,7 @@ impl Sequence { } fn parse_lower(input: &str) -> IResult<&str, Sequence> { - tag("[:lower:]")(input).map(|(_, _)| todo!()) + tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(('a'..='z').collect()))) } fn parse_print(input: &str) -> IResult<&str, Sequence> { @@ -282,37 +318,36 @@ impl TranslateOperationNew { impl TranslateOperationNew { pub fn new( - set1: Vec, - mut set2: Vec, - truncate_set2: bool, + pset1: Vec, + pset2: Vec, + truncate_set1: bool, complement: bool, ) -> TranslateOperationNew { - let fallback = set2.last().cloned().unwrap(); - println!("fallback:{:#?}", fallback); - if truncate_set2 { - set2.truncate(set1.len()); + let mut set1 = pset1 + .into_iter() + .flat_map(Sequence::dissolve) + .collect::>(); + let set2 = pset2 + .into_iter() + .flat_map(Sequence::dissolve) + .collect::>(); + if truncate_set1 { + set1.truncate(set2.len()); } + let fallback = set2.last().cloned().unwrap(); if complement { TranslateOperationNew::Complement( 0, - set1.into_iter().flat_map(Sequence::dissolve).collect(), - set2.into_iter() - .flat_map(Sequence::dissolve) - .rev() - .collect(), + set1, + set2, // TODO: Check how `tr` actually 
handles this - fallback.dissolve().first().cloned().unwrap(), + fallback, HashMap::new(), ) } else { TranslateOperationNew::Standard( set1.into_iter() - .flat_map(Sequence::dissolve) - .zip( - set2.into_iter() - .chain(std::iter::repeat(fallback)) - .flat_map(Sequence::dissolve), - ) + .zip(set2.into_iter().chain(std::iter::repeat(fallback))) .collect::>(), ) } From c4e04c53842a62eb8b02a3e3b8da853062a44a9e Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 13:34:30 +0800 Subject: [PATCH 04/50] Implemented squeeze operation Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 77 +++++++++++++++++++++++++++++++++++++- src/uu/tr/src/tr.rs | 15 +++++--- 2 files changed, 85 insertions(+), 7 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 96884c3cd..72c0158f3 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -304,7 +304,18 @@ impl SymbolTranslatorNew for DeleteOperationNew { #[derive(Debug, Clone)] pub enum TranslateOperationNew { Standard(HashMap), - Complement(u32, Vec, Vec, char, HashMap), + Complement( + // iter + u32, + // set 1 + Vec, + // set 2 + Vec, + // fallback + char, + // translation map + HashMap, + ), } impl TranslateOperationNew { @@ -388,6 +399,70 @@ impl SymbolTranslatorNew for TranslateOperationNew { } } +#[derive(Debug, Clone)] +pub struct SqueezeOperationNew { + squeeze_set: Vec, + complement: bool, + previous: Option, +} + +impl SqueezeOperationNew { + pub fn new(squeeze_set: Vec, complement: bool) -> SqueezeOperationNew { + SqueezeOperationNew { + squeeze_set: squeeze_set + .into_iter() + .flat_map(Sequence::dissolve) + .collect(), + complement, + previous: None, + } + } +} + +impl SymbolTranslatorNew for SqueezeOperationNew { + fn translate(&mut self, current: char) -> Option { + if self.complement { + if self.squeeze_set.iter().any(|c| c.eq(¤t)) { + Some(current) + } else { + match self.previous { + Some(v) => { + if v.eq(¤t) { + None + } else { + 
self.previous = Some(current); + Some(current) + } + } + None => { + self.previous = Some(current); + Some(current) + } + } + } + } else { + if self.squeeze_set.iter().any(|c| c.eq(¤t)) { + match self.previous { + Some(v) => { + if v.eq(¤t) { + None + } else { + self.previous = Some(current); + Some(current) + } + } + None => { + self.previous = Some(current); + Some(current) + } + } + } else { + Some(current) + } + } + } +} + pub fn translate_input_new(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T) where T: SymbolTranslatorNew, diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index a5b6d04b7..286e7b023 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -20,7 +20,7 @@ mod operation; use bit_set::BitSet; use clap::{crate_version, App, Arg}; use fnv::FnvHashMap; -use operation::{translate_input_new, Sequence, TranslateOperationNew}; +use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew}; use std::io::{stdin, stdout, BufRead, BufWriter, Write}; use crate::{expand::ExpandSet, operation::DeleteOperationNew}; @@ -278,11 +278,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let locked_stdout = stdout.lock(); let mut buffered_stdout = BufWriter::new(locked_stdout); - let set1 = ExpandSet::new(sets[0].as_ref()); if delete_flag { if squeeze_flag { - let set2 = ExpandSet::new(sets[1].as_ref()); - let op = DeleteAndSqueezeOperation::new(set1, set2, complement_flag); + let op = DeleteAndSqueezeOperation::new( + ExpandSet::new(sets[0].as_ref()), + ExpandSet::new(sets[1].as_ref()), + complement_flag, + ); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); @@ -290,8 +292,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } else if squeeze_flag { if sets.len() < 2 { - let op = SqueezeOperation::new(set1, complement_flag); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let 
op = + SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } else { let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); From 05d297351043a9d91a74800752317c5b3e1d0ce8 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 14:09:26 +0800 Subject: [PATCH 05/50] Reimplemented everything using new expansion module Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/expand.rs | 146 ----------------------- src/uu/tr/src/operation.rs | 22 ++-- src/uu/tr/src/tr.rs | 233 ++++++------------------------------- 3 files changed, 46 insertions(+), 355 deletions(-) delete mode 100644 src/uu/tr/src/expand.rs diff --git a/src/uu/tr/src/expand.rs b/src/uu/tr/src/expand.rs deleted file mode 100644 index 5d960921e..000000000 --- a/src/uu/tr/src/expand.rs +++ /dev/null @@ -1,146 +0,0 @@ -// * This file is part of the uutils coreutils package. -// * -// * (c) Michael Gehring -// * (c) kwantam -// * * 2015-04-28 ~ created `expand` module to eliminate most allocs during setup -// * -// * For the full copyright and license information, please view the LICENSE -// * file that was distributed with this source code. - -// spell-checker:ignore (ToDO) allocs slen unesc - -use std::char::from_u32; -use std::cmp::min; -use std::iter::Peekable; -use std::ops::RangeInclusive; - -/// Parse a backslash escape sequence to the corresponding character. Assumes -/// the string starts from the character _after_ the `\` and is not empty. -/// -/// Returns a tuple containing the character and the number of characters -/// consumed from the input. The alphabetic escape sequences consume 1 -/// character; octal escape sequences consume 1 to 3 octal digits. 
-#[inline] -fn parse_sequence(s: &str) -> (char, usize) { - let mut s = s.chars(); - let c = s.next().expect("invalid escape: empty string"); - - if ('0'..='7').contains(&c) { - let mut v = c.to_digit(8).unwrap(); - let mut consumed = 1; - let bits_per_digit = 3; - - for c in s.take(2) { - match c.to_digit(8) { - Some(c) => { - v = (v << bits_per_digit) | c; - consumed += 1; - } - None => break, - } - } - - (from_u32(v).expect("invalid octal escape"), consumed) - } else { - ( - match c { - 'a' => 0x07u8 as char, - 'b' => 0x08u8 as char, - 'f' => 0x0cu8 as char, - 'v' => 0x0bu8 as char, - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - c => c, - }, - 1, - ) - } -} - -struct Unescape<'a> { - string: &'a str, -} - -impl<'a> Iterator for Unescape<'a> { - type Item = char; - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let slen = self.string.len(); - (min(slen, 1), None) - } - - #[inline] - fn next(&mut self) -> Option { - if self.string.is_empty() { - return None; - } - - // is the next character an escape? - let (ret, idx) = match self.string.chars().next().unwrap() { - '\\' if self.string.len() > 1 => { - // yes---it's \ and it's not the last char in a string - // we know that \ is 1 byte long so we can index into the string safely - let (c, consumed) = parse_sequence(&self.string[1..]); - - (Some(c), 1 + consumed) - } - c => (Some(c), c.len_utf8()), // not an escape char - }; - - self.string = &self.string[idx..]; // advance the pointer to the next char - ret - } -} - -pub struct ExpandSet<'a> { - range: RangeInclusive, - unesc: Peekable>, -} - -impl<'a> Iterator for ExpandSet<'a> { - type Item = char; - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.unesc.size_hint() - } - - #[inline] - fn next(&mut self) -> Option { - // while the Range has elements, try to return chars from it - // but make sure that they actually turn out to be Chars! 
- for n in &mut self.range { - if let Some(c) = from_u32(n) { - return Some(c); - } - } - - if let Some(first) = self.unesc.next() { - // peek ahead - if self.unesc.peek() == Some(&'-') && self.unesc.size_hint().0 > 1 { - self.unesc.next(); // this is the '-' - let last = self.unesc.next().unwrap(); // this is the end of the range - - { - self.range = first as u32 + 1..=last as u32; - } - } - - return Some(first); // in any case, return the next char - } - - None - } -} - -impl<'a> ExpandSet<'a> { - #[inline] - pub fn new(s: &'a str) -> ExpandSet<'a> { - ExpandSet { - range: 0..=0, - unesc: Unescape { string: s }.peekable(), - } - } -} diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 72c0158f3..dd3e722ca 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -422,7 +422,7 @@ impl SqueezeOperationNew { impl SymbolTranslatorNew for SqueezeOperationNew { fn translate(&mut self, current: char) -> Option { if self.complement { - if self.squeeze_set.iter().any(|c| c.eq(&current)) { + let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) { Some(current) } else { match self.previous { @@ -439,33 +439,35 @@ impl SymbolTranslatorNew for SqueezeOperationNew { Some(current) } } - } + }; + self.previous = Some(current); + next } else { - if self.squeeze_set.iter().any(|c| c.eq(&current)) { + let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) { match self.previous { Some(v) => { if v.eq(&current) { None } else { - self.previous = Some(current); Some(current) } } - None => { - self.previous = Some(current); - Some(current) - } + None => Some(current), } } else { Some(current) - } + }; + self.previous = Some(current); + next } } } -pub fn translate_input_new(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T) +pub fn translate_input_new(input: &mut R, output: &mut W, mut translator: T) where T: SymbolTranslatorNew, + R: BufRead, + W: Write, { let mut buf = String::new(); let mut output_buf = String::new(); diff --git 
a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 286e7b023..c21bc679e 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -14,22 +14,18 @@ extern crate uucore; extern crate nom; -mod expand; mod operation; -use bit_set::BitSet; use clap::{crate_version, App, Arg}; -use fnv::FnvHashMap; +use nom::AsBytes; use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew}; -use std::io::{stdin, stdout, BufRead, BufWriter, Write}; +use std::io::{stdin, stdout, BufReader, BufWriter}; -use crate::{expand::ExpandSet, operation::DeleteOperationNew}; +use crate::operation::DeleteOperationNew; use uucore::InvalidEncodingHandling; static ABOUT: &str = "translate or delete characters"; -const BUFFER_LEN: usize = 1024; - mod options { pub const COMPLEMENT: &str = "complement"; pub const DELETE: &str = "delete"; @@ -38,190 +34,6 @@ mod options { pub const SETS: &str = "sets"; } -trait SymbolTranslator { - fn translate(&self, c: char, prev_c: char) -> Option; -} - -struct DeleteOperation { - bset: BitSet, - complement: bool, -} - -impl SymbolTranslator for DeleteOperation { - fn translate(&self, c: char, _prev_c: char) -> Option { - let uc = c as usize; - if self.complement == self.bset.contains(uc) { - Some(c) - } else { - None - } - } -} - -struct SqueezeOperation { - squeeze_set: BitSet, - complement: bool, -} - -impl SqueezeOperation { - fn new(squeeze_set: ExpandSet, complement: bool) -> SqueezeOperation { - SqueezeOperation { - squeeze_set: squeeze_set.map(|c| c as usize).collect(), - complement, - } - } -} - -impl SymbolTranslator for SqueezeOperation { - fn translate(&self, c: char, prev_c: char) -> Option { - if prev_c == c && self.complement != self.squeeze_set.contains(c as usize) { - None - } else { - Some(c) - } - } -} - -struct DeleteAndSqueezeOperation { - delete_set: BitSet, - squeeze_set: BitSet, - complement: bool, -} - -impl DeleteAndSqueezeOperation { - fn new( - delete_set: ExpandSet, - squeeze_set: ExpandSet, - complement: 
bool, - ) -> DeleteAndSqueezeOperation { - DeleteAndSqueezeOperation { - delete_set: delete_set.map(|c| c as usize).collect(), - squeeze_set: squeeze_set.map(|c| c as usize).collect(), - complement, - } - } -} - -impl SymbolTranslator for DeleteAndSqueezeOperation { - fn translate(&self, c: char, prev_c: char) -> Option { - if self.complement != self.delete_set.contains(c as usize) - || prev_c == c && self.squeeze_set.contains(c as usize) - { - None - } else { - Some(c) - } - } -} - -struct TranslateOperation { - translate_map: FnvHashMap, - complement: bool, - s2_last: char, -} - -impl TranslateOperation { - fn new( - set1: ExpandSet, - set2: &mut ExpandSet, - truncate: bool, - complement: bool, - ) -> TranslateOperation { - let mut map = FnvHashMap::default(); - let mut s2_prev = '_'; - for i in set1 { - let s2_next = set2.next(); - - if s2_next.is_none() && truncate { - map.insert(i as usize, i); - } else { - s2_prev = s2_next.unwrap_or(s2_prev); - map.insert(i as usize, s2_prev); - } - } - TranslateOperation { - translate_map: map, - complement, - s2_last: set2.last().unwrap_or(s2_prev), - } - } -} - -impl SymbolTranslator for TranslateOperation { - fn translate(&self, c: char, _prev_c: char) -> Option { - if self.complement { - Some(if self.translate_map.contains_key(&(c as usize)) { - c - } else { - self.s2_last - }) - } else { - Some(*self.translate_map.get(&(c as usize)).unwrap_or(&c)) - } - } -} - -struct TranslateAndSqueezeOperation { - translate: TranslateOperation, - squeeze: SqueezeOperation, -} - -impl TranslateAndSqueezeOperation { - fn new(sets: Vec, truncate: bool, complement: bool) -> TranslateAndSqueezeOperation { - let set1 = ExpandSet::new(sets[0].as_ref()); - let set1_ = ExpandSet::new(sets[0].as_ref()); - let mut set2 = ExpandSet::new(sets[1].as_ref()); - let set2_ = ExpandSet::new(sets[1].as_ref()); - TranslateAndSqueezeOperation { - translate: TranslateOperation::new(set1, &mut set2, truncate, complement), - squeeze: 
SqueezeOperation::new(if complement { set1_ } else { set2_ }, complement), - } - } -} - -impl SymbolTranslator for TranslateAndSqueezeOperation { - fn translate(&self, c: char, prev_c: char) -> Option { - // `unwrap()` will never panic because `Translate.translate()` - // always returns `Some`. - self.squeeze - .translate(self.translate.translate(c, 0 as char).unwrap(), prev_c) - } -} - -fn translate_input( - input: &mut dyn BufRead, - output: &mut dyn Write, - translator: T, -) { - let mut buf = String::with_capacity(BUFFER_LEN + 4); - let mut output_buf = String::with_capacity(BUFFER_LEN + 4); - - while let Ok(length) = input.read_line(&mut buf) { - let mut prev_c = 0 as char; - if length == 0 { - break; - } - { - // isolation to make borrow checker happy - let filtered = buf.chars().filter_map(|c| { - let res = translator.translate(c, prev_c); - // Set `prev_c` to the post-translate character. This - // allows the squeeze operation to correctly function - // after the translate operation. - if let Some(rc) = res { - prev_c = rc; - } - res - }); - - output_buf.extend(filtered); - output.write_all(output_buf.as_bytes()).unwrap(); - } - buf.clear(); - output_buf.clear(); - } -} - fn get_usage() -> String { format!("{} [OPTION]... 
SET1 [SET2]", executable!()) } @@ -280,12 +92,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if delete_flag { if squeeze_flag { - let op = DeleteAndSqueezeOperation::new( - ExpandSet::new(sets[0].as_ref()), - ExpandSet::new(sets[1].as_ref()), - complement_flag, - ); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let mut delete_buffer = vec![]; + { + let mut delete_writer = BufWriter::new(&mut delete_buffer); + let delete_op = + DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + translate_input_new(&mut locked_stdin, &mut delete_writer, delete_op); + } + { + let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); + let squeeze_op = + SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), complement_flag); + translate_input_new(&mut squeeze_reader, &mut buffered_stdout, squeeze_op); + } } else { let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); @@ -294,10 +113,26 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if sets.len() < 2 { let op = SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } else { - let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let mut translate_buffer = vec![]; + { + let mut writer = BufWriter::new(&mut translate_buffer); + let translate_op = TranslateOperationNew::new( + Sequence::parse_set_string(&sets[0]), + Sequence::parse_set_string(&sets[1]), + truncate_set1_flag, + complement_flag, + ); + translate_input_new(&mut locked_stdin, &mut writer, translate_op); + } + { + let mut reader = BufReader::new(translate_buffer.as_bytes()); + let squeeze_op = + SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), false); + translate_input_new(&mut reader, &mut 
buffered_stdout, squeeze_op); + } } } else { let op = TranslateOperationNew::new( From 671d355aebb37316fff08a487496c3ebba951243 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 14:09:33 +0800 Subject: [PATCH 06/50] Removed unused dependencies Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/uu/tr/Cargo.toml b/src/uu/tr/Cargo.toml index 13a1616d4..db8f0fa36 100644 --- a/src/uu/tr/Cargo.toml +++ b/src/uu/tr/Cargo.toml @@ -15,8 +15,6 @@ edition = "2018" path = "src/tr.rs" [dependencies] -bit-set = "0.5.0" -fnv = "1.0.5" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } From 403910aed2632ae856785952ebdbf3cf275203a4 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 14:15:26 +0800 Subject: [PATCH 07/50] Updated Cargo.lock Signed-off-by: Hanif Bin Ariffin --- Cargo.lock | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aebe260c6..cf959ee21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,21 +102,6 @@ dependencies = [ "which", ] -[[package]] -name = "bit-set" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" -dependencies = [ - "bit-vec", -] - -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bitflags" version = "1.3.1" @@ -2943,9 +2928,7 @@ dependencies = [ name = "uu_tr" version = "0.0.7" dependencies = [ - "bit-set", "clap", - "fnv", "nom 5.1.2", "uucore", "uucore_procs", From 6ff826b712e67062a7035d104912bb46727fc682 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 
Jul 2021 14:15:35 +0800 Subject: [PATCH 08/50] Some lint changes Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index dd3e722ca..30e7b6af5 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -173,18 +173,14 @@ impl Sequence { u32::from(a.chars().next().unwrap()), u32::from(b.chars().next().unwrap()), ); - if start >= 48 && start <= 90 && end >= 48 && end <= 90 && end > start { + if (48..=90).contains(&start) && (48..=90).contains(&end) && end > start { Sequence::CharRange( (start..=end) .map(|c| std::char::from_u32(c).unwrap()) .collect(), ) } else { - Sequence::CharRange( - (start..=end) - .filter_map(|c| std::char::from_u32(c)) - .collect(), - ) + Sequence::CharRange((start..=end).filter_map(std::char::from_u32).collect()) } }) }) @@ -320,7 +316,7 @@ pub enum TranslateOperationNew { impl TranslateOperationNew { fn next_complement_char(mut iter: u32) -> (u32, char) { - while let None = char::from_u32(iter) { + while char::from_u32(iter).is_none() { iter = iter.saturating_add(1) } (iter, char::from_u32(iter).unwrap()) @@ -382,7 +378,7 @@ impl SymbolTranslatorNew for TranslateOperationNew { if let Some(c) = set1.iter().find(|c| c.eq(&&current)) { Some(*c) } else { - while let None = mapped_characters.get(&current) { + while mapped_characters.get(&current).is_none() { if let Some(p) = set2.pop() { let (next_index, next_value) = TranslateOperationNew::next_complement_char(*iter); From f13c0ba5a788d6c14194e88834781abea645a397 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Mon, 19 Jul 2021 21:32:52 +0800 Subject: [PATCH 09/50] Remove new from struct names Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 44 +++++++++++++++++++------------------- src/uu/tr/src/tr.rs | 18 ++++++++-------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/uu/tr/src/operation.rs 
b/src/uu/tr/src/operation.rs index 30e7b6af5..ecc3d52a3 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -268,26 +268,26 @@ impl Sequence { } } -pub trait SymbolTranslatorNew { +pub trait SymbolTranslator { fn translate(&mut self, current: char) -> Option; } #[derive(Debug, Clone)] -pub struct DeleteOperationNew { +pub struct DeleteOperation { set: Vec, complement_flag: bool, } -impl DeleteOperationNew { - pub fn new(set: Vec, complement_flag: bool) -> DeleteOperationNew { - DeleteOperationNew { +impl DeleteOperation { + pub fn new(set: Vec, complement_flag: bool) -> DeleteOperation { + DeleteOperation { set, complement_flag, } } } -impl SymbolTranslatorNew for DeleteOperationNew { +impl SymbolTranslator for DeleteOperation { fn translate(&mut self, current: char) -> Option { let found = self.set.iter().any(|sequence| match sequence { Sequence::Char(c) => c.eq(&current), @@ -298,7 +298,7 @@ impl SymbolTranslatorNew for DeleteOperationNew { } #[derive(Debug, Clone)] -pub enum TranslateOperationNew { +pub enum TranslateOperation { Standard(HashMap), Complement( // iter @@ -314,7 +314,7 @@ pub enum TranslateOperationNew { ), } -impl TranslateOperationNew { +impl TranslateOperation { fn next_complement_char(mut iter: u32) -> (u32, char) { while char::from_u32(iter).is_none() { iter = iter.saturating_add(1) @@ -323,13 +323,13 @@ impl TranslateOperationNew { } } -impl TranslateOperationNew { +impl TranslateOperation { pub fn new( pset1: Vec, pset2: Vec, truncate_set1: bool, complement: bool, - ) -> TranslateOperationNew { + ) -> TranslateOperation { let mut set1 = pset1 .into_iter() .flat_map(Sequence::dissolve) @@ -343,7 +343,7 @@ impl TranslateOperationNew { } let fallback = set2.last().cloned().unwrap(); if complement { - TranslateOperationNew::Complement( + TranslateOperation::Complement( 0, set1, set2, @@ -352,7 +352,7 @@ impl TranslateOperationNew { HashMap::new(), ) } else { - TranslateOperationNew::Standard( + TranslateOperation::Standard( 
set1.into_iter() .zip(set2.into_iter().chain(std::iter::repeat(fallback))) .collect::>(), @@ -361,15 +361,15 @@ impl TranslateOperationNew { } } -impl SymbolTranslatorNew for TranslateOperationNew { +impl SymbolTranslator for TranslateOperation { fn translate(&mut self, current: char) -> Option { match self { - TranslateOperationNew::Standard(map) => Some( + TranslateOperation::Standard(map) => Some( map.iter() .find_map(|(l, r)| l.eq(&current).then(|| *r)) .unwrap_or(current), ), - TranslateOperationNew::Complement(iter, set1, set2, fallback, mapped_characters) => { + TranslateOperation::Complement(iter, set1, set2, fallback, mapped_characters) => { // First, try to see if current char is already mapped // If so, return the mapped char // Else, pop from set2 @@ -381,7 +381,7 @@ impl SymbolTranslatorNew for TranslateOperationNew { while mapped_characters.get(&current).is_none() { if let Some(p) = set2.pop() { let (next_index, next_value) = - TranslateOperationNew::next_complement_char(*iter); + TranslateOperation::next_complement_char(*iter); *iter = next_index; mapped_characters.insert(next_value, p); } else { @@ -396,15 +396,15 @@ impl SymbolTranslatorNew for TranslateOperationNew { } #[derive(Debug, Clone)] -pub struct SqueezeOperationNew { +pub struct SqueezeOperation { squeeze_set: Vec, complement: bool, previous: Option, } -impl SqueezeOperationNew { - pub fn new(squeeze_set: Vec, complement: bool) -> SqueezeOperationNew { - SqueezeOperationNew { +impl SqueezeOperation { + pub fn new(squeeze_set: Vec, complement: bool) -> SqueezeOperation { + SqueezeOperation { squeeze_set: squeeze_set .into_iter() .flat_map(Sequence::dissolve) @@ -415,7 +415,7 @@ impl SqueezeOperationNew { } } -impl SymbolTranslatorNew for SqueezeOperationNew { +impl SymbolTranslator for SqueezeOperation { fn translate(&mut self, current: char) -> Option { if self.complement { let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) { @@ -461,7 +461,7 @@ impl SymbolTranslatorNew for SqueezeOperationNew { 
pub fn translate_input_new(input: &mut R, output: &mut W, mut translator: T) where - T: SymbolTranslatorNew, + T: SymbolTranslator, R: BufRead, W: Write, { diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index c21bc679e..77fc35bbc 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -18,10 +18,10 @@ mod operation; use clap::{crate_version, App, Arg}; use nom::AsBytes; -use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew}; +use operation::{translate_input_new, Sequence, SqueezeOperation, TranslateOperation}; use std::io::{stdin, stdout, BufReader, BufWriter}; -use crate::operation::DeleteOperationNew; +use crate::operation::DeleteOperation; use uucore::InvalidEncodingHandling; static ABOUT: &str = "translate or delete characters"; @@ -96,30 +96,30 @@ pub fn uumain(args: impl uucore::Args) -> i32 { { let mut delete_writer = BufWriter::new(&mut delete_buffer); let delete_op = - DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); translate_input_new(&mut locked_stdin, &mut delete_writer, delete_op); } { let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); let squeeze_op = - SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), complement_flag); + SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag); translate_input_new(&mut squeeze_reader, &mut buffered_stdout, squeeze_op); } } else { - let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { if sets.len() < 2 { let op = - SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); translate_input_new(&mut 
locked_stdin, &mut buffered_stdout, op); } else { let mut translate_buffer = vec![]; { let mut writer = BufWriter::new(&mut translate_buffer); - let translate_op = TranslateOperationNew::new( + let translate_op = TranslateOperation::new( Sequence::parse_set_string(&sets[0]), Sequence::parse_set_string(&sets[1]), truncate_set1_flag, @@ -130,12 +130,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { { let mut reader = BufReader::new(translate_buffer.as_bytes()); let squeeze_op = - SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), false); + SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); translate_input_new(&mut reader, &mut buffered_stdout, squeeze_op); } } } else { - let op = TranslateOperationNew::new( + let op = TranslateOperation::new( Sequence::parse_set_string(&sets[0]), Sequence::parse_set_string(&sets[1]), truncate_set1_flag, From b0ef508b044534a145cd43ea4c20fea03b1ad998 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Tue, 20 Jul 2021 14:54:04 +0800 Subject: [PATCH 10/50] Some code cleanup Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 276 ++++++++++++++----------------------- src/uu/tr/src/tr.rs | 7 +- 2 files changed, 108 insertions(+), 175 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index ecc3d52a3..32a08c715 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,13 +1,15 @@ use nom::{ branch::alt, - bytes::complete::{tag, take, take_until}, - character::complete::{none_of, one_of}, - multi::many0, - sequence::{separated_pair, tuple}, + bytes::complete::{tag, take_while1}, + character::complete::{anychar, one_of}, + combinator::{map_opt, recognize}, + multi::{many0, many_m_n}, + sequence::{preceded, separated_pair, tuple}, IResult, }; use std::{ collections::HashMap, + fmt::Debug, io::{BufRead, Write}, }; @@ -20,20 +22,7 @@ pub enum Sequence { impl Sequence { pub fn parse_set_string(input: &str) -> Vec { many0(alt(( - alt(( - 
Sequence::parse_3_octal, - Sequence::parse_2_octal, - Sequence::parse_1_octal, - Sequence::parse_unrecognized_backslash, - Sequence::parse_backslash, - Sequence::parse_audible_bel, - Sequence::parse_backspace, - Sequence::parse_form_feed, - Sequence::parse_newline, - Sequence::parse_return, - Sequence::parse_horizontal_tab, - Sequence::parse_vertical_tab, - )), + alt((Sequence::parse_octal, Sequence::parse_backslash)), alt(( Sequence::parse_char_range, Sequence::parse_char_star, @@ -71,11 +60,11 @@ impl Sequence { /// Sequence parsers fn parse_char(input: &str) -> IResult<&str, Sequence> { - take(1usize)(input).map(|(l, r)| (l, Sequence::Char(r.chars().next().unwrap()))) + anychar(input).map(|(l, r)| (l, Sequence::Char(r))) } - fn parse_unrecognized_backslash(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), none_of("01234567")))(input).map(|(l, (_, a))| { + fn parse_backslash(input: &str) -> IResult<&str, Sequence> { + preceded(tag("\\"), anychar)(input).map(|(l, a)| { let c = match a { 'a' => Sequence::Char('\u{0007}'), 'b' => Sequence::Char('\u{0008}'), @@ -84,132 +73,57 @@ impl Sequence { 'r' => Sequence::Char('\u{000D}'), 't' => Sequence::Char('\u{0009}'), 'v' => Sequence::Char('\u{000B}'), - _ => Sequence::Char(a), + x => Sequence::Char(x), }; (l, c) }) } - fn parse_1_octal(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), one_of("01234567")))(input).map(|(l, (_, a))| { - ( - l, - Sequence::Char(std::char::from_u32(a.to_digit(8).unwrap()).unwrap()), - ) - }) - } - - fn parse_2_octal(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), one_of("01234567"), one_of("01234567")))(input).map(|(l, (_, a, b))| { - ( - l, - Sequence::Char( - std::char::from_u32(a.to_digit(8).unwrap() * 8 + b.to_digit(8).unwrap()) - .unwrap(), - ), - ) - }) - } - - fn parse_3_octal(input: &str) -> IResult<&str, Sequence> { - tuple(( - tag("\\"), - one_of("01234567"), - one_of("01234567"), - one_of("01234567"), - ))(input) - .map(|(l, (_, a, b, c))| { - 
( - l, - Sequence::Char( - // SAFETY: All the values from \000 to \777 is valid based on a test below... - std::char::from_u32( - a.to_digit(8).unwrap() * 8 * 8 - + b.to_digit(8).unwrap() * 8 - + c.to_digit(8).unwrap(), - ) - .unwrap(), - ), - ) - }) - } - - fn parse_backslash(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("\\")))(input).map(|(l, _)| (l, Sequence::Char('\\'))) - } - - fn parse_audible_bel(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("a")))(input).map(|(l, _)| (l, Sequence::Char('\u{0007}'))) - } - - fn parse_backspace(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("b")))(input).map(|(l, _)| (l, Sequence::Char('\u{0008}'))) - } - - fn parse_form_feed(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("f")))(input).map(|(l, _)| (l, Sequence::Char('\u{000C}'))) - } - - fn parse_newline(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("n")))(input).map(|(l, _)| (l, Sequence::Char('\u{000A}'))) - } - - fn parse_return(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("r")))(input).map(|(l, _)| (l, Sequence::Char('\u{000D}'))) - } - - fn parse_horizontal_tab(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("t")))(input).map(|(l, _)| (l, Sequence::Char('\u{0009}'))) - } - - fn parse_vertical_tab(input: &str) -> IResult<&str, Sequence> { - tuple((tag("\\"), tag("v")))(input).map(|(l, _)| (l, Sequence::Char('\u{000B}'))) + fn parse_octal(input: &str) -> IResult<&str, Sequence> { + map_opt( + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + |out: &str| { + u32::from_str_radix(out, 8) + .map(|u| Sequence::Char(char::from_u32(u).unwrap())) + .ok() + }, + )(input) } fn parse_char_range(input: &str) -> IResult<&str, Sequence> { - separated_pair(take(1usize), tag("-"), take(1usize))(input).map(|(l, (a, b))| { + separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| { (l, { - let (start, end) = ( - 
u32::from(a.chars().next().unwrap()), - u32::from(b.chars().next().unwrap()), - ); - if (48..=90).contains(&start) && (48..=90).contains(&end) && end > start { - Sequence::CharRange( - (start..=end) - .map(|c| std::char::from_u32(c).unwrap()) - .collect(), - ) - } else { - Sequence::CharRange((start..=end).filter_map(std::char::from_u32).collect()) - } + let (start, end) = (u32::from(a), u32::from(b)); + Sequence::CharRange((start..=end).filter_map(std::char::from_u32).collect()) }) }) } fn parse_char_star(input: &str) -> IResult<&str, Sequence> { - tuple((tag("["), take(1usize), tag("*"), tag("]")))(input).map(|(_, (_, _, _, _))| todo!()) + tuple((tag("["), anychar, tag("*]")))(input).map(|(_, (_, _, _))| todo!()) } fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> { - tuple((tag("["), take(1usize), tag("*"), take_until("]"), tag("]")))(input).map( - |(l, (_, c, _, n, _))| { - ( - l, - Sequence::CharRange( - std::iter::repeat(c.chars().next().unwrap()) - .take(n.parse().unwrap()) - .collect(), - ), - ) - }, - ) + tuple(( + tag("["), + anychar, + tag("*"), + take_while1(|c: char| c.is_digit(10)), + tag("]"), + ))(input) + .map(|(l, (_, c, _, n, _))| { + ( + l, + Sequence::CharRange(std::iter::repeat(c).take(n.parse().unwrap()).collect()), + ) + }) } fn parse_alnum(input: &str) -> IResult<&str, Sequence> { tag("[:alnum:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(('a'..='z').chain('A'..'Z').chain('0'..'9').collect()), + Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()), ) }) } @@ -218,7 +132,7 @@ impl Sequence { tag("[:alpha:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(('a'..='z').chain('A'..'Z').collect()), + Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()), ) }) } @@ -260,11 +174,16 @@ impl Sequence { } fn parse_xdigit(input: &str) -> IResult<&str, Sequence> { - tag("[:xdigit:]")(input).map(|(_, _)| todo!()) + tag("[:xdigit:]")(input).map(|(l, _)| { + ( + l, + 
Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()), + ) + }) } fn parse_char_equal(input: &str) -> IResult<&str, Sequence> { - tuple((tag("[="), take(1usize), tag("=]")))(input).map(|(_, (_, _, _))| todo!()) + tuple((tag("[="), anychar, tag("=]")))(input).map(|(_, (_, _, _))| todo!()) } } @@ -297,21 +216,47 @@ impl SymbolTranslator for DeleteOperation { } } +#[derive(Debug, Clone)] +pub struct TranslateOperationComplement { + iter: u32, + set1: Vec, + set2: Vec, + fallback: char, + translation_map: HashMap, +} + +impl TranslateOperationComplement { + fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationComplement { + TranslateOperationComplement { + iter: 0, + set1, + set2: set2.into_iter().rev().collect(), + fallback, + translation_map: HashMap::new(), + } + } +} + +#[derive(Debug, Clone)] +pub struct TranslateOperationStandard { + translation_map: HashMap, +} + +impl TranslateOperationStandard { + fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationStandard { + TranslateOperationStandard { + translation_map: set1 + .into_iter() + .zip(set2.into_iter().chain(std::iter::repeat(fallback))) + .collect::>(), + } + } +} + #[derive(Debug, Clone)] pub enum TranslateOperation { - Standard(HashMap), - Complement( - // iter - u32, - // set 1 - Vec, - // set 2 - Vec, - // fallback - char, - // translation map - HashMap, - ), + Standard(TranslateOperationStandard), + Complement(TranslateOperationComplement), } impl TranslateOperation { @@ -319,7 +264,7 @@ impl TranslateOperation { while char::from_u32(iter).is_none() { iter = iter.saturating_add(1) } - (iter, char::from_u32(iter).unwrap()) + (iter.saturating_add(1), char::from_u32(iter).unwrap()) } } @@ -330,6 +275,7 @@ impl TranslateOperation { truncate_set1: bool, complement: bool, ) -> TranslateOperation { + // TODO: Only some translation is acceptable i.e. uppercase/lowercase transform. 
let mut set1 = pset1 .into_iter() .flat_map(Sequence::dissolve) @@ -338,25 +284,14 @@ impl TranslateOperation { .into_iter() .flat_map(Sequence::dissolve) .collect::>(); + let fallback = set2.last().cloned().unwrap(); if truncate_set1 { set1.truncate(set2.len()); } - let fallback = set2.last().cloned().unwrap(); if complement { - TranslateOperation::Complement( - 0, - set1, - set2, - // TODO: Check how `tr` actually handles this - fallback, - HashMap::new(), - ) + TranslateOperation::Complement(TranslateOperationComplement::new(set1, set2, fallback)) } else { - TranslateOperation::Standard( - set1.into_iter() - .zip(set2.into_iter().chain(std::iter::repeat(fallback))) - .collect::>(), - ) + TranslateOperation::Standard(TranslateOperationStandard::new(set1, set2, fallback)) } } } @@ -364,12 +299,19 @@ impl TranslateOperation { impl SymbolTranslator for TranslateOperation { fn translate(&mut self, current: char) -> Option { match self { - TranslateOperation::Standard(map) => Some( - map.iter() + TranslateOperation::Standard(TranslateOperationStandard { translation_map }) => Some( + translation_map + .iter() .find_map(|(l, r)| l.eq(¤t).then(|| *r)) .unwrap_or(current), ), - TranslateOperation::Complement(iter, set1, set2, fallback, mapped_characters) => { + TranslateOperation::Complement(TranslateOperationComplement { + iter, + set1, + set2, + fallback, + translation_map, + }) => { // First, try to see if current char is already mapped // If so, return the mapped char // Else, pop from set2 @@ -378,17 +320,17 @@ impl SymbolTranslator for TranslateOperation { if let Some(c) = set1.iter().find(|c| c.eq(&¤t)) { Some(*c) } else { - while mapped_characters.get(¤t).is_none() { + while translation_map.get(¤t).is_none() { if let Some(p) = set2.pop() { let (next_index, next_value) = TranslateOperation::next_complement_char(*iter); *iter = next_index; - mapped_characters.insert(next_value, p); + translation_map.insert(next_value, p); } else { - mapped_characters.insert(current, 
*fallback); + translation_map.insert(current, *fallback); } } - Some(*mapped_characters.get(¤t).unwrap()) + Some(*translation_map.get(¤t).unwrap()) } } } @@ -441,14 +383,8 @@ impl SymbolTranslator for SqueezeOperation { } else { let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) { match self.previous { - Some(v) => { - if v.eq(¤t) { - None - } else { - Some(current) - } - } - None => Some(current), + Some(v) if v == current => None, + _ => Some(current), } } else { Some(current) diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 77fc35bbc..5ba6cf611 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -111,9 +111,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } else if squeeze_flag { if sets.len() < 2 { - let op = - SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); - + let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } else { let mut translate_buffer = vec![]; @@ -129,8 +127,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } { let mut reader = BufReader::new(translate_buffer.as_bytes()); - let squeeze_op = - SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); + let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); translate_input_new(&mut reader, &mut buffered_stdout, squeeze_op); } } From 4b45a2287cf3135d0e18880ffad559fbdc8a8c3b Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Tue, 20 Jul 2021 15:41:43 +0800 Subject: [PATCH 11/50] Implement some more parsers Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 89 +++++++++++++++++++++++++--------- src/uu/tr/src/tr.rs | 1 + src/uu/tr/src/unicode_table.rs | 8 +++ 3 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 src/uu/tr/src/unicode_table.rs diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 32a08c715..eae348370 100644 --- 
a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -2,7 +2,7 @@ use nom::{ branch::alt, bytes::complete::{tag, take_while1}, character::complete::{anychar, one_of}, - combinator::{map_opt, recognize}, + combinator::{map_opt, recognize, value}, multi::{many0, many_m_n}, sequence::{preceded, separated_pair, tuple}, IResult, @@ -13,6 +13,8 @@ use std::{ io::{BufRead, Write}, }; +use crate::unicode_table; + #[derive(Debug, PartialEq, Eq, Clone)] pub enum Sequence { Char(char), @@ -66,13 +68,13 @@ impl Sequence { fn parse_backslash(input: &str) -> IResult<&str, Sequence> { preceded(tag("\\"), anychar)(input).map(|(l, a)| { let c = match a { - 'a' => Sequence::Char('\u{0007}'), - 'b' => Sequence::Char('\u{0008}'), - 'f' => Sequence::Char('\u{000C}'), - 'n' => Sequence::Char('\u{000A}'), - 'r' => Sequence::Char('\u{000D}'), - 't' => Sequence::Char('\u{0009}'), - 'v' => Sequence::Char('\u{000B}'), + 'a' => Sequence::Char(unicode_table::BEL), + 'b' => Sequence::Char(unicode_table::BS), + 'f' => Sequence::Char(unicode_table::FF), + 'n' => Sequence::Char(unicode_table::LF), + 'r' => Sequence::Char(unicode_table::CR), + 't' => Sequence::Char(unicode_table::HT), + 'v' => Sequence::Char(unicode_table::VT), x => Sequence::Char(x), }; (l, c) @@ -129,32 +131,55 @@ impl Sequence { } fn parse_alpha(input: &str) -> IResult<&str, Sequence> { - tag("[:alpha:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()), - ) - }) + value( + Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()), + tag("[:alpha:]"), + )(input) } fn parse_blank(input: &str) -> IResult<&str, Sequence> { - tag("[:blank:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange(vec![unicode_table::SPACE, unicode_table::HT]), + tag("[:blank:]"), + )(input) } fn parse_control(input: &str) -> IResult<&str, Sequence> { - tag("[:cntrl:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange( + (0..=31) + .chain(std::iter::once(127)) + 
.flat_map(char::from_u32) + .collect(), + ), + tag("[:cntrl:]"), + )(input) } fn parse_digit(input: &str) -> IResult<&str, Sequence> { - tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(('0'..='9').collect()))) + value(Sequence::CharRange(('0'..='9').collect()), tag("[:digit:]"))(input) } fn parse_graph(input: &str) -> IResult<&str, Sequence> { - tag("[:graph:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange( + (48..=57) // digit + .chain(65..=90) // uppercase + .chain(97..=122) // lowercase + // punctuations + .chain(33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32) + .collect(), + ), + tag("[:graph:]"), + )(input) } fn parse_lower(input: &str) -> IResult<&str, Sequence> { - tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(('a'..='z').collect()))) + value(Sequence::CharRange(('a'..='z').collect()), tag("[:lower:]"))(input) } fn parse_print(input: &str) -> IResult<&str, Sequence> { @@ -162,11 +187,31 @@ impl Sequence { } fn parse_punct(input: &str) -> IResult<&str, Sequence> { - tag("[:punct:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange( + (33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32) + .collect(), + ), + tag("[:punct:]"), + )(input) } fn parse_space(input: &str) -> IResult<&str, Sequence> { - tag("[:space:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange(vec![ + unicode_table::HT, + unicode_table::LF, + unicode_table::VT, + unicode_table::FF, + unicode_table::CR, + unicode_table::SPACE, + ]), + tag("[:space:]"), + )(input) } fn parse_upper(input: &str) -> IResult<&str, Sequence> { @@ -177,7 +222,7 @@ impl Sequence { tag("[:xdigit:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()), + Sequence::CharRange(('0'..='9').chain('A'..='F').chain('a'..='f').collect()), ) }) } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 5ba6cf611..581595385 
100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -15,6 +15,7 @@ extern crate uucore; extern crate nom; mod operation; +mod unicode_table; use clap::{crate_version, App, Arg}; use nom::AsBytes; diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs new file mode 100644 index 000000000..9362be647 --- /dev/null +++ b/src/uu/tr/src/unicode_table.rs @@ -0,0 +1,8 @@ +pub static BEL: char = '\u{0007}'; +pub static BS: char = '\u{0008}'; +pub static HT: char = '\u{0009}'; +pub static LF: char = '\u{000A}'; +pub static VT: char = '\u{000B}'; +pub static FF: char = '\u{000C}'; +pub static CR: char = '\u{000D}'; +pub static SPACE: char = '\u{0020}'; From 74247547258007e840df9f9383f667d7af3fb341 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Tue, 20 Jul 2021 15:51:33 +0800 Subject: [PATCH 12/50] Update traits name Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 4 +++- src/uu/tr/src/tr.rs | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index eae348370..f440487c8 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -45,6 +45,7 @@ impl Sequence { Sequence::parse_upper, Sequence::parse_xdigit, Sequence::parse_char_equal, + // NOTE: This must be the last one Sequence::parse_char, )), )))(input) @@ -110,6 +111,7 @@ impl Sequence { tag("["), anychar, tag("*"), + // TODO: Extend this to support octal as well. Octal starts with 0. 
take_while1(|c: char| c.is_digit(10)), tag("]"), ))(input) @@ -440,7 +442,7 @@ impl SymbolTranslator for SqueezeOperation { } } -pub fn translate_input_new(input: &mut R, output: &mut W, mut translator: T) +pub fn translate_input(input: &mut R, output: &mut W, mut translator: T) where T: SymbolTranslator, R: BufRead, diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 581595385..3ba06920a 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -19,7 +19,7 @@ mod unicode_table; use clap::{crate_version, App, Arg}; use nom::AsBytes; -use operation::{translate_input_new, Sequence, SqueezeOperation, TranslateOperation}; +use operation::{translate_input, Sequence, SqueezeOperation, TranslateOperation}; use std::io::{stdin, stdout, BufReader, BufWriter}; use crate::operation::DeleteOperation; @@ -98,22 +98,22 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut delete_writer = BufWriter::new(&mut delete_buffer); let delete_op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); - translate_input_new(&mut locked_stdin, &mut delete_writer, delete_op); + translate_input(&mut locked_stdin, &mut delete_writer, delete_op); } { let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag); - translate_input_new(&mut squeeze_reader, &mut buffered_stdout, squeeze_op); + translate_input(&mut squeeze_reader, &mut buffered_stdout, squeeze_op); } } else { let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); - translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { if sets.len() < 2 { let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); - translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { 
let mut translate_buffer = vec![]; { @@ -124,12 +124,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { truncate_set1_flag, complement_flag, ); - translate_input_new(&mut locked_stdin, &mut writer, translate_op); + translate_input(&mut locked_stdin, &mut writer, translate_op); } { let mut reader = BufReader::new(translate_buffer.as_bytes()); let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); - translate_input_new(&mut reader, &mut buffered_stdout, squeeze_op); + translate_input(&mut reader, &mut buffered_stdout, squeeze_op); } } } else { @@ -139,7 +139,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { truncate_set1_flag, complement_flag, ); - translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); + translate_input(&mut locked_stdin, &mut buffered_stdout, op); } 0 From c3bd727f8d9709d4dd67ca90177264a0170bda4d Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Tue, 20 Jul 2021 15:55:58 +0800 Subject: [PATCH 13/50] Updated tests to be more descriptive and added its equivalent cmd line script Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 71 ++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 13 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index cffb7153f..54e7fe081 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -294,34 +294,38 @@ fn test_more_than_2_sets() { } #[test] -fn test_basic_translation() { +fn basic_translation_works() { + // echo -n "abcdefabcdef" | tr "dabcdef" "xyz" new_ucmd!() - .args(&["dabcdef", "xyz"]) + .args(&["abcdef", "xyz"]) .pipe_in("abcdefabcdef") .succeeds() - .stdout_is("yzzzzzyzzzzz"); + .stdout_is("xyzzzzxyzzzz"); } #[test] -fn test_basic_translation_with_alnum_1() { +fn alnum_overrides_translation_to_fallback_1() { + // echo -n "abcdefghijklmnopqrstuvwxyz" | tr "abc[:alpha:]" "xyz" new_ucmd!() - .args(&["dabcdef[:alnum:]", "xyz"]) - .pipe_in("abcdefabcdef") + .args(&["abc[:alpha:]", "xyz"]) 
+ .pipe_in("abcdefghijklmnopqrstuvwxyz") .succeeds() - .stdout_is("zzzzzzzzzzzz"); + .stdout_is("zzzzzzzzzzzzzzzzzzzzzzzzzz"); } #[test] -fn test_basic_translation_with_alnum_2() { +fn alnum_overrides_translation_to_fallback_2() { + // echo -n "abcdefghijklmnopqrstuvwxyz" | tr "[:alpha:]abc" "xyz" new_ucmd!() - .args(&["[:alnum:]abc", "xyz"]) - .pipe_in("abcdefabcdef") + .args(&["[:alpha:]abc", "xyz"]) + .pipe_in("abcdefghijklmnopqrstuvwxyz") .succeeds() - .stdout_is("zzzzzzzzzzzz"); + .stdout_is("zzzzzzzzzzzzzzzzzzzzzzzzzz"); } #[test] -fn test_translation_override_pair() { +fn overrides_translation_pair_if_repeats() { + // echo -n 'aaa' | tr "aaa" "xyz" new_ucmd!() .args(&["aaa", "xyz"]) .pipe_in("aaa") @@ -330,20 +334,61 @@ fn test_translation_override_pair() { } #[test] -fn test_translation_case_conversion_works() { +fn uppercase_conversion_works_1() { + // echo -n 'abcdefghijklmnopqrstuvwxyz' | tr "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" new_ucmd!() .args(&["abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]) .pipe_in("abcdefghijklmnopqrstuvwxyz") .succeeds() .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); +} + +#[test] +fn uppercase_conversion_works_2() { + // echo -n 'abcdefghijklmnopqrstuvwxyz' | tr "a-z" "A-Z" new_ucmd!() .args(&["a-z", "A-Z"]) .pipe_in("abcdefghijklmnopqrstuvwxyz") .succeeds() .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); +} + +#[test] +fn uppercase_conversion_works_3() { + // echo -n 'abcdefghijklmnopqrstuvwxyz' | tr "[:lower:]" "[:upper:]" new_ucmd!() .args(&["[:lower:]", "[:upper:]"]) .pipe_in("abcdefghijklmnopqrstuvwxyz") .succeeds() .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); } + +#[test] +fn translate_complement_set_in_order() { + // echo -n '01234' | tr -c '@-~' ' -^' + new_ucmd!() + .args(&["-c", "@-~", " -^"]) + .pipe_in("01234") + .succeeds() + .stdout_is("PQRST"); +} + +#[test] +fn alpha_expands_uppercase_lowercase() { + // echo -n "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | tr "[:alpha:]" " -_" + 
new_ucmd!() + .args(&["[:alpha:]", " -_"]) + .pipe_in("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") + .succeeds() + .stdout_is(r##" !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRS"##); +} + +#[test] +fn alnum_expands_number_uppercase_lowercase() { + // echo -n "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | tr "[:alnum:]" " -_" + new_ucmd!() + .args(&["[:alnum:]", " -_"]) + .pipe_in("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") + .succeeds() + .stdout_is(r##" !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]"##); +} From 0254ceb48b1c3fae6ffb9ef27cb4be2abd8501ec Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Tue, 20 Jul 2021 17:04:05 +0800 Subject: [PATCH 14/50] Removes some allocations Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 211 ++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 111 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index f440487c8..9e0cb4c63 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,8 +1,9 @@ +use crate::unicode_table; use nom::{ branch::alt, bytes::complete::{tag, take_while1}, character::complete::{anychar, one_of}, - combinator::{map_opt, recognize, value}, + combinator::{map_opt, recognize}, multi::{many0, many_m_n}, sequence::{preceded, separated_pair, tuple}, IResult, @@ -13,12 +14,20 @@ use std::{ io::{BufRead, Write}, }; -use crate::unicode_table; +static SPACES: &'static [char] = &[ + unicode_table::HT, + unicode_table::LF, + unicode_table::VT, + unicode_table::FF, + unicode_table::CR, + unicode_table::SPACE, +]; +static BLANK: &'static [char] = &[unicode_table::SPACE, unicode_table::HT]; -#[derive(Debug, PartialEq, Eq, Clone)] pub enum Sequence { Char(char), - CharRange(Vec), + CharRange(Box>), + CharStar(char), } impl Sequence { @@ -53,10 +62,11 @@ impl Sequence { .unwrap() } - pub fn dissolve(self) -> Vec { + pub fn dissolve(self) -> Box> { match 
self { - Sequence::Char(c) => vec![c], + Sequence::Char(c) => Box::new(std::iter::once(c)), Sequence::CharRange(r) => r, + Sequence::CharStar(c) => Box::new(std::iter::repeat(c)), } } @@ -97,13 +107,14 @@ impl Sequence { separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| { (l, { let (start, end) = (u32::from(a), u32::from(b)); - Sequence::CharRange((start..=end).filter_map(std::char::from_u32).collect()) + Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) }) }) } fn parse_char_star(input: &str) -> IResult<&str, Sequence> { - tuple((tag("["), anychar, tag("*]")))(input).map(|(_, (_, _, _))| todo!()) + tuple((tag("["), anychar, tag("*]")))(input) + .map(|(l, (_, c, _))| (l, Sequence::CharStar(c))) } fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> { @@ -118,7 +129,7 @@ impl Sequence { .map(|(l, (_, c, _, n, _))| { ( l, - Sequence::CharRange(std::iter::repeat(c).take(n.parse().unwrap()).collect()), + Sequence::CharRange(Box::new(std::iter::repeat(c).take(n.parse().unwrap()))), ) }) } @@ -127,104 +138,118 @@ impl Sequence { tag("[:alnum:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()), + Sequence::CharRange(Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z'))), ) }) } fn parse_alpha(input: &str) -> IResult<&str, Sequence> { - value( - Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()), - tag("[:alpha:]"), - )(input) + tag("[:alpha:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new(('A'..='Z').chain('a'..='z'))), + ) + }) } fn parse_blank(input: &str) -> IResult<&str, Sequence> { - value( - Sequence::CharRange(vec![unicode_table::SPACE, unicode_table::HT]), - tag("[:blank:]"), - )(input) + tag("[:blank:]")(input) + .map(|(l, _)| (l, Sequence::CharRange(Box::new(BLANK.into_iter().cloned())))) } fn parse_control(input: &str) -> IResult<&str, Sequence> { - value( - Sequence::CharRange( - (0..=31) - .chain(std::iter::once(127)) - 
.flat_map(char::from_u32) - .collect(), - ), - tag("[:cntrl:]"), - )(input) + tag("[:cntrl:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new( + (0..=31) + .chain(std::iter::once(127)) + .flat_map(char::from_u32), + )), + ) + }) } fn parse_digit(input: &str) -> IResult<&str, Sequence> { - value(Sequence::CharRange(('0'..='9').collect()), tag("[:digit:]"))(input) + tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('0'..='9')))) } fn parse_graph(input: &str) -> IResult<&str, Sequence> { - value( - Sequence::CharRange( - (48..=57) // digit - .chain(65..=90) // uppercase - .chain(97..=122) // lowercase - // punctuations - .chain(33..=47) - .chain(58..=64) - .chain(91..=96) - .chain(123..=126) - .flat_map(char::from_u32) - .collect(), - ), - tag("[:graph:]"), - )(input) + tag("[:graph:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new( + (48..=57) // digit + .chain(65..=90) // uppercase + .chain(97..=122) // lowercase + // punctuations + .chain(33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .chain(std::iter::once(32)) // space + .flat_map(char::from_u32), + )), + ) + }) } fn parse_lower(input: &str) -> IResult<&str, Sequence> { - value(Sequence::CharRange(('a'..='z').collect()), tag("[:lower:]"))(input) + tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('a'..='z')))) } fn parse_print(input: &str) -> IResult<&str, Sequence> { - tag("[:print:]")(input).map(|(_, _)| todo!()) + tag("[:print:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new( + (48..=57) // digit + .chain(65..=90) // uppercase + .chain(97..=122) // lowercase + // punctuations + .chain(33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32), + )), + ) + }) } fn parse_punct(input: &str) -> IResult<&str, Sequence> { - value( - Sequence::CharRange( - (33..=47) - .chain(58..=64) - .chain(91..=96) - .chain(123..=126) - .flat_map(char::from_u32) - .collect(), - ), - 
tag("[:punct:]"), - )(input) + tag("[:punct:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new( + (33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32), + )), + ) + }) } fn parse_space(input: &str) -> IResult<&str, Sequence> { - value( - Sequence::CharRange(vec![ - unicode_table::HT, - unicode_table::LF, - unicode_table::VT, - unicode_table::FF, - unicode_table::CR, - unicode_table::SPACE, - ]), - tag("[:space:]"), - )(input) + tag("[:space:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new(SPACES.into_iter().cloned())), + ) + }) } fn parse_upper(input: &str) -> IResult<&str, Sequence> { - tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(('A'..='Z').collect()))) + tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('A'..='Z')))) } fn parse_xdigit(input: &str) -> IResult<&str, Sequence> { tag("[:xdigit:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(('0'..='9').chain('A'..='F').chain('a'..='f').collect()), + Sequence::CharRange(Box::new(('0'..='9').chain('A'..='F').chain('a'..='f'))), ) }) } @@ -238,16 +263,18 @@ pub trait SymbolTranslator { fn translate(&mut self, current: char) -> Option; } -#[derive(Debug, Clone)] pub struct DeleteOperation { - set: Vec, + set: Vec, complement_flag: bool, } impl DeleteOperation { pub fn new(set: Vec, complement_flag: bool) -> DeleteOperation { DeleteOperation { - set, + set: set + .into_iter() + .flat_map(Sequence::dissolve) + .collect::>(), complement_flag, } } @@ -255,10 +282,7 @@ impl DeleteOperation { impl SymbolTranslator for DeleteOperation { fn translate(&mut self, current: char) -> Option { - let found = self.set.iter().any(|sequence| match sequence { - Sequence::Char(c) => c.eq(¤t), - Sequence::CharRange(r) => r.iter().any(|c| c.eq(¤t)), - }); + let found = self.set.iter().any(|sequence| sequence.eq(¤t)); (self.complement_flag == found).then(|| current) } } @@ -463,41 +487,6 @@ where } } -#[test] -fn 
test_parse_char_range() { - assert_eq!(Sequence::parse_set_string(""), vec![]); - assert_eq!( - Sequence::parse_set_string("a-z"), - vec![Sequence::CharRange(vec![ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', - 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - ])] - ); - assert_eq!( - Sequence::parse_set_string("a-zA-Z"), - vec![ - Sequence::CharRange(vec![ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', - 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - ]), - Sequence::CharRange(vec![ - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - ]) - ] - ); - assert_eq!( - Sequence::parse_set_string(", ┬─┬"), - vec![ - Sequence::Char(','), - Sequence::Char(' '), - Sequence::Char('┬'), - Sequence::Char('─'), - Sequence::Char('┬') - ] - ); -} - #[test] fn test_parse_octal() { for a in '0'..='7' { From 5aeeb6cfe911507324fcb70484d9194a7307b19a Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Wed, 21 Jul 2021 19:13:07 +0800 Subject: [PATCH 15/50] Use delimited whenever possible and removed a duplicate parse Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 9e0cb4c63..2b27204fa 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,12 +1,12 @@ use crate::unicode_table; use nom::{ branch::alt, - bytes::complete::{tag, take_while1}, - character::complete::{anychar, one_of}, - combinator::{map_opt, recognize}, + bytes::complete::{tag, take_until}, + character::complete::{anychar, digit1, one_of}, + combinator::{map_opt, opt, recognize}, multi::{many0, many_m_n}, - sequence::{preceded, separated_pair, tuple}, - IResult, + sequence::{delimited, preceded, separated_pair, tuple}, + take_until1, IResult, }; use std::{ 
collections::HashMap, @@ -33,7 +33,6 @@ pub enum Sequence { impl Sequence { pub fn parse_set_string(input: &str) -> Vec { many0(alt(( - alt((Sequence::parse_octal, Sequence::parse_backslash)), alt(( Sequence::parse_char_range, Sequence::parse_char_star, @@ -50,11 +49,14 @@ impl Sequence { Sequence::parse_print, Sequence::parse_punct, Sequence::parse_space, - Sequence::parse_space, Sequence::parse_upper, Sequence::parse_xdigit, Sequence::parse_char_equal, // NOTE: This must be the last one + )), + alt(( + Sequence::parse_octal, + Sequence::parse_backslash, Sequence::parse_char, )), )))(input) @@ -113,20 +115,16 @@ impl Sequence { } fn parse_char_star(input: &str) -> IResult<&str, Sequence> { - tuple((tag("["), anychar, tag("*]")))(input) - .map(|(l, (_, c, _))| (l, Sequence::CharStar(c))) + delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c))) } fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> { - tuple(( + delimited( tag("["), - anychar, - tag("*"), - // TODO: Extend this to support octal as well. Octal starts with 0. 
- take_while1(|c: char| c.is_digit(10)), + separated_pair(anychar, tag("*"), digit1), tag("]"), - ))(input) - .map(|(l, (_, c, _, n, _))| { + )(input) + .map(|(l, (c, n))| { ( l, Sequence::CharRange(Box::new(std::iter::repeat(c).take(n.parse().unwrap()))), @@ -255,7 +253,7 @@ impl Sequence { } fn parse_char_equal(input: &str) -> IResult<&str, Sequence> { - tuple((tag("[="), anychar, tag("=]")))(input).map(|(_, (_, _, _))| todo!()) + delimited(tag("[="), anychar, tag("=]"))(input).map(|(_, _)| todo!()) } } From 0acc16572093799151d55b6cf4657e71e41bb771 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Wed, 21 Jul 2021 20:07:35 +0800 Subject: [PATCH 16/50] Finally fixed parsing octal in char ranges Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 92 +++++++++++++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 11 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 2b27204fa..e273cecd4 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,12 +1,12 @@ use crate::unicode_table; use nom::{ branch::alt, - bytes::complete::{tag, take_until}, + bytes::complete::tag, character::complete::{anychar, digit1, one_of}, - combinator::{map_opt, opt, recognize}, + combinator::{map_opt, recognize}, multi::{many0, many_m_n}, - sequence::{delimited, preceded, separated_pair, tuple}, - take_until1, IResult, + sequence::{delimited, preceded, separated_pair}, + IResult, }; use std::{ collections::HashMap, @@ -34,6 +34,10 @@ impl Sequence { pub fn parse_set_string(input: &str) -> Vec { many0(alt(( alt(( + Sequence::parse_char_range_octal_leftright, + Sequence::parse_char_range_octal_left, + Sequence::parse_char_range_octal_right, + Sequence::parse_char_range_backslash_collapse, Sequence::parse_char_range, Sequence::parse_char_star, Sequence::parse_char_repeat, @@ -114,6 +118,65 @@ impl Sequence { }) } + fn parse_char_range_backslash_collapse(input: &str) -> IResult<&str, Sequence> { + 
separated_pair( + preceded(tag("\\"), anychar), + tag("-"), + preceded(tag("\\"), anychar), + )(input) + .map(|(l, (a, b))| { + (l, { + let (start, end) = (u32::from(a), u32::from(b)); + Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + }) + }) + } + + fn parse_char_range_octal_left(input: &str) -> IResult<&str, Sequence> { + separated_pair( + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + tag("-"), + anychar, + )(input) + .map(|(l, (a, b))| { + (l, { + let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b)); + Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + }) + }) + } + + fn parse_char_range_octal_right(input: &str) -> IResult<&str, Sequence> { + separated_pair( + anychar, + tag("-"), + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + )(input) + .map(|(l, (a, b))| { + (l, { + let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap()); + Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + }) + }) + } + + fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> { + separated_pair( + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + tag("-"), + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + )(input) + .map(|(l, (a, b))| { + (l, { + let (start, end) = ( + u32::from_str_radix(a, 8).unwrap(), + u32::from_str_radix(b, 8).unwrap(), + ); + Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + }) + }) + } + fn parse_char_star(input: &str) -> IResult<&str, Sequence> { delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c))) } @@ -261,6 +324,7 @@ pub trait SymbolTranslator { fn translate(&mut self, current: char) -> Option; } +#[derive(Debug)] pub struct DeleteOperation { set: Vec, complement_flag: bool, @@ -285,7 +349,7 @@ impl SymbolTranslator for DeleteOperation { } } -#[derive(Debug, Clone)] 
+#[derive(Debug)] pub struct TranslateOperationComplement { iter: u32, set1: Vec, @@ -306,7 +370,7 @@ impl TranslateOperationComplement { } } -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct TranslateOperationStandard { translation_map: HashMap, } @@ -322,15 +386,21 @@ impl TranslateOperationStandard { } } -#[derive(Debug, Clone)] +#[derive(Debug)] pub enum TranslateOperation { Standard(TranslateOperationStandard), Complement(TranslateOperationComplement), } impl TranslateOperation { - fn next_complement_char(mut iter: u32) -> (u32, char) { - while char::from_u32(iter).is_none() { + fn next_complement_char(mut iter: u32, ignore_list: &[char]) -> (u32, char) { + while (char::from_u32(iter).is_none() + || ignore_list + .iter() + .map(|c| u32::from(*c)) + .any(|c| iter.eq(&c))) + && iter.ne(&u32::MAX) + { iter = iter.saturating_add(1) } (iter.saturating_add(1), char::from_u32(iter).unwrap()) @@ -392,7 +462,7 @@ impl SymbolTranslator for TranslateOperation { while translation_map.get(¤t).is_none() { if let Some(p) = set2.pop() { let (next_index, next_value) = - TranslateOperation::next_complement_char(*iter); + TranslateOperation::next_complement_char(*iter, &*set1); *iter = next_index; translation_map.insert(next_value, p); } else { @@ -466,7 +536,7 @@ impl SymbolTranslator for SqueezeOperation { pub fn translate_input(input: &mut R, output: &mut W, mut translator: T) where - T: SymbolTranslator, + T: SymbolTranslator + Debug, R: BufRead, W: Write, { From db8f321abf032fc84a26e8dc2e1a3d3ed6072a56 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Wed, 21 Jul 2021 20:07:55 +0800 Subject: [PATCH 17/50] Enabled the test for that weird backslash octal :) Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 49 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 54e7fe081..8d135db7d 100644 --- a/tests/by-util/test_tr.rs +++ 
b/tests/by-util/test_tr.rs @@ -98,9 +98,8 @@ fn test_complement4() { } #[test] -#[ignore = "fixme: GNU tr returns '0a1b2c3' instead of '0~1~2~3', see #2158"] fn test_complement5() { - // $ echo '0x1y2z3' | tr -c '\0-@' '*-~' + // $ echo -n '0x1y2z3' | tr -c '\0-@' '*-~' // 0a1b2c3 new_ucmd!() .args(&["-c", "\\0-@", "*-~"]) @@ -392,3 +391,49 @@ fn alnum_expands_number_uppercase_lowercase() { .succeeds() .stdout_is(r##" !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]"##); } + +#[test] +#[ignore = "not expected to fully pass -- any help appreciated!"] +fn check_against_gnu_tr_tests() { + // echo -n "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | tr "[:alnum:]" " -_" + new_ucmd!() + .args(&["abcd", "[]*]"]) + .pipe_in("abcd") + .succeeds() + .stdout_is("]]]]"); + new_ucmd!() + .args(&["abc", "[%*]xyz"]) + .pipe_in("abc") + .succeeds() + .stdout_is("xyz"); + new_ucmd!() + .args(&["", "[.*]"]) + .pipe_in("abc") + .succeeds() + .stdout_is("abc"); + new_ucmd!() + .args(&["-t", "abcd", "xy"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("xycde"); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + 
new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); + new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); +} From 700ce7d64a8350e14afe1097a039c7e98f16d76f Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Wed, 21 Jul 2021 20:10:43 +0800 Subject: [PATCH 18/50] Removed useless tests that were supposed to be filled with tests from GNU tr Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 8d135db7d..c62fbdae6 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -416,24 +416,4 @@ fn check_against_gnu_tr_tests() { .pipe_in("abcde") .succeeds() .stdout_is("xycde"); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - 
new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); - new_ucmd!().args(&[""]).pipe_in("").succeeds().stdout_is(""); } From 55b2eacb4b94f16fa540e3b448bfedc437bba7b5 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Wed, 21 Jul 2021 22:36:18 +0800 Subject: [PATCH 19/50] Added gnu tests for tr (mostly as comments) Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 151 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 1 deletion(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index c62fbdae6..602f91ee1 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -395,25 +395,174 @@ fn alnum_expands_number_uppercase_lowercase() { #[test] #[ignore = "not expected to fully pass -- any help appreciated!"] fn check_against_gnu_tr_tests() { - // echo -n "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | tr "[:alnum:]" " -_" + // ['1', qw(abcd '[]*]'), {IN=>'abcd'}, {OUT=>']]]]'}], new_ucmd!() .args(&["abcd", "[]*]"]) .pipe_in("abcd") .succeeds() .stdout_is("]]]]"); + // ['2', qw(abc '[%*]xyz'), {IN=>'abc'}, {OUT=>'xyz'}], new_ucmd!() .args(&["abc", "[%*]xyz"]) .pipe_in("abc") .succeeds() .stdout_is("xyz"); + // ['3', qw('' '[.*]'), {IN=>'abc'}, {OUT=>'abc'}], new_ucmd!() .args(&["", "[.*]"]) .pipe_in("abc") .succeeds() .stdout_is("abc"); + // # Test --truncate-set1 behavior when string1 is longer than string2 + // ['4', qw(-t abcd xy), {IN=>'abcde'}, {OUT=>'xycde'}], new_ucmd!() .args(&["-t", "abcd", "xy"]) .pipe_in("abcde") .succeeds() .stdout_is("xycde"); + // # Test bsd behavior (the default) when string1 is longer than string2 + // ['5', qw(abcd xy), {IN=>'abcde'}, {OUT=>'xyyye'}], + new_ucmd!() + .args(&["abcd", "xy"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("xyyye"); + // # 
Do it the posix way + // ['6', qw(abcd 'x[y*]'), {IN=>'abcde'}, {OUT=>'xyyye'}], + new_ucmd!() + .args(&["abcd", "x[y*]"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("xyyye"); + // ['7', qw(-s a-p ,"'), {IN=>'abcdefghijklmnop'}, {OUT=>'%.$'}], + new_ucmd!() + .args(&["-s", "a-p", "\"'"]) + .pipe_in("abcdefghijklmnop") + .succeeds() + .stdout_is("%.$"); + // ['8', qw(-s a-p '[.*]$'), {IN=>'abcdefghijklmnop'}, {OUT=>'.$'}], + new_ucmd!() + .args(&["-s", "a-p"]) + .pipe_in("abcdefghijklmnop") + .succeeds() + .stdout_is(".$"); + // + // ['9', qw(-s a-p '%[.*]'), {IN=>'abcdefghijklmnop'}, {OUT=>'%.'}], + // ['a', qw(-s '[a-z]'), {IN=>'aabbcc'}, {OUT=>'abc'}], + // ['b', qw(-s '[a-c]'), {IN=>'aabbcc'}, {OUT=>'abc'}], + // ['c', qw(-s '[a-b]'), {IN=>'aabbcc'}, {OUT=>'abcc'}], + // ['d', qw(-s '[b-c]'), {IN=>'aabbcc'}, {OUT=>'aabc'}], + // ['e', qw(-s '[\0-\5]'), + // {IN=>"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"}, {OUT=>"\0a\1b\2c\3d\4e\5"}], + // # tests of delete + // ['f', qw(-d '[=[=]'), {IN=>'[[[[[[[]]]]]]]]'}, {OUT=>']]]]]]]]'}], + // ['g', qw(-d '[=]=]'), {IN=>'[[[[[[[]]]]]]]]'}, {OUT=>'[[[[[[['}], + // ['h', qw(-d '[:xdigit:]'), {IN=>'0123456789acbdefABCDEF'}, {OUT=>''}], + // ['i', qw(-d '[:xdigit:]'), {IN=>'w0x1y2z3456789acbdefABCDEFz'}, + // {OUT=>'wxyzz'}], + // ['j', qw(-d '[:digit:]'), {IN=>'0123456789'}, {OUT=>''}], + // ['k', qw(-d '[:digit:]'), + // {IN=>'a0b1c2d3e4f5g6h7i8j9k'}, {OUT=>'abcdefghijk'}], + // ['l', qw(-d '[:lower:]'), {IN=>'abcdefghijklmnopqrstuvwxyz'}, {OUT=>''}], + // ['m', qw(-d '[:upper:]'), {IN=>'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], + // ['n', qw(-d '[:lower:][:upper:]'), + // {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], + // ['o', qw(-d '[:alpha:]'), + // {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], + // ['p', qw(-d '[:alnum:]'), + // {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'}, + // {OUT=>''}], + // ['q', qw(-d '[:alnum:]'), + // 
{IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, + // {OUT=>'..'}], + // ['r', qw(-ds '[:alnum:]' .), + // {IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, + // {OUT=>'.'}], + // + // # The classic example, with string2 BSD-style + // ['s', qw(-cs '[:alnum:]' '\n'), + // {IN=>'The big black fox jumped over the fence.'}, + // {OUT=>"The\nbig\nblack\nfox\njumped\nover\nthe\nfence\n"}], + // + // # The classic example, POSIX-style + // ['t', qw(-cs '[:alnum:]' '[\n*]'), + // {IN=>'The big black fox jumped over the fence.'}, + // {OUT=>"The\nbig\nblack\nfox\njumped\nover\nthe\nfence\n"}], + // ['u', qw(-ds b a), {IN=>'aabbaa'}, {OUT=>'a'}], + // ['v', qw(-ds '[:xdigit:]' Z), {IN=>'ZZ0123456789acbdefABCDEFZZ'}, {OUT=>'Z'}], + // + // # Try some data with 8th bit set in case something is mistakenly + // # sign-extended. + // ['w', qw(-ds '\350' '\345'), + // {IN=>"\300\301\377\345\345\350\345"}, + // {OUT=>"\300\301\377\345"}], + // ['x', qw(-s abcdefghijklmn '[:*016]'), + // {IN=>'abcdefghijklmnop'}, {OUT=>':op'}], + // ['y', qw(-d a-z), {IN=>'abc $code'}, {OUT=>' $'}], + // ['z', qw(-ds a-z '$.'), {IN=>'a.b.c $$$$code\\'}, {OUT=>'. $\\'}], + // + // # Make sure that a-a is accepted. + // ['range-a-a', qw(a-a z), {IN=>'abc'}, {OUT=>'zbc'}], + // # + // ['null', qw(a ''), {IN=>''}, {OUT=>''}, {EXIT=>1}, + // {ERR=>"$prog: when not truncating set1, string2 must be non-empty\n"}], + // ['upcase', qw('[:lower:]' '[:upper:]'), + // {IN=>'abcxyzABCXYZ'}, + // {OUT=>'ABCXYZABCXYZ'}], + // ['dncase', qw('[:upper:]' '[:lower:]'), + // {IN=>'abcxyzABCXYZ'}, + // {OUT=>'abcxyzabcxyz'}], + // # + // ['rep-cclass', qw('a[=*2][=c=]' xyyz), {IN=>'a=c'}, {OUT=>'xyz'}], + // ['rep-1', qw('[:*3][:digit:]' a-m), {IN=>':1239'}, {OUT=>'cefgm'}], + // ['rep-2', qw('a[b*512]c' '1[x*]2'), {IN=>'abc'}, {OUT=>'1x2'}], + // ['rep-3', qw('a[b*513]c' '1[x*]2'), {IN=>'abc'}, {OUT=>'1x2'}], + // # Another couple octal repeat count tests. 
+ // ['o-rep-1', qw('[b*08]' '[x*]'), {IN=>''}, {OUT=>''}, {EXIT=>1}, + // {ERR=>"$prog: invalid repeat count '08' in [c*n] construct\n"}], + // ['o-rep-2', qw('[b*010]cd' '[a*7]BC[x*]'), {IN=>'bcd'}, {OUT=>'BCx'}], + // + // ['esc', qw('a\-z' A-Z), {IN=>'abc-z'}, {OUT=>'AbcBC'}], + // ['bs-055', qw('a\055b' def), {IN=>"a\055b"}, {OUT=>'def'}], + // ['bs-at-end', qw('\\' x), {IN=>"\\"}, {OUT=>'x'}, + // {ERR=>"$prog: warning: an unescaped backslash at end of " + // . "string is not portable\n"}], + // + // # + // # From Ross + // ['ross-0a', qw(-cs '[:upper:]' 'X[Y*]'), {IN=>''}, {OUT=>''}, {EXIT=>1}, + // {ERR=>$map_all_to_1}], + // ['ross-0b', qw(-cs '[:cntrl:]' 'X[Y*]'), {IN=>''}, {OUT=>''}, {EXIT=>1}, + // {ERR=>$map_all_to_1}], + // ['ross-1a', qw(-cs '[:upper:]' '[X*]'), + // {IN=>'AMZamz123.-+AMZ'}, {OUT=>'AMZXAMZ'}], + // ['ross-1b', qw(-cs '[:upper:][:digit:]' '[Z*]'), {IN=>''}, {OUT=>''}], + // ['ross-2', qw(-dcs '[:lower:]' n-rs-z), + // {IN=>'amzAMZ123.-+amz'}, {OUT=>'amzamz'}], + // ['ross-3', qw(-ds '[:xdigit:]' '[:alnum:]'), + // {IN=>'.ZABCDEFGzabcdefg.0123456788899.GG'}, {OUT=>'.ZGzg..G'}], + // ['ross-4', qw(-dcs '[:alnum:]' '[:digit:]'), {IN=>''}, {OUT=>''}], + // ['ross-5', qw(-dc '[:lower:]'), {IN=>''}, {OUT=>''}], + // ['ross-6', qw(-dc '[:upper:]'), {IN=>''}, {OUT=>''}], + // + // # Ensure that these fail. + // # Prior to 2.0.20, each would evoke a failed assertion. + // ['empty-eq', qw('[==]' x), {IN=>''}, {OUT=>''}, {EXIT=>1}, + // {ERR=>"$prog: missing equivalence class character '[==]'\n"}], + // ['empty-cc', qw('[::]' x), {IN=>''}, {OUT=>''}, {EXIT=>1}, + // {ERR=>"$prog: missing character class name '[::]'\n"}], + // + // # Weird repeat counts. 
+ // ['repeat-bs-9', qw(abc '[b*\9]'), {IN=>'abcd'}, {OUT=>'[b*d'}], + // ['repeat-0', qw(abc '[b*0]'), {IN=>'abcd'}, {OUT=>'bbbd'}], + // ['repeat-zeros', qw(abc '[b*00000000000000000000]'), + // {IN=>'abcd'}, {OUT=>'bbbd'}], + // ['repeat-compl', qw(-c '[a*65536]\n' '[b*]'), {IN=>'abcd'}, {OUT=>'abbb'}], + // ['repeat-xC', qw(-C '[a*65536]\n' '[b*]'), {IN=>'abcd'}, {OUT=>'abbb'}], + // + // # From Glenn Fowler. + // ['fowler-1', qw(ah -H), {IN=>'aha'}, {OUT=>'-H-'}], + // + // # Up to coreutils-6.9, this would provoke a failed assertion. + // ['no-abort-1', qw(-c a '[b*256]'), {IN=>'abc'}, {OUT=>'abb'}], } From 5def69d3eead16d150ead9cbf1a49af9dbdd6e28 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Wed, 21 Jul 2021 23:05:11 +0800 Subject: [PATCH 20/50] Trimming down files Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 32 +++++++++++++++++++------------- src/uu/tr/src/tr.rs | 1 - src/uu/tr/src/unicode_table.rs | 8 -------- 3 files changed, 19 insertions(+), 22 deletions(-) delete mode 100644 src/uu/tr/src/unicode_table.rs diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index e273cecd4..960ab7ada 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,4 +1,3 @@ -use crate::unicode_table; use nom::{ branch::alt, bytes::complete::tag, @@ -14,15 +13,18 @@ use std::{ io::{BufRead, Write}, }; -static SPACES: &'static [char] = &[ - unicode_table::HT, - unicode_table::LF, - unicode_table::VT, - unicode_table::FF, - unicode_table::CR, - unicode_table::SPACE, -]; -static BLANK: &'static [char] = &[unicode_table::SPACE, unicode_table::HT]; +mod unicode_table { + pub static BEL: char = '\u{0007}'; + pub static BS: char = '\u{0008}'; + pub static HT: char = '\u{0009}'; + pub static LF: char = '\u{000A}'; + pub static VT: char = '\u{000B}'; + pub static FF: char = '\u{000C}'; + pub static CR: char = '\u{000D}'; + pub static SPACE: char = '\u{0020}'; + pub static SPACES: &'static [char] = &[HT, LF, VT, FF, CR, 
SPACE]; + pub static BLANK: &'static [char] = &[SPACE, HT]; +} pub enum Sequence { Char(char), @@ -214,8 +216,12 @@ impl Sequence { } fn parse_blank(input: &str) -> IResult<&str, Sequence> { - tag("[:blank:]")(input) - .map(|(l, _)| (l, Sequence::CharRange(Box::new(BLANK.into_iter().cloned())))) + tag("[:blank:]")(input).map(|(l, _)| { + ( + l, + Sequence::CharRange(Box::new(unicode_table::BLANK.into_iter().cloned())), + ) + }) } fn parse_control(input: &str) -> IResult<&str, Sequence> { @@ -297,7 +303,7 @@ impl Sequence { tag("[:space:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(Box::new(SPACES.into_iter().cloned())), + Sequence::CharRange(Box::new(unicode_table::SPACES.into_iter().cloned())), ) }) } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 3ba06920a..f024fd6db 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -15,7 +15,6 @@ extern crate uucore; extern crate nom; mod operation; -mod unicode_table; use clap::{crate_version, App, Arg}; use nom::AsBytes; diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs deleted file mode 100644 index 9362be647..000000000 --- a/src/uu/tr/src/unicode_table.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub static BEL: char = '\u{0007}'; -pub static BS: char = '\u{0008}'; -pub static HT: char = '\u{0009}'; -pub static LF: char = '\u{000A}'; -pub static VT: char = '\u{000B}'; -pub static FF: char = '\u{000C}'; -pub static CR: char = '\u{000D}'; -pub static SPACE: char = '\u{0020}'; From d5dbedb2e43f2cec802cf0c2f8306ebf6e879c0c Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Thu, 22 Jul 2021 23:27:15 +0800 Subject: [PATCH 21/50] Added more tests Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 116 +++++++++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 18 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 602f91ee1..6d80cb528 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -442,39 +442,119 
@@ fn check_against_gnu_tr_tests() { .stdout_is("%.$"); // ['8', qw(-s a-p '[.*]$'), {IN=>'abcdefghijklmnop'}, {OUT=>'.$'}], new_ucmd!() - .args(&["-s", "a-p"]) + .args(&["-s", "a-p", "[.*]$"]) .pipe_in("abcdefghijklmnop") .succeeds() .stdout_is(".$"); - // // ['9', qw(-s a-p '%[.*]'), {IN=>'abcdefghijklmnop'}, {OUT=>'%.'}], + new_ucmd!() + .args(&["-s", "a-p", "%[.*]"]) + .pipe_in("abcdefghijklmnop") + .succeeds() + .stdout_is("%."); // ['a', qw(-s '[a-z]'), {IN=>'aabbcc'}, {OUT=>'abc'}], + new_ucmd!() + .args(&["-s", "[a-z]"]) + .pipe_in("aabbcc") + .succeeds() + .stdout_is("abc"); // ['b', qw(-s '[a-c]'), {IN=>'aabbcc'}, {OUT=>'abc'}], + new_ucmd!() + .args(&["-s", "[a-c]"]) + .pipe_in("aabbcc") + .succeeds() + .stdout_is("abc"); // ['c', qw(-s '[a-b]'), {IN=>'aabbcc'}, {OUT=>'abcc'}], + new_ucmd!() + .args(&["-s", "[a-b]"]) + .pipe_in("aabbcc") + .succeeds() + .stdout_is("abcc"); // ['d', qw(-s '[b-c]'), {IN=>'aabbcc'}, {OUT=>'aabc'}], - // ['e', qw(-s '[\0-\5]'), - // {IN=>"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"}, {OUT=>"\0a\1b\2c\3d\4e\5"}], + new_ucmd!() + .args(&["-s", "[b-c]"]) + .pipe_in("aabbcc") + .succeeds() + .stdout_is("aabc"); + // ['e', qw(-s '[\0-\5]'), {IN=>"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"}, {OUT=>"\0a\1b\2c\3d\4e\5"}], + new_ucmd!() + .args(&["-s", r#"[\0-\5]"#]) + .pipe_in(r#"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"#) + .succeeds() + .stdout_is(r#"\0a\1b\2c\3d\4e\5"#); // # tests of delete // ['f', qw(-d '[=[=]'), {IN=>'[[[[[[[]]]]]]]]'}, {OUT=>']]]]]]]]'}], + new_ucmd!() + .args(&["-d", "[=[=]"]) + .pipe_in("[[[[[[[]]]]]]]]") + .succeeds() + .stdout_is("]]]]]]]]"); // ['g', qw(-d '[=]=]'), {IN=>'[[[[[[[]]]]]]]]'}, {OUT=>'[[[[[[['}], + new_ucmd!() + .args(&["-d", "[=]=]"]) + .pipe_in("[[[[[[[]]]]]]]]") + .succeeds() + .stdout_is("[[[[[[["); // ['h', qw(-d '[:xdigit:]'), {IN=>'0123456789acbdefABCDEF'}, {OUT=>''}], - // ['i', qw(-d '[:xdigit:]'), {IN=>'w0x1y2z3456789acbdefABCDEFz'}, - // {OUT=>'wxyzz'}], + new_ucmd!() + .args(&["-d", 
"[:xdigit:]"]) + .pipe_in("0123456789acbdefABCDEF") + .succeeds() + .stdout_is(""); + // ['i', qw(-d '[:xdigit:]'), {IN=>'w0x1y2z3456789acbdefABCDEFz'}, {OUT=>'wxyzz'}], + new_ucmd!() + .args(&["-d", "[:xdigit:]"]) + .pipe_in("w0x1y2z3456789acbdefABCDEFz") + .succeeds() + .stdout_is("wxyzz"); // ['j', qw(-d '[:digit:]'), {IN=>'0123456789'}, {OUT=>''}], - // ['k', qw(-d '[:digit:]'), - // {IN=>'a0b1c2d3e4f5g6h7i8j9k'}, {OUT=>'abcdefghijk'}], + new_ucmd!() + .args(&["", "", ""]) + .pipe_in("") + .succeeds() + .stdout_is(""); + // ['k', qw(-d '[:digit:]'), {IN=>'a0b1c2d3e4f5g6h7i8j9k'}, {OUT=>'abcdefghijk'}], + new_ucmd!() + .args(&["-d", "[:digit:]"]) + .pipe_in("a0b1c2d3e4f5g6h7i8j9k") + .succeeds() + .stdout_is("abcdefghijk"); // ['l', qw(-d '[:lower:]'), {IN=>'abcdefghijklmnopqrstuvwxyz'}, {OUT=>''}], + new_ucmd!() + .args(&["-d", "[:lower:]"]) + .pipe_in("abcdefghijklmnopqrstuvwxyz") + .succeeds() + .stdout_is(""); // ['m', qw(-d '[:upper:]'), {IN=>'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], - // ['n', qw(-d '[:lower:][:upper:]'), - // {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], - // ['o', qw(-d '[:alpha:]'), - // {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], - // ['p', qw(-d '[:alnum:]'), - // {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'}, - // {OUT=>''}], - // ['q', qw(-d '[:alnum:]'), - // {IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, - // {OUT=>'..'}], + new_ucmd!() + .args(&["-d", "[:upper:]"]) + .pipe_in("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + .succeeds() + .stdout_is(""); + // ['n', qw(-d '[:lower:][:upper:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], + new_ucmd!() + .args(&["-d", "[:lower:][:upper:]"]) + .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + .succeeds() + .stdout_is(""); + // ['o', qw(-d '[:alpha:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], + new_ucmd!() + 
.args(&["-d", "[:alpha:]"]) + .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + .succeeds() + .stdout_is(""); + // ['p', qw(-d '[:alnum:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'}, {OUT=>''}], + new_ucmd!() + .args(&["-d", "[:alnum:]", ""]) + .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") + .succeeds() + .stdout_is(""); + // ['q', qw(-d '[:alnum:]'), {IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, {OUT=>'..'}], + new_ucmd!() + .args(&["-d", "[:alnum:]"]) + .pipe_in(".abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.") + .succeeds() + .stdout_is(".."); // ['r', qw(-ds '[:alnum:]' .), // {IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, // {OUT=>'.'}], From 279a7cf6b396ad4a9430d83a74fbb3461680dc37 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sat, 24 Jul 2021 22:06:19 +0800 Subject: [PATCH 22/50] Attempting to fix star expansion Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 380 +++++++++++++++++++++---------------- 1 file changed, 218 insertions(+), 162 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 960ab7ada..2ff43b2a5 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -26,10 +26,132 @@ mod unicode_table { pub static BLANK: &'static [char] = &[SPACE, HT]; } +struct Repeat(char); + +impl Repeat { + fn new(element: char) -> Repeat { + Repeat(element) + } +} + +impl Iterator for Repeat { + type Item = char; + + fn next(&mut self) -> Option { + Some(self.0) + } + + fn last(self) -> Option { + Some(self.0) + } + + fn any(&mut self, mut f: F) -> bool + where + Self: Sized, + F: FnMut(Self::Item) -> bool, + { + f(self.0) + } +} + +fn truncate_iterator(input: Option) -> impl Fn((usize, T)) -> Option { + move |(idx, c)| match input { + Some(s) => match s.cmp(&idx) { + std::cmp::Ordering::Greater => Some(c), + _ => None, + }, + None => Some(c), 
+ } +} + +#[derive(Debug, Clone, Copy)] pub enum Sequence { Char(char), - CharRange(Box>), + CharRange(u32, u32), CharStar(char), + CharRepeat(char, usize), + Alnum, + Alpha, + Blank, + Control, + Digit, + Graph, + Lower, + Print, + Punct, + Space, + Upper, + Xdigit, +} + +impl Sequence { + pub fn flatten(&self) -> Box> { + match self { + Sequence::Char(c) => Box::new(std::iter::once(*c)), + Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)), + Sequence::CharStar(c) => Box::new(Repeat::new(*c)), + Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)), + Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')), + Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')), + Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()), + Sequence::Control => Box::new( + (0..=31) + .chain(std::iter::once(127)) + .flat_map(char::from_u32), + ), + Sequence::Digit => Box::new('0'..='9'), + Sequence::Graph => Box::new( + (48..=57) // digit + .chain(65..=90) // uppercase + .chain(97..=122) // lowercase + // punctuations + .chain(33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .chain(std::iter::once(32)) // space + .flat_map(char::from_u32), + ), + Sequence::Lower => Box::new('a'..='z'), + Sequence::Print => Box::new( + (48..=57) // digit + .chain(65..=90) // uppercase + .chain(97..=122) // lowercase + // punctuations + .chain(33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32), + ), + Sequence::Punct => Box::new( + (33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32), + ), + Sequence::Space => Box::new(unicode_table::SPACES.into_iter().cloned()), + Sequence::Upper => Box::new('A'..='Z'), + Sequence::Xdigit => Box::new(('0'..='9').chain('A'..='F').chain('a'..='f')), + } + } + + pub fn last(&self) -> Option { + match self { + Sequence::CharStar(c) => Some(*c), + // TODO: Can be optimized further... 
+ rest => rest.flatten().last(), + } + } + + pub fn len(&self) -> Option { + match self { + Sequence::CharStar(_) => None, + // TODO: Is there a fix for this? + rest => Some(rest.flatten().count()), + } + } } impl Sequence { @@ -70,16 +192,6 @@ impl Sequence { .unwrap() } - pub fn dissolve(self) -> Box> { - match self { - Sequence::Char(c) => Box::new(std::iter::once(c)), - Sequence::CharRange(r) => r, - Sequence::CharStar(c) => Box::new(std::iter::repeat(c)), - } - } - - /// Sequence parsers - fn parse_char(input: &str) -> IResult<&str, Sequence> { anychar(input).map(|(l, r)| (l, Sequence::Char(r))) } @@ -115,7 +227,7 @@ impl Sequence { separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| { (l, { let (start, end) = (u32::from(a), u32::from(b)); - Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + Sequence::CharRange(start, end) }) }) } @@ -129,7 +241,7 @@ impl Sequence { .map(|(l, (a, b))| { (l, { let (start, end) = (u32::from(a), u32::from(b)); - Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + Sequence::CharRange(start, end) }) }) } @@ -143,7 +255,7 @@ impl Sequence { .map(|(l, (a, b))| { (l, { let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b)); - Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + Sequence::CharRange(start, end) }) }) } @@ -157,7 +269,7 @@ impl Sequence { .map(|(l, (a, b))| { (l, { let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap()); - Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + Sequence::CharRange(start, end) }) }) } @@ -174,7 +286,7 @@ impl Sequence { u32::from_str_radix(a, 8).unwrap(), u32::from_str_radix(b, 8).unwrap(), ); - Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32))) + Sequence::CharRange(start, end) }) }) } @@ -189,136 +301,55 @@ impl Sequence { separated_pair(anychar, tag("*"), digit1), tag("]"), )(input) - .map(|(l, (c, n))| { - ( - l, 
- Sequence::CharRange(Box::new(std::iter::repeat(c).take(n.parse().unwrap()))), - ) - }) + .map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap()))) } fn parse_alnum(input: &str) -> IResult<&str, Sequence> { - tag("[:alnum:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z'))), - ) - }) + tag("[:alnum:]")(input).map(|(l, _)| (l, Sequence::Alnum)) } fn parse_alpha(input: &str) -> IResult<&str, Sequence> { - tag("[:alpha:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new(('A'..='Z').chain('a'..='z'))), - ) - }) + tag("[:alpha:]")(input).map(|(l, _)| (l, Sequence::Alpha)) } fn parse_blank(input: &str) -> IResult<&str, Sequence> { - tag("[:blank:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new(unicode_table::BLANK.into_iter().cloned())), - ) - }) + tag("[:blank:]")(input).map(|(l, _)| (l, Sequence::Blank)) } fn parse_control(input: &str) -> IResult<&str, Sequence> { - tag("[:cntrl:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new( - (0..=31) - .chain(std::iter::once(127)) - .flat_map(char::from_u32), - )), - ) - }) + tag("[:cntrl:]")(input).map(|(l, _)| (l, Sequence::Control)) } fn parse_digit(input: &str) -> IResult<&str, Sequence> { - tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('0'..='9')))) + tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::Digit)) } fn parse_graph(input: &str) -> IResult<&str, Sequence> { - tag("[:graph:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new( - (48..=57) // digit - .chain(65..=90) // uppercase - .chain(97..=122) // lowercase - // punctuations - .chain(33..=47) - .chain(58..=64) - .chain(91..=96) - .chain(123..=126) - .chain(std::iter::once(32)) // space - .flat_map(char::from_u32), - )), - ) - }) + tag("[:graph:]")(input).map(|(l, _)| (l, Sequence::Graph)) } fn parse_lower(input: &str) -> IResult<&str, Sequence> { - tag("[:lower:]")(input).map(|(l, _)| (l, 
Sequence::CharRange(Box::new('a'..='z')))) + tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::Lower)) } fn parse_print(input: &str) -> IResult<&str, Sequence> { - tag("[:print:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new( - (48..=57) // digit - .chain(65..=90) // uppercase - .chain(97..=122) // lowercase - // punctuations - .chain(33..=47) - .chain(58..=64) - .chain(91..=96) - .chain(123..=126) - .flat_map(char::from_u32), - )), - ) - }) + tag("[:print:]")(input).map(|(l, _)| (l, Sequence::Print)) } fn parse_punct(input: &str) -> IResult<&str, Sequence> { - tag("[:punct:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new( - (33..=47) - .chain(58..=64) - .chain(91..=96) - .chain(123..=126) - .flat_map(char::from_u32), - )), - ) - }) + tag("[:punct:]")(input).map(|(l, _)| (l, Sequence::Punct)) } fn parse_space(input: &str) -> IResult<&str, Sequence> { - tag("[:space:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new(unicode_table::SPACES.into_iter().cloned())), - ) - }) + tag("[:space:]")(input).map(|(l, _)| (l, Sequence::Space)) } fn parse_upper(input: &str) -> IResult<&str, Sequence> { - tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('A'..='Z')))) + tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::Upper)) } fn parse_xdigit(input: &str) -> IResult<&str, Sequence> { - tag("[:xdigit:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(Box::new(('0'..='9').chain('A'..='F').chain('a'..='f'))), - ) - }) + tag("[:xdigit:]")(input).map(|(l, _)| (l, Sequence::Xdigit)) } fn parse_char_equal(input: &str) -> IResult<&str, Sequence> { @@ -339,10 +370,7 @@ pub struct DeleteOperation { impl DeleteOperation { pub fn new(set: Vec, complement_flag: bool) -> DeleteOperation { DeleteOperation { - set: set - .into_iter() - .flat_map(Sequence::dissolve) - .collect::>(), + set: set.iter().flat_map(Sequence::flatten).collect::>(), complement_flag, } } @@ -355,21 +383,30 @@ impl SymbolTranslator for 
DeleteOperation { } } -#[derive(Debug)] pub struct TranslateOperationComplement { iter: u32, set1: Vec, - set2: Vec, + set2: Box>, fallback: char, translation_map: HashMap, } impl TranslateOperationComplement { - fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationComplement { + fn new( + set1: Vec, + set2: Vec, + set1_truncate_length: Option, + fallback: char, + ) -> TranslateOperationComplement { TranslateOperationComplement { iter: 0, - set1, - set2: set2.into_iter().rev().collect(), + set1: set1 + .iter() + .flat_map(Sequence::flatten) + .enumerate() + .filter_map(truncate_iterator(set1_truncate_length)) + .collect(), + set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))), fallback, translation_map: HashMap::new(), } @@ -382,61 +419,83 @@ pub struct TranslateOperationStandard { } impl TranslateOperationStandard { - fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationStandard { + fn new( + set1: Vec, + set2: Vec, + set1_truncate_length: Option, + fallback: char, + ) -> TranslateOperationStandard { TranslateOperationStandard { translation_map: set1 - .into_iter() - .zip(set2.into_iter().chain(std::iter::repeat(fallback))) + .iter() + .flat_map(Sequence::flatten) + .zip( + set2.iter() + .flat_map(Sequence::flatten) + .chain(Repeat(fallback)), + ) + .enumerate() + .filter_map(truncate_iterator(set1_truncate_length)) .collect::>(), } } } -#[derive(Debug)] pub enum TranslateOperation { Standard(TranslateOperationStandard), Complement(TranslateOperationComplement), } impl TranslateOperation { - fn next_complement_char(mut iter: u32, ignore_list: &[char]) -> (u32, char) { - while (char::from_u32(iter).is_none() - || ignore_list - .iter() - .map(|c| u32::from(*c)) - .any(|c| iter.eq(&c))) - && iter.ne(&u32::MAX) - { - iter = iter.saturating_add(1) - } - (iter.saturating_add(1), char::from_u32(iter).unwrap()) + fn next_complement_char(iter: u32, ignore_list: &[char]) -> (u32, char) { + (iter..) 
+ .filter_map(char::from_u32) + .filter(|c| !ignore_list.iter().any(|s| s.eq(c))) + .map(|c| (u32::from(c) + 1, c)) + .next() + .expect("exhausted all possible characters") } } impl TranslateOperation { pub fn new( - pset1: Vec, - pset2: Vec, + set1: Vec, + set2: Vec, truncate_set1: bool, complement: bool, ) -> TranslateOperation { - // TODO: Only some translation is acceptable i.e. uppercase/lowercase transform. - let mut set1 = pset1 - .into_iter() - .flat_map(Sequence::dissolve) - .collect::>(); - let set2 = pset2 - .into_iter() - .flat_map(Sequence::dissolve) - .collect::>(); - let fallback = set2.last().cloned().unwrap(); - if truncate_set1 { - set1.truncate(set2.len()); - } - if complement { - TranslateOperation::Complement(TranslateOperationComplement::new(set1, set2, fallback)) + let fallback = set2 + .iter() + .rev() + .next() + .map(Sequence::last) + .flatten() + .unwrap(); + let set1_truncate_length = if truncate_set1 { + set2.iter() + .map(Sequence::len) + .reduce(|a, b| match (a, b) { + (Some(l), Some(r)) => Some(l + r), + _ => None, + }) + .flatten() } else { - TranslateOperation::Standard(TranslateOperationStandard::new(set1, set2, fallback)) + None + }; + if complement { + TranslateOperation::Complement(TranslateOperationComplement::new( + set1, + set2, + set1_truncate_length, + fallback, + )) + } else { + TranslateOperation::Standard(TranslateOperationStandard::new( + set1, + set2, + set1_truncate_length, + fallback, + )) } } } @@ -466,7 +525,7 @@ impl SymbolTranslator for TranslateOperation { Some(*c) } else { while translation_map.get(¤t).is_none() { - if let Some(p) = set2.pop() { + if let Some(p) = set2.next() { let (next_index, next_value) = TranslateOperation::next_complement_char(*iter, &*set1); *iter = next_index; @@ -484,18 +543,15 @@ impl SymbolTranslator for TranslateOperation { #[derive(Debug, Clone)] pub struct SqueezeOperation { - squeeze_set: Vec, + set1: Vec, complement: bool, previous: Option, } impl SqueezeOperation { - pub fn 
new(squeeze_set: Vec, complement: bool) -> SqueezeOperation { + pub fn new(set1: Vec, complement: bool) -> SqueezeOperation { SqueezeOperation { - squeeze_set: squeeze_set - .into_iter() - .flat_map(Sequence::dissolve) - .collect(), + set1: set1.iter().flat_map(Sequence::flatten).collect(), complement, previous: None, } @@ -505,7 +561,7 @@ impl SqueezeOperation { impl SymbolTranslator for SqueezeOperation { fn translate(&mut self, current: char) -> Option { if self.complement { - let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) { + let next = if self.set1.iter().any(|c| c.eq(¤t)) { Some(current) } else { match self.previous { @@ -526,7 +582,7 @@ impl SymbolTranslator for SqueezeOperation { self.previous = Some(current); next } else { - let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) { + let next = if self.set1.iter().any(|c| c.eq(¤t)) { match self.previous { Some(v) if v == current => None, _ => Some(current), @@ -542,7 +598,7 @@ impl SymbolTranslator for SqueezeOperation { pub fn translate_input(input: &mut R, output: &mut W, mut translator: T) where - T: SymbolTranslator + Debug, + T: SymbolTranslator, R: BufRead, W: Write, { From b7a0ad15a7e19db98826dab7adfe0c2fa76f2fd2 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sat, 24 Jul 2021 22:06:27 +0800 Subject: [PATCH 23/50] Cleaning up tests Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 6d80cb528..74d631a8f 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -191,6 +191,7 @@ fn test_set1_shorter_than_set2() { #[test] fn test_truncate() { + // echo -n "abcde" | tr -t "abc" "xy" new_ucmd!() .args(&["-t", "abc", "xy"]) .pipe_in("abcde") From 5a0870bb3005e738dab920ff29a73dc7b440414b Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 25 Jul 2021 15:51:40 +0800 Subject: [PATCH 24/50] Condensed many of the weird stuff in tr in a function...passes 
more GNU tests Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 232 +++++++++++++++++++------------------ src/uu/tr/src/tr.rs | 57 +++++---- 2 files changed, 156 insertions(+), 133 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 2ff43b2a5..72845e531 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -26,44 +26,6 @@ mod unicode_table { pub static BLANK: &'static [char] = &[SPACE, HT]; } -struct Repeat(char); - -impl Repeat { - fn new(element: char) -> Repeat { - Repeat(element) - } -} - -impl Iterator for Repeat { - type Item = char; - - fn next(&mut self) -> Option { - Some(self.0) - } - - fn last(self) -> Option { - Some(self.0) - } - - fn any(&mut self, mut f: F) -> bool - where - Self: Sized, - F: FnMut(Self::Item) -> bool, - { - f(self.0) - } -} - -fn truncate_iterator(input: Option) -> impl Fn((usize, T)) -> Option { - move |(idx, c)| match input { - Some(s) => match s.cmp(&idx) { - std::cmp::Ordering::Greater => Some(c), - _ => None, - }, - None => Some(c), - } -} - #[derive(Debug, Clone, Copy)] pub enum Sequence { Char(char), @@ -89,8 +51,8 @@ impl Sequence { match self { Sequence::Char(c) => Box::new(std::iter::once(*c)), Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)), - Sequence::CharStar(c) => Box::new(Repeat::new(*c)), - Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)), + Sequence::CharStar(c) => Box::new(std::iter::repeat(*c)), + Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)), Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')), Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')), Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()), @@ -140,22 +102,99 @@ impl Sequence { pub fn last(&self) -> Option { match self { Sequence::CharStar(c) => Some(*c), - // TODO: Can be optimized further... 
rest => rest.flatten().last(), } } - pub fn len(&self) -> Option { - match self { - Sequence::CharStar(_) => None, - // TODO: Is there a fix for this? - rest => Some(rest.flatten().count()), + // Hide all the nasty sh*t in here + pub fn solve_set_characters( + set1: &Vec, + set2: &Vec, + ) -> Result<(Vec, Vec), String> { + let is_char_star = |s: &&Sequence| -> bool { + match s { + Sequence::CharStar(_) => true, + _ => false, + } + }; + let set1_star_count = set1.iter().filter(is_char_star).count(); + if set1_star_count == 0 { + let set2_star_count = set2.iter().filter(is_char_star).count(); + if set2_star_count < 2 { + let char_star = set2.iter().find_map(|s| match s { + Sequence::CharStar(c) => Some(c), + _ => None, + }); + let mut partition = set2.as_slice().split(|s| match s { + Sequence::CharStar(_) => true, + _ => false, + }); + let set1_len = set1.iter().flat_map(Sequence::flatten).count(); + let set2_len = set2 + .iter() + .filter_map(|s| match s { + Sequence::CharStar(_) => None, + r => Some(r), + }) + .flat_map(Sequence::flatten) + .count(); + let star_compensate_len = set1_len.saturating_sub(set2_len); + let set2_solved = match (partition.next(), partition.next()) { + (None, None) => match char_star { + Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(), + None => std::iter::empty().collect(), + }, + (None, Some(set2_b)) => { + if let Some(c) = char_star { + std::iter::repeat(*c) + .take(star_compensate_len) + .chain(set2_b.iter().flat_map(Sequence::flatten)) + .collect() + } else { + set2_b.iter().flat_map(Sequence::flatten).collect() + } + } + (Some(set2_a), None) => match char_star { + Some(c) => set2_a + .iter() + .flat_map(Sequence::flatten) + .chain(std::iter::repeat(*c).take(star_compensate_len)) + .collect(), + None => set2_a.iter().flat_map(Sequence::flatten).collect(), + }, + (Some(set2_a), Some(set2_b)) => match char_star { + Some(c) => set2_a + .iter() + .flat_map(Sequence::flatten) + 
.chain(std::iter::repeat(*c).take(star_compensate_len)) + .chain(set2_b.iter().flat_map(Sequence::flatten)) + .collect(), + None => set2_a + .iter() + .chain(set2_b.iter()) + .flat_map(Sequence::flatten) + .collect(), + }, + }; + let set1_solved = set1.iter().flat_map(Sequence::flatten).collect(); + return Ok((set1_solved, set2_solved)); + } else { + Err(format!( + "{}: only one [c*] repeat construct may appear in string2", + executable!() + )) + } + } else { + Err(format!( + "{}: the [c*] repeat construct may not appear in string1", + executable!() + )) } } } impl Sequence { - pub fn parse_set_string(input: &str) -> Vec { + pub fn from_str(input: &str) -> Vec { many0(alt(( alt(( Sequence::parse_char_range_octal_leftright, @@ -385,28 +424,20 @@ impl SymbolTranslator for DeleteOperation { pub struct TranslateOperationComplement { iter: u32, + set2_iter: usize, set1: Vec, - set2: Box>, + set2: Vec, fallback: char, translation_map: HashMap, } impl TranslateOperationComplement { - fn new( - set1: Vec, - set2: Vec, - set1_truncate_length: Option, - fallback: char, - ) -> TranslateOperationComplement { + fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationComplement { TranslateOperationComplement { iter: 0, - set1: set1 - .iter() - .flat_map(Sequence::flatten) - .enumerate() - .filter_map(truncate_iterator(set1_truncate_length)) - .collect(), - set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))), + set2_iter: 0, + set1, + set2, fallback, translation_map: HashMap::new(), } @@ -419,23 +450,11 @@ pub struct TranslateOperationStandard { } impl TranslateOperationStandard { - fn new( - set1: Vec, - set2: Vec, - set1_truncate_length: Option, - fallback: char, - ) -> TranslateOperationStandard { + fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationStandard { TranslateOperationStandard { translation_map: set1 - .iter() - .flat_map(Sequence::flatten) - .zip( - set2.iter() - .flat_map(Sequence::flatten) - .chain(Repeat(fallback)), - ) - 
.enumerate() - .filter_map(truncate_iterator(set1_truncate_length)) + .into_iter() + .zip(set2.into_iter().chain(std::iter::repeat(fallback))) .collect::>(), } } @@ -461,40 +480,27 @@ impl TranslateOperation { pub fn new( set1: Vec, set2: Vec, - truncate_set1: bool, + truncate_set1_flag: bool, complement: bool, - ) -> TranslateOperation { - let fallback = set2 - .iter() - .rev() - .next() - .map(Sequence::last) - .flatten() - .unwrap(); - let set1_truncate_length = if truncate_set1 { - set2.iter() - .map(Sequence::len) - .reduce(|a, b| match (a, b) { - (Some(l), Some(r)) => Some(l + r), - _ => None, - }) - .flatten() - } else { - None - }; + ) -> Result { + let (mut set1_solved, set2_solved) = Sequence::solve_set_characters(&set1, &set2)?; + if truncate_set1_flag { + set1_solved.truncate(set2_solved.len()); + } + let fallback = set2.iter().map(Sequence::last).last().flatten().expect( + format!( + "{}: when not truncating set1, string2 must be non-empty", + executable!() + ) + .as_str(), + ); if complement { - TranslateOperation::Complement(TranslateOperationComplement::new( - set1, - set2, - set1_truncate_length, - fallback, + Ok(TranslateOperation::Complement( + TranslateOperationComplement::new(set1_solved, set2_solved, fallback), )) } else { - TranslateOperation::Standard(TranslateOperationStandard::new( - set1, - set2, - set1_truncate_length, - fallback, + Ok(TranslateOperation::Standard( + TranslateOperationStandard::new(set1_solved, set2_solved, fallback), )) } } @@ -511,6 +517,7 @@ impl SymbolTranslator for TranslateOperation { ), TranslateOperation::Complement(TranslateOperationComplement { iter, + set2_iter, set1, set2, fallback, @@ -525,11 +532,12 @@ impl SymbolTranslator for TranslateOperation { Some(*c) } else { while translation_map.get(¤t).is_none() { - if let Some(p) = set2.next() { - let (next_index, next_value) = + if let Some(value) = set2.get(*set2_iter) { + let (next_iter, next_key) = TranslateOperation::next_complement_char(*iter, &*set1); - 
*iter = next_index; - translation_map.insert(next_value, p); + *iter = next_iter; + *set2_iter = set2_iter.saturating_add(1); + translation_map.insert(next_key, *value); } else { translation_map.insert(current, *fallback); } @@ -622,9 +630,7 @@ fn test_parse_octal() { for a in '0'..='7' { for b in '0'..='7' { for c in '0'..='7' { - assert!( - Sequence::parse_set_string(format!("\\{}{}{}", a, b, c).as_str()).len() == 1 - ); + assert!(Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()).len() == 1); } } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index f024fd6db..59e4852b2 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -69,7 +69,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if sets.is_empty() { show_error!( - "missing operand\nTry `{} --help` for more information.", + "missing operand\nTry '{} --help' for more information.", executable!() ); return 1; @@ -77,7 +77,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if !(delete_flag || squeeze_flag) && sets.len() < 2 { show_error!( - "missing operand after '{}'\nTry `{} --help` for more information.", + "missing operand after '{}'\nTry '{} --help' for more information.", + sets[0], + executable!() + ); + return 1; + } + + if sets.len() > 2 { + show_error!( + "extra operand '{}'\nTry '{} --help' for more information.", sets[0], executable!() ); @@ -95,50 +104,58 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut delete_buffer = vec![]; { let mut delete_writer = BufWriter::new(&mut delete_buffer); - let delete_op = - DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let delete_op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag); translate_input(&mut locked_stdin, &mut delete_writer, delete_op); } { let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); - let squeeze_op = - SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag); - translate_input(&mut squeeze_reader, &mut buffered_stdout, 
squeeze_op); + let op = SqueezeOperation::new(Sequence::from_str(&sets[1]), complement_flag); + translate_input(&mut squeeze_reader, &mut buffered_stdout, op); } } else { - let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { if sets.len() < 2 { - let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let op = SqueezeOperation::new(Sequence::from_str(&sets[0]), complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { let mut translate_buffer = vec![]; { let mut writer = BufWriter::new(&mut translate_buffer); - let translate_op = TranslateOperation::new( - Sequence::parse_set_string(&sets[0]), - Sequence::parse_set_string(&sets[1]), + match TranslateOperation::new( + Sequence::from_str(&sets[0]), + Sequence::from_str(&sets[1]), truncate_set1_flag, complement_flag, - ); - translate_input(&mut locked_stdin, &mut writer, translate_op); + ) { + Ok(op) => translate_input(&mut locked_stdin, &mut writer, op), + Err(s) => { + show_error!("{}", s); + return 1; + } + }; } { let mut reader = BufReader::new(translate_buffer.as_bytes()); - let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); + let squeeze_op = SqueezeOperation::new(Sequence::from_str(&sets[1]), false); translate_input(&mut reader, &mut buffered_stdout, squeeze_op); } } } else { - let op = TranslateOperation::new( - Sequence::parse_set_string(&sets[0]), - Sequence::parse_set_string(&sets[1]), + match TranslateOperation::new( + Sequence::from_str(&sets[0]), + Sequence::from_str(&sets[1]), truncate_set1_flag, complement_flag, - ); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + ) { + Ok(op) => translate_input(&mut locked_stdin, &mut buffered_stdout, op), + Err(s) => { + show_error!("{}", s); + return 1; 
+ } + }; } 0 From 43dbff7c562a3a2d1f1e5e44092cb91d1db6a815 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 25 Jul 2021 16:16:12 +0800 Subject: [PATCH 25/50] Something wrong with rust iterator... Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 72845e531..504f8ac3a 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -487,7 +487,7 @@ impl TranslateOperation { if truncate_set1_flag { set1_solved.truncate(set2_solved.len()); } - let fallback = set2.iter().map(Sequence::last).last().flatten().expect( + let fallback = set2.last().map(Sequence::last).flatten().expect( format!( "{}: when not truncating set1, string2 must be non-empty", executable!() From bf0f01714caadc9fa4c23ddceeacdaf5c90c2b63 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Mon, 26 Jul 2021 07:59:35 +0800 Subject: [PATCH 26/50] Splitting out GNU tests Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 105 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 3 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 74d631a8f..504d76ba9 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -394,7 +394,6 @@ fn alnum_expands_number_uppercase_lowercase() { } #[test] -#[ignore = "not expected to fully pass -- any help appreciated!"] fn check_against_gnu_tr_tests() { // ['1', qw(abcd '[]*]'), {IN=>'abcd'}, {OUT=>']]]]'}], new_ucmd!() @@ -402,18 +401,30 @@ fn check_against_gnu_tr_tests() { .pipe_in("abcd") .succeeds() .stdout_is("]]]]"); +} + +#[test] +fn check_against_gnu_tr_tests_2() { // ['2', qw(abc '[%*]xyz'), {IN=>'abc'}, {OUT=>'xyz'}], new_ucmd!() .args(&["abc", "[%*]xyz"]) .pipe_in("abc") .succeeds() .stdout_is("xyz"); +} + +#[test] +fn check_against_gnu_tr_tests_3() { // ['3', qw('' '[.*]'), {IN=>'abc'}, {OUT=>'abc'}], new_ucmd!() .args(&["", 
"[.*]"]) .pipe_in("abc") .succeeds() .stdout_is("abc"); +} + +#[test] +fn check_against_gnu_tr_tests_4() { // # Test --truncate-set1 behavior when string1 is longer than string2 // ['4', qw(-t abcd xy), {IN=>'abcde'}, {OUT=>'xycde'}], new_ucmd!() @@ -421,6 +432,10 @@ fn check_against_gnu_tr_tests() { .pipe_in("abcde") .succeeds() .stdout_is("xycde"); +} + +#[test] +fn check_against_gnu_tr_tests_5() { // # Test bsd behavior (the default) when string1 is longer than string2 // ['5', qw(abcd xy), {IN=>'abcde'}, {OUT=>'xyyye'}], new_ucmd!() @@ -428,6 +443,10 @@ fn check_against_gnu_tr_tests() { .pipe_in("abcde") .succeeds() .stdout_is("xyyye"); +} + +#[test] +fn check_against_gnu_tr_tests_6() { // # Do it the posix way // ['6', qw(abcd 'x[y*]'), {IN=>'abcde'}, {OUT=>'xyyye'}], new_ucmd!() @@ -435,54 +454,90 @@ fn check_against_gnu_tr_tests() { .pipe_in("abcde") .succeeds() .stdout_is("xyyye"); - // ['7', qw(-s a-p ,"'), {IN=>'abcdefghijklmnop'}, {OUT=>'%.$'}], +} + +#[test] +fn check_against_gnu_tr_tests_7() { + // ['7', qw(-s a-p '%[.*]$'), {IN=>'abcdefghijklmnop'}, {OUT=>'%.$'}], new_ucmd!() - .args(&["-s", "a-p", "\"'"]) + .args(&["-s", "a-p", "%[.*]$"]) .pipe_in("abcdefghijklmnop") .succeeds() .stdout_is("%.$"); +} + +#[test] +fn check_against_gnu_tr_tests_8() { // ['8', qw(-s a-p '[.*]$'), {IN=>'abcdefghijklmnop'}, {OUT=>'.$'}], new_ucmd!() .args(&["-s", "a-p", "[.*]$"]) .pipe_in("abcdefghijklmnop") .succeeds() .stdout_is(".$"); +} + +#[test] +fn check_against_gnu_tr_tests_9() { // ['9', qw(-s a-p '%[.*]'), {IN=>'abcdefghijklmnop'}, {OUT=>'%.'}], new_ucmd!() .args(&["-s", "a-p", "%[.*]"]) .pipe_in("abcdefghijklmnop") .succeeds() .stdout_is("%."); +} + +#[test] +fn check_against_gnu_tr_tests_a() { // ['a', qw(-s '[a-z]'), {IN=>'aabbcc'}, {OUT=>'abc'}], new_ucmd!() .args(&["-s", "[a-z]"]) .pipe_in("aabbcc") .succeeds() .stdout_is("abc"); +} + +#[test] +fn check_against_gnu_tr_tests_b() { // ['b', qw(-s '[a-c]'), {IN=>'aabbcc'}, {OUT=>'abc'}], new_ucmd!() 
.args(&["-s", "[a-c]"]) .pipe_in("aabbcc") .succeeds() .stdout_is("abc"); +} + +#[test] +fn check_against_gnu_tr_tests_c() { // ['c', qw(-s '[a-b]'), {IN=>'aabbcc'}, {OUT=>'abcc'}], new_ucmd!() .args(&["-s", "[a-b]"]) .pipe_in("aabbcc") .succeeds() .stdout_is("abcc"); +} + +#[test] +fn check_against_gnu_tr_tests_d() { // ['d', qw(-s '[b-c]'), {IN=>'aabbcc'}, {OUT=>'aabc'}], new_ucmd!() .args(&["-s", "[b-c]"]) .pipe_in("aabbcc") .succeeds() .stdout_is("aabc"); +} + +#[test] +fn check_against_gnu_tr_tests_e() { // ['e', qw(-s '[\0-\5]'), {IN=>"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"}, {OUT=>"\0a\1b\2c\3d\4e\5"}], new_ucmd!() .args(&["-s", r#"[\0-\5]"#]) .pipe_in(r#"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"#) .succeeds() .stdout_is(r#"\0a\1b\2c\3d\4e\5"#); +} + +#[test] +fn check_against_gnu_tr_tests_f() { // # tests of delete // ['f', qw(-d '[=[=]'), {IN=>'[[[[[[[]]]]]]]]'}, {OUT=>']]]]]]]]'}], new_ucmd!() @@ -490,66 +545,110 @@ fn check_against_gnu_tr_tests() { .pipe_in("[[[[[[[]]]]]]]]") .succeeds() .stdout_is("]]]]]]]]"); +} + +#[test] +fn check_against_gnu_tr_tests_g() { // ['g', qw(-d '[=]=]'), {IN=>'[[[[[[[]]]]]]]]'}, {OUT=>'[[[[[[['}], new_ucmd!() .args(&["-d", "[=]=]"]) .pipe_in("[[[[[[[]]]]]]]]") .succeeds() .stdout_is("[[[[[[["); +} + +#[test] +fn check_against_gnu_tr_tests_h() { // ['h', qw(-d '[:xdigit:]'), {IN=>'0123456789acbdefABCDEF'}, {OUT=>''}], new_ucmd!() .args(&["-d", "[:xdigit:]"]) .pipe_in("0123456789acbdefABCDEF") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_i() { // ['i', qw(-d '[:xdigit:]'), {IN=>'w0x1y2z3456789acbdefABCDEFz'}, {OUT=>'wxyzz'}], new_ucmd!() .args(&["-d", "[:xdigit:]"]) .pipe_in("w0x1y2z3456789acbdefABCDEFz") .succeeds() .stdout_is("wxyzz"); +} + +#[test] +fn check_against_gnu_tr_tests_j() { // ['j', qw(-d '[:digit:]'), {IN=>'0123456789'}, {OUT=>''}], new_ucmd!() .args(&["", "", ""]) .pipe_in("") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_k() { // ['k', qw(-d '[:digit:]'), 
{IN=>'a0b1c2d3e4f5g6h7i8j9k'}, {OUT=>'abcdefghijk'}], new_ucmd!() .args(&["-d", "[:digit:]"]) .pipe_in("a0b1c2d3e4f5g6h7i8j9k") .succeeds() .stdout_is("abcdefghijk"); +} + +#[test] +fn check_against_gnu_tr_tests_l() { // ['l', qw(-d '[:lower:]'), {IN=>'abcdefghijklmnopqrstuvwxyz'}, {OUT=>''}], new_ucmd!() .args(&["-d", "[:lower:]"]) .pipe_in("abcdefghijklmnopqrstuvwxyz") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_m() { // ['m', qw(-d '[:upper:]'), {IN=>'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], new_ucmd!() .args(&["-d", "[:upper:]"]) .pipe_in("ABCDEFGHIJKLMNOPQRSTUVWXYZ") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_n() { // ['n', qw(-d '[:lower:][:upper:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], new_ucmd!() .args(&["-d", "[:lower:][:upper:]"]) .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_o() { // ['o', qw(-d '[:alpha:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'}, {OUT=>''}], new_ucmd!() .args(&["-d", "[:alpha:]"]) .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_p() { // ['p', qw(-d '[:alnum:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'}, {OUT=>''}], new_ucmd!() .args(&["-d", "[:alnum:]", ""]) .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") .succeeds() .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_q() { // ['q', qw(-d '[:alnum:]'), {IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, {OUT=>'..'}], new_ucmd!() .args(&["-d", "[:alnum:]"]) From 2c8ba4ad2dedf4586ed87ff361d66d7e98386279 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Mon, 26 Jul 2021 07:59:51 +0800 Subject: [PATCH 27/50] Fixing some issues discovered from tests...mostly to match GNU behavior Signed-off-by: Hanif 
Bin Ariffin --- src/uu/tr/src/operation.rs | 77 +++++++++++++++++--------------------- src/uu/tr/src/tr.rs | 43 +++++++++++---------- 2 files changed, 58 insertions(+), 62 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 504f8ac3a..04850aabf 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -99,17 +99,11 @@ impl Sequence { } } - pub fn last(&self) -> Option { - match self { - Sequence::CharStar(c) => Some(*c), - rest => rest.flatten().last(), - } - } - // Hide all the nasty sh*t in here pub fn solve_set_characters( - set1: &Vec, - set2: &Vec, + set1: Vec, + set2: Vec, + truncate_set1_flag: bool, ) -> Result<(Vec, Vec), String> { let is_char_star = |s: &&Sequence| -> bool { match s { @@ -139,7 +133,8 @@ impl Sequence { .flat_map(Sequence::flatten) .count(); let star_compensate_len = set1_len.saturating_sub(set2_len); - let set2_solved = match (partition.next(), partition.next()) { + let (left, right) = (partition.next(), partition.next()); + let set2_solved: Vec = match (left, right) { (None, None) => match char_star { Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(), None => std::iter::empty().collect(), @@ -176,7 +171,10 @@ impl Sequence { .collect(), }, }; - let set1_solved = set1.iter().flat_map(Sequence::flatten).collect(); + let mut set1_solved: Vec = set1.iter().flat_map(Sequence::flatten).collect(); + if truncate_set1_flag { + set1_solved.truncate(set2_solved.len()); + } return Ok((set1_solved, set2_solved)); } else { Err(format!( @@ -407,9 +405,9 @@ pub struct DeleteOperation { } impl DeleteOperation { - pub fn new(set: Vec, complement_flag: bool) -> DeleteOperation { + pub fn new(set: Vec, complement_flag: bool) -> DeleteOperation { DeleteOperation { - set: set.iter().flat_map(Sequence::flatten).collect::>(), + set, complement_flag, } } @@ -427,18 +425,16 @@ pub struct TranslateOperationComplement { set2_iter: usize, set1: Vec, set2: Vec, - fallback: char, translation_map: 
HashMap, } impl TranslateOperationComplement { - fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationComplement { + fn new(set1: Vec, set2: Vec) -> TranslateOperationComplement { TranslateOperationComplement { iter: 0, set2_iter: 0, set1, set2, - fallback, translation_map: HashMap::new(), } } @@ -450,12 +446,22 @@ pub struct TranslateOperationStandard { } impl TranslateOperationStandard { - fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationStandard { - TranslateOperationStandard { - translation_map: set1 - .into_iter() - .zip(set2.into_iter().chain(std::iter::repeat(fallback))) - .collect::>(), + fn new(set1: Vec, set2: Vec) -> Result { + if let Some(fallback) = set2.last().map(|s| *s) { + Ok(TranslateOperationStandard { + translation_map: set1 + .into_iter() + .zip(set2.into_iter().chain(std::iter::repeat(fallback))) + .collect::>(), + }) + } else { + if set1.is_empty() && set2.is_empty() { + Ok(TranslateOperationStandard { + translation_map: HashMap::new(), + }) + } else { + Err("when not truncating set1, string2 must be non-empty".to_string()) + } } } } @@ -478,29 +484,17 @@ impl TranslateOperation { impl TranslateOperation { pub fn new( - set1: Vec, - set2: Vec, - truncate_set1_flag: bool, + set1: Vec, + set2: Vec, complement: bool, ) -> Result { - let (mut set1_solved, set2_solved) = Sequence::solve_set_characters(&set1, &set2)?; - if truncate_set1_flag { - set1_solved.truncate(set2_solved.len()); - } - let fallback = set2.last().map(Sequence::last).flatten().expect( - format!( - "{}: when not truncating set1, string2 must be non-empty", - executable!() - ) - .as_str(), - ); if complement { Ok(TranslateOperation::Complement( - TranslateOperationComplement::new(set1_solved, set2_solved, fallback), + TranslateOperationComplement::new(set1, set2), )) } else { Ok(TranslateOperation::Standard( - TranslateOperationStandard::new(set1_solved, set2_solved, fallback), + TranslateOperationStandard::new(set1, set2)?, )) } } @@ -520,7 +514,6 
@@ impl SymbolTranslator for TranslateOperation { set2_iter, set1, set2, - fallback, translation_map, }) => { // First, try to see if current char is already mapped @@ -539,7 +532,7 @@ impl SymbolTranslator for TranslateOperation { *set2_iter = set2_iter.saturating_add(1); translation_map.insert(next_key, *value); } else { - translation_map.insert(current, *fallback); + translation_map.insert(current, *set2.last().unwrap()); } } Some(*translation_map.get(¤t).unwrap()) @@ -557,9 +550,9 @@ pub struct SqueezeOperation { } impl SqueezeOperation { - pub fn new(set1: Vec, complement: bool) -> SqueezeOperation { + pub fn new(set1: Vec, complement: bool) -> SqueezeOperation { SqueezeOperation { - set1: set1.iter().flat_map(Sequence::flatten).collect(), + set1, complement, previous: None, } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 59e4852b2..99d7b5132 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -66,6 +66,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .values_of(options::SETS) .map(|v| v.map(ToString::to_string).collect::>()) .unwrap_or_default(); + let sets_len = sets.len(); if sets.is_empty() { show_error!( @@ -75,7 +76,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { return 1; } - if !(delete_flag || squeeze_flag) && sets.len() < 2 { + if !(delete_flag || squeeze_flag) && sets_len < 2 { show_error!( "missing operand after '{}'\nTry '{} --help' for more information.", sets[0], @@ -84,7 +85,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { return 1; } - if sets.len() > 2 { + if sets_len > 2 { show_error!( "extra operand '{}'\nTry '{} --help' for more information.", sets[0], @@ -99,37 +100,44 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let locked_stdout = stdout.lock(); let mut buffered_stdout = BufWriter::new(locked_stdout); + let mut sets_iter = sets.into_iter(); + let (set1, set2) = match Sequence::solve_set_characters( + Sequence::from_str(sets_iter.next().unwrap_or_default().as_str()), + 
Sequence::from_str(sets_iter.next().unwrap_or_default().as_str()), + truncate_set1_flag, + ) { + Ok(r) => r, + Err(s) => { + show_error!("{}", s); + return 1; + } + }; if delete_flag { if squeeze_flag { let mut delete_buffer = vec![]; { let mut delete_writer = BufWriter::new(&mut delete_buffer); - let delete_op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag); + let delete_op = DeleteOperation::new(set1.clone(), complement_flag); translate_input(&mut locked_stdin, &mut delete_writer, delete_op); } { let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); - let op = SqueezeOperation::new(Sequence::from_str(&sets[1]), complement_flag); + let op = SqueezeOperation::new(set2, complement_flag); translate_input(&mut squeeze_reader, &mut buffered_stdout, op); } } else { - let op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag); + let op = DeleteOperation::new(set1, complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { - if sets.len() < 2 { - let op = SqueezeOperation::new(Sequence::from_str(&sets[0]), complement_flag); + if sets_len < 2 { + let op = SqueezeOperation::new(set1, complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { let mut translate_buffer = vec![]; { let mut writer = BufWriter::new(&mut translate_buffer); - match TranslateOperation::new( - Sequence::from_str(&sets[0]), - Sequence::from_str(&sets[1]), - truncate_set1_flag, - complement_flag, - ) { + match TranslateOperation::new(set1.clone(), set2.clone(), complement_flag) { Ok(op) => translate_input(&mut locked_stdin, &mut writer, op), Err(s) => { show_error!("{}", s); @@ -139,17 +147,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } { let mut reader = BufReader::new(translate_buffer.as_bytes()); - let squeeze_op = SqueezeOperation::new(Sequence::from_str(&sets[1]), false); + let squeeze_op = SqueezeOperation::new(set2, false); translate_input(&mut reader, &mut 
buffered_stdout, squeeze_op); } } } else { - match TranslateOperation::new( - Sequence::from_str(&sets[0]), - Sequence::from_str(&sets[1]), - truncate_set1_flag, - complement_flag, - ) { + match TranslateOperation::new(set1, set2, complement_flag) { Ok(op) => translate_input(&mut locked_stdin, &mut buffered_stdout, op), Err(s) => { show_error!("{}", s); From 5657f5af3ab59755f3cba54780a827ebd870698d Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Mon, 26 Jul 2021 13:57:51 +0800 Subject: [PATCH 28/50] Simplified and extended parsing capabilities Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 97 ++++++++++++++------------------------ 1 file changed, 36 insertions(+), 61 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 04850aabf..2d9e24080 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -8,7 +8,7 @@ use nom::{ IResult, }; use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt::Debug, io::{BufRead, Write}, }; @@ -47,6 +47,18 @@ pub enum Sequence { } impl Sequence { + // TODO: Can we do better? 
+ pub fn convert_octal_to_char(input: &str) -> char { + if input.starts_with("\\") && input.len() > 1 { + u32::from_str_radix(&input[1..], 8) + .map(|u| char::from_u32(u)) + .unwrap() + .unwrap() + } else { + input.chars().next().unwrap() + } + } + pub fn flatten(&self) -> Box> { match self { Sequence::Char(c) => Box::new(std::iter::once(*c)), @@ -196,9 +208,6 @@ impl Sequence { many0(alt(( alt(( Sequence::parse_char_range_octal_leftright, - Sequence::parse_char_range_octal_left, - Sequence::parse_char_range_octal_right, - Sequence::parse_char_range_backslash_collapse, Sequence::parse_char_range, Sequence::parse_char_star, Sequence::parse_char_repeat, @@ -229,6 +238,14 @@ impl Sequence { .unwrap() } + fn parse_octal_or_char(input: &str) -> IResult<&str, char> { + recognize(alt(( + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + recognize(anychar), + )))(input) + .map(|(l, a)| (l, Sequence::convert_octal_to_char(a))) + } + fn parse_char(input: &str) -> IResult<&str, Sequence> { anychar(input).map(|(l, r)| (l, Sequence::Char(r))) } @@ -261,19 +278,10 @@ impl Sequence { } fn parse_char_range(input: &str) -> IResult<&str, Sequence> { - separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| { - (l, { - let (start, end) = (u32::from(a), u32::from(b)); - Sequence::CharRange(start, end) - }) - }) - } - - fn parse_char_range_backslash_collapse(input: &str) -> IResult<&str, Sequence> { separated_pair( - preceded(tag("\\"), anychar), + Sequence::parse_octal_or_char, tag("-"), - preceded(tag("\\"), anychar), + Sequence::parse_octal_or_char, )(input) .map(|(l, (a, b))| { (l, { @@ -283,59 +291,29 @@ impl Sequence { }) } - fn parse_char_range_octal_left(input: &str) -> IResult<&str, Sequence> { - separated_pair( - preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), - tag("-"), - anychar, - )(input) - .map(|(l, (a, b))| { - (l, { - let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b)); - Sequence::CharRange(start, 
end) - }) - }) - } - - fn parse_char_range_octal_right(input: &str) -> IResult<&str, Sequence> { - separated_pair( - anychar, - tag("-"), - preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), - )(input) - .map(|(l, (a, b))| { - (l, { - let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap()); - Sequence::CharRange(start, end) - }) - }) - } - fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> { separated_pair( - preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + Sequence::parse_octal_or_char, tag("-"), - preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + Sequence::parse_octal_or_char, )(input) .map(|(l, (a, b))| { (l, { - let (start, end) = ( - u32::from_str_radix(a, 8).unwrap(), - u32::from_str_radix(b, 8).unwrap(), - ); + let (start, end) = (u32::from(a), u32::from(b)); Sequence::CharRange(start, end) }) }) } fn parse_char_star(input: &str) -> IResult<&str, Sequence> { - delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c))) + delimited(tag("["), Sequence::parse_octal_or_char, tag("*]"))(input) + .map(|(l, a)| (l, Sequence::CharStar(a))) } fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> { delimited( tag("["), - separated_pair(anychar, tag("*"), digit1), + separated_pair(Sequence::parse_octal_or_char, tag("*"), digit1), tag("]"), )(input) .map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap()))) @@ -390,7 +368,8 @@ impl Sequence { } fn parse_char_equal(input: &str) -> IResult<&str, Sequence> { - delimited(tag("[="), anychar, tag("=]"))(input).map(|(_, _)| todo!()) + delimited(tag("[="), Sequence::parse_octal_or_char, tag("=]"))(input) + .map(|(l, c)| (l, Sequence::Char(c))) } } @@ -544,7 +523,7 @@ impl SymbolTranslator for TranslateOperation { #[derive(Debug, Clone)] pub struct SqueezeOperation { - set1: Vec, + set1: HashSet, complement: bool, previous: Option, } @@ -552,7 +531,7 @@ pub struct SqueezeOperation { impl 
SqueezeOperation { pub fn new(set1: Vec, complement: bool) -> SqueezeOperation { SqueezeOperation { - set1, + set1: set1.into_iter().collect(), complement, previous: None, } @@ -562,7 +541,7 @@ impl SqueezeOperation { impl SymbolTranslator for SqueezeOperation { fn translate(&mut self, current: char) -> Option { if self.complement { - let next = if self.set1.iter().any(|c| c.eq(¤t)) { + let next = if self.set1.contains(¤t) { Some(current) } else { match self.previous { @@ -570,20 +549,16 @@ impl SymbolTranslator for SqueezeOperation { if v.eq(¤t) { None } else { - self.previous = Some(current); Some(current) } } - None => { - self.previous = Some(current); - Some(current) - } + None => Some(current), } }; self.previous = Some(current); next } else { - let next = if self.set1.iter().any(|c| c.eq(¤t)) { + let next = if self.set1.contains(¤t) { match self.previous { Some(v) if v == current => None, _ => Some(current), From 3fea69f9ed226f38dd7533c60ee6b13f833b8b65 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Mon, 26 Jul 2021 14:03:47 +0800 Subject: [PATCH 29/50] inline some code Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 2d9e24080..595dcc529 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -47,18 +47,6 @@ pub enum Sequence { } impl Sequence { - // TODO: Can we do better? 
- pub fn convert_octal_to_char(input: &str) -> char { - if input.starts_with("\\") && input.len() > 1 { - u32::from_str_radix(&input[1..], 8) - .map(|u| char::from_u32(u)) - .unwrap() - .unwrap() - } else { - input.chars().next().unwrap() - } - } - pub fn flatten(&self) -> Box> { match self { Sequence::Char(c) => Box::new(std::iter::once(*c)), @@ -243,7 +231,16 @@ impl Sequence { preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), recognize(anychar), )))(input) - .map(|(l, a)| (l, Sequence::convert_octal_to_char(a))) + .map(|(l, a)| { + ( + l, + if let Some(input) = a.strip_prefix('\\') { + char::from_u32(u32::from_str_radix(&input, 8).unwrap()).unwrap() + } else { + input.chars().next().unwrap() + }, + ) + }) } fn parse_char(input: &str) -> IResult<&str, Sequence> { From 36c19293c8c14ade305b6f08f688ae69710ba2d1 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Thu, 29 Jul 2021 21:06:40 +0800 Subject: [PATCH 30/50] Fixing tests Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 504d76ba9..6ac152d66 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -641,7 +641,7 @@ fn check_against_gnu_tr_tests_o() { fn check_against_gnu_tr_tests_p() { // ['p', qw(-d '[:alnum:]'), {IN=>'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'}, {OUT=>''}], new_ucmd!() - .args(&["-d", "[:alnum:]", ""]) + .args(&["-d", "[:alnum:]"]) .pipe_in("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") .succeeds() .stdout_is(""); From 4fb4511da39b460c145eff58b8e1b9dfa73382b0 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Thu, 29 Jul 2021 21:59:30 +0800 Subject: [PATCH 31/50] Fixed empty backslash Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 15 +++++++++++++-- src/uu/tr/src/tr.rs | 8 ++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git 
a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 595dcc529..660ab4883 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -235,9 +235,20 @@ impl Sequence { ( l, if let Some(input) = a.strip_prefix('\\') { - char::from_u32(u32::from_str_radix(&input, 8).unwrap()).unwrap() + if input.is_empty() { + '\\' + } else { + char::from_u32( + u32::from_str_radix(&input, 8) + .expect("We only matched against 0-7 so it should not fail"), + ) + .expect("Cannot convert octal value to character") + } } else { - input.chars().next().unwrap() + input + .chars() + .next() + .expect("We recognized a character so this should not fail") }, ) }) diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 99d7b5132..eb02eb962 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -112,6 +112,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { return 1; } }; + + if set2.len() == 1 && set2[0] == '\\' { + show_error!( + "{}", + "warning: an unescaped backslash at end of string is not portable" + ); + } + if delete_flag { if squeeze_flag { let mut delete_buffer = vec![]; From dc033ab619d4e0b5244329e6865ecf23d8b3a03d Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sat, 31 Jul 2021 21:43:12 +0800 Subject: [PATCH 32/50] Tweaking error handling to use Error class Also handles additional error cases in GNU Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 196 +++++++++++++++++++++++++------------ src/uu/tr/src/tr.rs | 6 +- 2 files changed, 136 insertions(+), 66 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 660ab4883..45217010e 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,15 +1,15 @@ use nom::{ branch::alt, bytes::complete::tag, - character::complete::{anychar, digit1, one_of}, + character::complete::{anychar, one_of}, combinator::{map_opt, recognize}, - multi::{many0, many_m_n}, + multi::{many0, many1, many_m_n}, sequence::{delimited, preceded, 
separated_pair}, IResult, }; use std::{ collections::{HashMap, HashSet}, - fmt::Debug, + fmt::{Debug, Display}, io::{BufRead, Write}, }; @@ -26,6 +26,33 @@ mod unicode_table { pub static BLANK: &'static [char] = &[SPACE, HT]; } +#[derive(Debug)] +pub enum BadSequence { + MissingCharClassName, + MissingEquivalentClassChar, + MultipleCharRepeatInSet2, + CharRepeatInSet1, +} + +impl Display for BadSequence { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BadSequence::MissingCharClassName => { + writeln!(f, "missing character class name '[::]'") + } + BadSequence::MissingEquivalentClassChar => { + writeln!(f, "missing equivalence class character '[==]'") + } + BadSequence::MultipleCharRepeatInSet2 => { + writeln!(f, "only one [c*] repeat construct may appear in string2") + } + BadSequence::CharRepeatInSet1 => { + writeln!(f, "the [c*] repeat construct may not appear in string1") + } + } + } +} + #[derive(Debug, Clone, Copy)] pub enum Sequence { Char(char), @@ -100,11 +127,14 @@ impl Sequence { } // Hide all the nasty sh*t in here + // TODO: Make the 2 set lazily generate the character mapping as necessary. 
pub fn solve_set_characters( - set1: Vec, - set2: Vec, + set1_str: &str, + set2_str: &str, truncate_set1_flag: bool, - ) -> Result<(Vec, Vec), String> { + ) -> Result<(Vec, Vec), BadSequence> { + let set1 = Sequence::from_str(set1_str)?; + let set2 = Sequence::from_str(set2_str)?; let is_char_star = |s: &&Sequence| -> bool { match s { Sequence::CharStar(_) => true, @@ -177,23 +207,17 @@ impl Sequence { } return Ok((set1_solved, set2_solved)); } else { - Err(format!( - "{}: only one [c*] repeat construct may appear in string2", - executable!() - )) + Err(BadSequence::MultipleCharRepeatInSet2) } } else { - Err(format!( - "{}: the [c*] repeat construct may not appear in string1", - executable!() - )) + Err(BadSequence::CharRepeatInSet1) } } } impl Sequence { - pub fn from_str(input: &str) -> Vec { - many0(alt(( + pub fn from_str(input: &str) -> Result, BadSequence> { + let result = many0(alt(( alt(( Sequence::parse_char_range_octal_leftright, Sequence::parse_char_range, @@ -214,8 +238,13 @@ impl Sequence { Sequence::parse_upper, Sequence::parse_xdigit, Sequence::parse_char_equal, - // NOTE: This must be the last one )), + // NOTE: Specific error cases + alt(( + Sequence::parse_empty_bracket, + Sequence::parse_empty_equivalant_char, + )), + // NOTE: This must be the last one alt(( Sequence::parse_octal, Sequence::parse_backslash, @@ -224,11 +253,16 @@ impl Sequence { )))(input) .map(|(_, r)| r) .unwrap() + .into_iter() + .collect::, _>>(); + result } + // TODO: We can surely do better than this :( fn parse_octal_or_char(input: &str) -> IResult<&str, char> { recognize(alt(( preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + preceded(tag("\\"), recognize(anychar)), recognize(anychar), )))(input) .map(|(l, a)| { @@ -238,10 +272,19 @@ impl Sequence { if input.is_empty() { '\\' } else { - char::from_u32( - u32::from_str_radix(&input, 8) - .expect("We only matched against 0-7 so it should not fail"), - ) + char::from_u32(u32::from_str_radix(&input, 
8).unwrap_or_else(|_| { + let c = match input.chars().next().unwrap() { + 'a' => unicode_table::BEL, + 'b' => unicode_table::BS, + 'f' => unicode_table::FF, + 'n' => unicode_table::LF, + 'r' => unicode_table::CR, + 't' => unicode_table::HT, + 'v' => unicode_table::VT, + x => x, + }; + u32::from(c) + })) .expect("Cannot convert octal value to character") } } else { @@ -254,11 +297,11 @@ impl Sequence { }) } - fn parse_char(input: &str) -> IResult<&str, Sequence> { - anychar(input).map(|(l, r)| (l, Sequence::Char(r))) + fn parse_char(input: &str) -> IResult<&str, Result> { + anychar(input).map(|(l, r)| (l, Ok(Sequence::Char(r)))) } - fn parse_backslash(input: &str) -> IResult<&str, Sequence> { + fn parse_backslash(input: &str) -> IResult<&str, Result> { preceded(tag("\\"), anychar)(input).map(|(l, a)| { let c = match a { 'a' => Sequence::Char(unicode_table::BEL), @@ -270,22 +313,22 @@ impl Sequence { 'v' => Sequence::Char(unicode_table::VT), x => Sequence::Char(x), }; - (l, c) + (l, Ok(c)) }) } - fn parse_octal(input: &str) -> IResult<&str, Sequence> { + fn parse_octal(input: &str) -> IResult<&str, Result> { map_opt( preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), |out: &str| { u32::from_str_radix(out, 8) - .map(|u| Sequence::Char(char::from_u32(u).unwrap())) + .map(|u| Ok(Sequence::Char(char::from_u32(u).unwrap()))) .ok() }, )(input) } - fn parse_char_range(input: &str) -> IResult<&str, Sequence> { + fn parse_char_range(input: &str) -> IResult<&str, Result> { separated_pair( Sequence::parse_octal_or_char, tag("-"), @@ -294,12 +337,14 @@ impl Sequence { .map(|(l, (a, b))| { (l, { let (start, end) = (u32::from(a), u32::from(b)); - Sequence::CharRange(start, end) + Ok(Sequence::CharRange(start, end)) }) }) } - fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> { + fn parse_char_range_octal_leftright( + input: &str, + ) -> IResult<&str, Result> { separated_pair( Sequence::parse_octal_or_char, tag("-"), @@ -308,76 +353,96 @@ 
impl Sequence { .map(|(l, (a, b))| { (l, { let (start, end) = (u32::from(a), u32::from(b)); - Sequence::CharRange(start, end) + Ok(Sequence::CharRange(start, end)) }) }) } - fn parse_char_star(input: &str) -> IResult<&str, Sequence> { + fn parse_char_star(input: &str) -> IResult<&str, Result> { delimited(tag("["), Sequence::parse_octal_or_char, tag("*]"))(input) - .map(|(l, a)| (l, Sequence::CharStar(a))) + .map(|(l, a)| (l, Ok(Sequence::CharStar(a)))) } - fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> { + fn parse_char_repeat(input: &str) -> IResult<&str, Result> { delimited( tag("["), - separated_pair(Sequence::parse_octal_or_char, tag("*"), digit1), + separated_pair( + Sequence::parse_octal_or_char, + tag("*"), + recognize(many1(one_of("01234567"))), + ), tag("]"), )(input) - .map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap()))) + .map(|(l, (c, n))| { + ( + l, + Ok(Sequence::CharRepeat( + c, + usize::from_str_radix(n, 8).expect("This should not fail "), + )), + ) + }) } - fn parse_alnum(input: &str) -> IResult<&str, Sequence> { - tag("[:alnum:]")(input).map(|(l, _)| (l, Sequence::Alnum)) + fn parse_alnum(input: &str) -> IResult<&str, Result> { + tag("[:alnum:]")(input).map(|(l, _)| (l, Ok(Sequence::Alnum))) } - fn parse_alpha(input: &str) -> IResult<&str, Sequence> { - tag("[:alpha:]")(input).map(|(l, _)| (l, Sequence::Alpha)) + fn parse_alpha(input: &str) -> IResult<&str, Result> { + tag("[:alpha:]")(input).map(|(l, _)| (l, Ok(Sequence::Alpha))) } - fn parse_blank(input: &str) -> IResult<&str, Sequence> { - tag("[:blank:]")(input).map(|(l, _)| (l, Sequence::Blank)) + fn parse_blank(input: &str) -> IResult<&str, Result> { + tag("[:blank:]")(input).map(|(l, _)| (l, Ok(Sequence::Blank))) } - fn parse_control(input: &str) -> IResult<&str, Sequence> { - tag("[:cntrl:]")(input).map(|(l, _)| (l, Sequence::Control)) + fn parse_control(input: &str) -> IResult<&str, Result> { + tag("[:cntrl:]")(input).map(|(l, _)| (l, Ok(Sequence::Control))) 
} - fn parse_digit(input: &str) -> IResult<&str, Sequence> { - tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::Digit)) + fn parse_digit(input: &str) -> IResult<&str, Result> { + tag("[:digit:]")(input).map(|(l, _)| (l, Ok(Sequence::Digit))) } - fn parse_graph(input: &str) -> IResult<&str, Sequence> { - tag("[:graph:]")(input).map(|(l, _)| (l, Sequence::Graph)) + fn parse_graph(input: &str) -> IResult<&str, Result> { + tag("[:graph:]")(input).map(|(l, _)| (l, Ok(Sequence::Graph))) } - fn parse_lower(input: &str) -> IResult<&str, Sequence> { - tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::Lower)) + fn parse_lower(input: &str) -> IResult<&str, Result> { + tag("[:lower:]")(input).map(|(l, _)| (l, Ok(Sequence::Lower))) } - fn parse_print(input: &str) -> IResult<&str, Sequence> { - tag("[:print:]")(input).map(|(l, _)| (l, Sequence::Print)) + fn parse_print(input: &str) -> IResult<&str, Result> { + tag("[:print:]")(input).map(|(l, _)| (l, Ok(Sequence::Print))) } - fn parse_punct(input: &str) -> IResult<&str, Sequence> { - tag("[:punct:]")(input).map(|(l, _)| (l, Sequence::Punct)) + fn parse_punct(input: &str) -> IResult<&str, Result> { + tag("[:punct:]")(input).map(|(l, _)| (l, Ok(Sequence::Punct))) } - fn parse_space(input: &str) -> IResult<&str, Sequence> { - tag("[:space:]")(input).map(|(l, _)| (l, Sequence::Space)) + fn parse_space(input: &str) -> IResult<&str, Result> { + tag("[:space:]")(input).map(|(l, _)| (l, Ok(Sequence::Space))) } - fn parse_upper(input: &str) -> IResult<&str, Sequence> { - tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::Upper)) + fn parse_upper(input: &str) -> IResult<&str, Result> { + tag("[:upper:]")(input).map(|(l, _)| (l, Ok(Sequence::Upper))) } - fn parse_xdigit(input: &str) -> IResult<&str, Sequence> { - tag("[:xdigit:]")(input).map(|(l, _)| (l, Sequence::Xdigit)) + fn parse_xdigit(input: &str) -> IResult<&str, Result> { + tag("[:xdigit:]")(input).map(|(l, _)| (l, Ok(Sequence::Xdigit))) } - fn parse_char_equal(input: &str) -> 
IResult<&str, Sequence> { + fn parse_char_equal(input: &str) -> IResult<&str, Result> { delimited(tag("[="), Sequence::parse_octal_or_char, tag("=]"))(input) - .map(|(l, c)| (l, Sequence::Char(c))) + .map(|(l, c)| (l, Ok(Sequence::Char(c)))) + } + + fn parse_empty_bracket(input: &str) -> IResult<&str, Result> { + tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName))) + } + + fn parse_empty_equivalant_char(input: &str) -> IResult<&str, Result> { + tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar))) } } @@ -606,7 +671,12 @@ fn test_parse_octal() { for a in '0'..='7' { for b in '0'..='7' { for c in '0'..='7' { - assert!(Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()).len() == 1); + assert!( + Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()) + .unwrap() + .len() + == 1 + ); } } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index eb02eb962..e11887c91 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -100,10 +100,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let locked_stdout = stdout.lock(); let mut buffered_stdout = BufWriter::new(locked_stdout); - let mut sets_iter = sets.into_iter(); + let mut sets_iter = sets.iter().map(|c| c.as_str()); let (set1, set2) = match Sequence::solve_set_characters( - Sequence::from_str(sets_iter.next().unwrap_or_default().as_str()), - Sequence::from_str(sets_iter.next().unwrap_or_default().as_str()), + sets_iter.next().unwrap_or_default(), + sets_iter.next().unwrap_or_default(), truncate_set1_flag, ) { Ok(r) => r, From 8c82cd660c8882e63eaee2d3564e02808ede7ddb Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 10:43:10 +0800 Subject: [PATCH 33/50] Fixing implementation to passes more GNU tests Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/convert.rs | 29 ++++++ src/uu/tr/src/operation.rs | 167 +++++++++++---------------------- src/uu/tr/src/tr.rs | 21 +++-- src/uu/tr/src/unicode_table.rs | 10 ++ 4 files changed, 
107 insertions(+), 120 deletions(-) create mode 100644 src/uu/tr/src/convert.rs create mode 100644 src/uu/tr/src/unicode_table.rs diff --git a/src/uu/tr/src/convert.rs b/src/uu/tr/src/convert.rs new file mode 100644 index 000000000..0584a82f6 --- /dev/null +++ b/src/uu/tr/src/convert.rs @@ -0,0 +1,29 @@ +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{anychar, one_of}, + combinator::{map_opt, recognize}, + multi::{many0, many_m_n}, + sequence::preceded, + IResult, +}; + +fn parse_octal(input: &str) -> IResult<&str, char> { + map_opt( + preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), + |out: &str| { + u32::from_str_radix(out, 8) + .map(|u| char::from_u32(u).unwrap()) + .ok() + }, + )(input) +} + +pub fn reduce_octal_to_char(input: String) -> String { + let result = many0(alt((parse_octal, anychar)))(input.as_str()) + .map(|(_, r)| r) + .unwrap() + .into_iter() + .collect(); + result +} diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 45217010e..71089385d 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -2,8 +2,8 @@ use nom::{ branch::alt, bytes::complete::tag, character::complete::{anychar, one_of}, - combinator::{map_opt, recognize}, - multi::{many0, many1, many_m_n}, + combinator::{map, recognize}, + multi::{many0, many1}, sequence::{delimited, preceded, separated_pair}, IResult, }; @@ -13,18 +13,7 @@ use std::{ io::{BufRead, Write}, }; -mod unicode_table { - pub static BEL: char = '\u{0007}'; - pub static BS: char = '\u{0008}'; - pub static HT: char = '\u{0009}'; - pub static LF: char = '\u{000A}'; - pub static VT: char = '\u{000B}'; - pub static FF: char = '\u{000C}'; - pub static CR: char = '\u{000D}'; - pub static SPACE: char = '\u{0020}'; - pub static SPACES: &'static [char] = &[HT, LF, VT, FF, CR, SPACE]; - pub static BLANK: &'static [char] = &[SPACE, HT]; -} +use crate::unicode_table; #[derive(Debug)] pub enum BadSequence { @@ -32,6 +21,7 @@ pub enum 
BadSequence { MissingEquivalentClassChar, MultipleCharRepeatInSet2, CharRepeatInSet1, + InvalidRepeatCount(String), } impl Display for BadSequence { @@ -49,6 +39,9 @@ impl Display for BadSequence { BadSequence::CharRepeatInSet1 => { writeln!(f, "the [c*] repeat construct may not appear in string1") } + BadSequence::InvalidRepeatCount(count) => { + writeln!(f, "invalid repeat count '{}' in [c*n] construct", count) + } } } } @@ -135,6 +128,7 @@ impl Sequence { ) -> Result<(Vec, Vec), BadSequence> { let set1 = Sequence::from_str(set1_str)?; let set2 = Sequence::from_str(set2_str)?; + let is_char_star = |s: &&Sequence| -> bool { match s { Sequence::CharStar(_) => true, @@ -219,7 +213,6 @@ impl Sequence { pub fn from_str(input: &str) -> Result, BadSequence> { let result = many0(alt(( alt(( - Sequence::parse_char_range_octal_leftright, Sequence::parse_char_range, Sequence::parse_char_star, Sequence::parse_char_repeat, @@ -241,15 +234,12 @@ impl Sequence { )), // NOTE: Specific error cases alt(( - Sequence::parse_empty_bracket, - Sequence::parse_empty_equivalant_char, + Sequence::error_parse_char_repeat, + Sequence::error_parse_empty_bracket, + Sequence::error_parse_empty_equivalant_char, )), // NOTE: This must be the last one - alt(( - Sequence::parse_octal, - Sequence::parse_backslash, - Sequence::parse_char, - )), + map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))), )))(input) .map(|(_, r)| r) .unwrap() @@ -258,97 +248,31 @@ impl Sequence { result } - // TODO: We can surely do better than this :( - fn parse_octal_or_char(input: &str) -> IResult<&str, char> { - recognize(alt(( - preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), - preceded(tag("\\"), recognize(anychar)), - recognize(anychar), - )))(input) - .map(|(l, a)| { - ( - l, - if let Some(input) = a.strip_prefix('\\') { - if input.is_empty() { - '\\' - } else { - char::from_u32(u32::from_str_radix(&input, 8).unwrap_or_else(|_| { - let c = match input.chars().next().unwrap() { - 
'a' => unicode_table::BEL, - 'b' => unicode_table::BS, - 'f' => unicode_table::FF, - 'n' => unicode_table::LF, - 'r' => unicode_table::CR, - 't' => unicode_table::HT, - 'v' => unicode_table::VT, - x => x, - }; - u32::from(c) - })) - .expect("Cannot convert octal value to character") - } - } else { - input - .chars() - .next() - .expect("We recognized a character so this should not fail") - }, - ) - }) - } - - fn parse_char(input: &str) -> IResult<&str, Result> { - anychar(input).map(|(l, r)| (l, Ok(Sequence::Char(r)))) - } - - fn parse_backslash(input: &str) -> IResult<&str, Result> { + fn parse_backslash(input: &str) -> IResult<&str, char> { preceded(tag("\\"), anychar)(input).map(|(l, a)| { let c = match a { - 'a' => Sequence::Char(unicode_table::BEL), - 'b' => Sequence::Char(unicode_table::BS), - 'f' => Sequence::Char(unicode_table::FF), - 'n' => Sequence::Char(unicode_table::LF), - 'r' => Sequence::Char(unicode_table::CR), - 't' => Sequence::Char(unicode_table::HT), - 'v' => Sequence::Char(unicode_table::VT), - x => Sequence::Char(x), + 'a' => unicode_table::BEL, + 'b' => unicode_table::BS, + 'f' => unicode_table::FF, + 'n' => unicode_table::LF, + 'r' => unicode_table::CR, + 't' => unicode_table::HT, + 'v' => unicode_table::VT, + x => x, }; - (l, Ok(c)) + (l, c) }) } - fn parse_octal(input: &str) -> IResult<&str, Result> { - map_opt( - preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), - |out: &str| { - u32::from_str_radix(out, 8) - .map(|u| Ok(Sequence::Char(char::from_u32(u).unwrap()))) - .ok() - }, - )(input) + fn parse_backslash_or_char(input: &str) -> IResult<&str, char> { + alt((Sequence::parse_backslash, anychar))(input) } fn parse_char_range(input: &str) -> IResult<&str, Result> { separated_pair( - Sequence::parse_octal_or_char, + Sequence::parse_backslash_or_char, tag("-"), - Sequence::parse_octal_or_char, - )(input) - .map(|(l, (a, b))| { - (l, { - let (start, end) = (u32::from(a), u32::from(b)); - Ok(Sequence::CharRange(start, end)) 
- }) - }) - } - - fn parse_char_range_octal_leftright( - input: &str, - ) -> IResult<&str, Result> { - separated_pair( - Sequence::parse_octal_or_char, - tag("-"), - Sequence::parse_octal_or_char, + Sequence::parse_backslash_or_char, )(input) .map(|(l, (a, b))| { (l, { @@ -359,7 +283,7 @@ impl Sequence { } fn parse_char_star(input: &str) -> IResult<&str, Result> { - delimited(tag("["), Sequence::parse_octal_or_char, tag("*]"))(input) + delimited(tag("["), Sequence::parse_backslash_or_char, tag("*]"))(input) .map(|(l, a)| (l, Ok(Sequence::CharStar(a)))) } @@ -367,19 +291,21 @@ impl Sequence { delimited( tag("["), separated_pair( - Sequence::parse_octal_or_char, + Sequence::parse_backslash_or_char, tag("*"), recognize(many1(one_of("01234567"))), ), tag("]"), )(input) - .map(|(l, (c, n))| { + .map(|(l, (c, str))| { ( l, - Ok(Sequence::CharRepeat( - c, - usize::from_str_radix(n, 8).expect("This should not fail "), - )), + match usize::from_str_radix(str, 8) + .expect("This should not fail because we only parse against 0-7") + { + 0 => Ok(Sequence::CharStar(c)), + count => Ok(Sequence::CharRepeat(c, count)), + }, ) }) } @@ -433,15 +359,32 @@ impl Sequence { } fn parse_char_equal(input: &str) -> IResult<&str, Result> { - delimited(tag("[="), Sequence::parse_octal_or_char, tag("=]"))(input) + delimited(tag("[="), Sequence::parse_backslash_or_char, tag("=]"))(input) .map(|(l, c)| (l, Ok(Sequence::Char(c)))) } +} - fn parse_empty_bracket(input: &str) -> IResult<&str, Result> { +impl Sequence { + fn error_parse_char_repeat(input: &str) -> IResult<&str, Result> { + delimited( + tag("["), + separated_pair( + Sequence::parse_backslash_or_char, + tag("*"), + recognize(many1(one_of("0123456789"))), + ), + tag("]"), + )(input) + .map(|(l, (_, n))| (l, Err(BadSequence::InvalidRepeatCount(n.to_string())))) + } + + fn error_parse_empty_bracket(input: &str) -> IResult<&str, Result> { tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName))) } - fn 
parse_empty_equivalant_char(input: &str) -> IResult<&str, Result> { + fn error_parse_empty_equivalant_char( + input: &str, + ) -> IResult<&str, Result> { tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar))) } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index e11887c91..a7faffe56 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -14,7 +14,9 @@ extern crate uucore; extern crate nom; +mod convert; mod operation; +mod unicode_table; use clap::{crate_version, App, Arg}; use nom::AsBytes; @@ -64,7 +66,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let sets = matches .values_of(options::SETS) - .map(|v| v.map(ToString::to_string).collect::>()) + .map(|v| { + v.map(ToString::to_string) + .map(convert::reduce_octal_to_char) + .collect::>() + }) .unwrap_or_default(); let sets_len = sets.len(); @@ -94,6 +100,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { return 1; } + if let Some(first) = sets.get(0) { + if first.ends_with(r"\") { + show_error!("warning: an unescaped backslash at end of string is not portable"); + } + } + let stdin = stdin(); let mut locked_stdin = stdin.lock(); let stdout = stdout(); @@ -113,13 +125,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } }; - if set2.len() == 1 && set2[0] == '\\' { - show_error!( - "{}", - "warning: an unescaped backslash at end of string is not portable" - ); - } - if delete_flag { if squeeze_flag { let mut delete_buffer = vec![]; diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs new file mode 100644 index 000000000..1ec6a4fdb --- /dev/null +++ b/src/uu/tr/src/unicode_table.rs @@ -0,0 +1,10 @@ +pub static BEL: char = '\u{0007}'; +pub static BS: char = '\u{0008}'; +pub static HT: char = '\u{0009}'; +pub static LF: char = '\u{000A}'; +pub static VT: char = '\u{000B}'; +pub static FF: char = '\u{000C}'; +pub static CR: char = '\u{000D}'; +pub static SPACE: char = '\u{0020}'; +pub static SPACES: &'static [char] = &[HT, LF, VT, FF, CR, 
SPACE]; +pub static BLANK: &'static [char] = &[SPACE, HT]; From 5bf0197da514654a13562bc565e26de1cdf0e65c Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 10:43:28 +0800 Subject: [PATCH 34/50] Added all GNU tests as rust tests Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 387 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 368 insertions(+), 19 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 6ac152d66..22d431c33 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -102,7 +102,7 @@ fn test_complement5() { // $ echo -n '0x1y2z3' | tr -c '\0-@' '*-~' // 0a1b2c3 new_ucmd!() - .args(&["-c", "\\0-@", "*-~"]) + .args(&["-c", r"\0-@", "*-~"]) .pipe_in("0x1y2z3") .run() .stdout_is("0a1b2c3"); @@ -527,13 +527,16 @@ fn check_against_gnu_tr_tests_d() { } #[test] +#[ignore = "the character from \\0->\\5 is not printable (meaning that they wont even get piped in). So its kind of tricky to test them"] fn check_against_gnu_tr_tests_e() { // ['e', qw(-s '[\0-\5]'), {IN=>"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"}, {OUT=>"\0a\1b\2c\3d\4e\5"}], new_ucmd!() - .args(&["-s", r#"[\0-\5]"#]) - .pipe_in(r#"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"#) + .args(&["-s", "[\\0-\\5]"]) + .pipe_in( + "\u{0}\u{0}a\u{1}\u{1}b\u{2}\u{2}\u{2}c\u{3}\u{3}\u{3}d\u{4}\u{4}\u{4}\u{4}e\u{5}\u{5}", + ) .succeeds() - .stdout_is(r#"\0a\1b\2c\3d\4e\5"#); + .stdout_is("\u{0}a\u{1}b\u{2}c\u{3}d\u{4}e\u{5}"); } #[test] @@ -581,8 +584,8 @@ fn check_against_gnu_tr_tests_i() { fn check_against_gnu_tr_tests_j() { // ['j', qw(-d '[:digit:]'), {IN=>'0123456789'}, {OUT=>''}], new_ucmd!() - .args(&["", "", ""]) - .pipe_in("") + .args(&["-d", "[:digit:]"]) + .pipe_in("0123456789") .succeeds() .stdout_is(""); } @@ -655,94 +658,440 @@ fn check_against_gnu_tr_tests_q() { .pipe_in(".abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.") .succeeds() .stdout_is(".."); +} + +#[test] +fn check_against_gnu_tr_tests_r() { // 
['r', qw(-ds '[:alnum:]' .), // {IN=>'.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.'}, // {OUT=>'.'}], - // + new_ucmd!() + .args(&["-ds", "[:alnum:]", "."]) + .pipe_in(".abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.") + .succeeds() + .stdout_is("."); +} + +#[test] +fn check_against_gnu_tr_tests_s() { // # The classic example, with string2 BSD-style // ['s', qw(-cs '[:alnum:]' '\n'), // {IN=>'The big black fox jumped over the fence.'}, // {OUT=>"The\nbig\nblack\nfox\njumped\nover\nthe\nfence\n"}], - // + new_ucmd!() + .args(&["-cs", "[:alnum:]", "\n"]) + .pipe_in("The big black fox jumped over the fence.") + .succeeds() + .stdout_is("The\nbig\nblack\nfox\njumped\nover\nthe\nfence\n"); +} + +#[test] +fn check_against_gnu_tr_tests_t() { // # The classic example, POSIX-style // ['t', qw(-cs '[:alnum:]' '[\n*]'), // {IN=>'The big black fox jumped over the fence.'}, // {OUT=>"The\nbig\nblack\nfox\njumped\nover\nthe\nfence\n"}], + new_ucmd!() + .args(&["-cs", "[:alnum:]", "[\n*]"]) + .pipe_in("The big black fox jumped over the fence.") + .succeeds() + .stdout_is("The\nbig\nblack\nfox\njumped\nover\nthe\nfence\n"); +} + +#[test] +fn check_against_gnu_tr_tests_u() { // ['u', qw(-ds b a), {IN=>'aabbaa'}, {OUT=>'a'}], + new_ucmd!() + .args(&["-ds", "b", "a"]) + .pipe_in("aabbaa") + .succeeds() + .stdout_is("a"); +} + +#[test] +fn check_against_gnu_tr_tests_v() { // ['v', qw(-ds '[:xdigit:]' Z), {IN=>'ZZ0123456789acbdefABCDEFZZ'}, {OUT=>'Z'}], - // + new_ucmd!() + .args(&["-ds", "[:xdigit:]", "Z"]) + .pipe_in("ZZ0123456789acbdefABCDEFZZ") + .succeeds() + .stdout_is("Z"); +} + +#[test] +fn check_against_gnu_tr_tests_w() { // # Try some data with 8th bit set in case something is mistakenly // # sign-extended. 
// ['w', qw(-ds '\350' '\345'), // {IN=>"\300\301\377\345\345\350\345"}, // {OUT=>"\300\301\377\345"}], + new_ucmd!() + .args(&["-ds", "\u{350}", "\u{345}"]) + .pipe_in("\u{300}\u{301}\u{377}\u{345}\u{345}\u{350}\u{345}") + .succeeds() + .stdout_is("\u{300}\u{301}\u{377}\u{345}"); +} + +#[test] +fn check_against_gnu_tr_tests_x() { // ['x', qw(-s abcdefghijklmn '[:*016]'), // {IN=>'abcdefghijklmnop'}, {OUT=>':op'}], + new_ucmd!() + .args(&["-s", "abcdefghijklmn", "[:*016]"]) + .pipe_in("abcdefghijklmnop") + .succeeds() + .stdout_is(":op"); +} + +#[test] +fn check_against_gnu_tr_tests_y() { // ['y', qw(-d a-z), {IN=>'abc $code'}, {OUT=>' $'}], + new_ucmd!() + .args(&["-d", "a-z"]) + .pipe_in("abc $code") + .succeeds() + .stdout_is(" $"); +} + +#[test] +fn check_against_gnu_tr_tests_z() { // ['z', qw(-ds a-z '$.'), {IN=>'a.b.c $$$$code\\'}, {OUT=>'. $\\'}], - // + new_ucmd!() + .args(&["-ds", "a-z", "$."]) + .pipe_in("a.b.c $$$$code\\") + .succeeds() + .stdout_is(". $\\"); +} + +#[test] +fn check_against_gnu_tr_tests_range_a_a() { // # Make sure that a-a is accepted. 
// ['range-a-a', qw(a-a z), {IN=>'abc'}, {OUT=>'zbc'}], - // # + new_ucmd!() + .args(&["a-a", "z"]) + .pipe_in("abc") + .succeeds() + .stdout_is("zbc"); +} + +#[test] +fn check_against_gnu_tr_tests_null() { // ['null', qw(a ''), {IN=>''}, {OUT=>''}, {EXIT=>1}, // {ERR=>"$prog: when not truncating set1, string2 must be non-empty\n"}], + new_ucmd!() + .args(&["a", ""]) + .pipe_in("") + .fails() + .stderr_is("tr: when not truncating set1, string2 must be non-empty\n"); +} + +#[test] +fn check_against_gnu_tr_tests_upcase() { // ['upcase', qw('[:lower:]' '[:upper:]'), // {IN=>'abcxyzABCXYZ'}, // {OUT=>'ABCXYZABCXYZ'}], + new_ucmd!() + .args(&["[:lower:]", "[:upper:]"]) + .pipe_in("abcxyzABCXYZ") + .succeeds() + .stdout_is("ABCXYZABCXYZ"); +} + +#[test] +fn check_against_gnu_tr_tests_dncase() { // ['dncase', qw('[:upper:]' '[:lower:]'), // {IN=>'abcxyzABCXYZ'}, // {OUT=>'abcxyzabcxyz'}], - // # + new_ucmd!() + .args(&["[:upper:]", "[:lower:]"]) + .pipe_in("abcxyzABCXYZ") + .succeeds() + .stdout_is("abcxyzabcxyz"); +} + +#[test] +fn check_against_gnu_tr_tests_rep_cclass() { // ['rep-cclass', qw('a[=*2][=c=]' xyyz), {IN=>'a=c'}, {OUT=>'xyz'}], + new_ucmd!() + .args(&["a[=*2][=c=]", "xyyz"]) + .pipe_in("a=c") + .succeeds() + .stdout_is("xyz"); +} + +#[test] +fn check_against_gnu_tr_tests_rep_1() { // ['rep-1', qw('[:*3][:digit:]' a-m), {IN=>':1239'}, {OUT=>'cefgm'}], + new_ucmd!() + .args(&["[:*3][:digit:]", "a-m"]) + .pipe_in(":1239") + .succeeds() + .stdout_is("cefgm"); +} + +#[test] +fn check_against_gnu_tr_tests_rep_2() { // ['rep-2', qw('a[b*512]c' '1[x*]2'), {IN=>'abc'}, {OUT=>'1x2'}], + new_ucmd!() + .args(&["a[b*512]c", "1[x*]2"]) + .pipe_in("abc") + .succeeds() + .stdout_is("1x2"); +} + +#[test] +fn check_against_gnu_tr_tests_rep_3() { // ['rep-3', qw('a[b*513]c' '1[x*]2'), {IN=>'abc'}, {OUT=>'1x2'}], + new_ucmd!() + .args(&["a[b*513]c", "1[x*]2"]) + .pipe_in("abc") + .succeeds() + .stdout_is("1x2"); +} + +#[test] +fn check_against_gnu_tr_tests_o_rep_1() { // # 
Another couple octal repeat count tests. // ['o-rep-1', qw('[b*08]' '[x*]'), {IN=>''}, {OUT=>''}, {EXIT=>1}, // {ERR=>"$prog: invalid repeat count '08' in [c*n] construct\n"}], + new_ucmd!() + .args(&["[b*08]", "[x*]"]) + .pipe_in("") + .fails() + .stderr_is("tr: invalid repeat count '08' in [c*n] construct\n"); +} + +#[test] +fn check_against_gnu_tr_tests_o_rep_2() { // ['o-rep-2', qw('[b*010]cd' '[a*7]BC[x*]'), {IN=>'bcd'}, {OUT=>'BCx'}], - // + new_ucmd!() + .args(&["[b*010]cd", "[a*7]BC[x*]"]) + .pipe_in("bcd") + .succeeds() + .stdout_is("BCx"); +} + +#[test] +fn check_against_gnu_tr_tests_esc() { // ['esc', qw('a\-z' A-Z), {IN=>'abc-z'}, {OUT=>'AbcBC'}], + new_ucmd!() + .args(&[r"a\-z", "A-Z"]) + .pipe_in("abc-z") + .succeeds() + .stdout_is("AbcBC"); +} + +#[test] +fn check_against_gnu_tr_tests_bs_055() { // ['bs-055', qw('a\055b' def), {IN=>"a\055b"}, {OUT=>'def'}], + new_ucmd!() + .args(&["a\u{055}b", "def"]) + .pipe_in("a\u{055}b") + .succeeds() + .stdout_is("def"); +} + +#[test] +fn check_against_gnu_tr_tests_bs_at_end() { // ['bs-at-end', qw('\\' x), {IN=>"\\"}, {OUT=>'x'}, // {ERR=>"$prog: warning: an unescaped backslash at end of " // . "string is not portable\n"}], - // - // # + new_ucmd!() + .args(&[r"\", "x"]) + .pipe_in(r"\") + .succeeds() + .stdout_is("x") + .stderr_is("tr: warning: an unescaped backslash at end of string is not portable"); +} + +#[test] +#[ignore = "not sure why GNU bails here. `[Y*]` should be able to generate all the mapping"] +fn check_against_gnu_tr_tests_ross_0a() { // # From Ross // ['ross-0a', qw(-cs '[:upper:]' 'X[Y*]'), {IN=>''}, {OUT=>''}, {EXIT=>1}, // {ERR=>$map_all_to_1}], + new_ucmd!() + .args(&["-cs", "[:upper:]", "X[Y*]"]) + .pipe_in("") + .fails() + .stderr_is("tr: when translating with complemented character classes,\nstring2 must map all characters in the domain to one"); +} + +#[test] +#[ignore = "not sure why GNU bails here. 
`[Y*]` should be able to generate all the mapping"] +fn check_against_gnu_tr_tests_ross_0b() { // ['ross-0b', qw(-cs '[:cntrl:]' 'X[Y*]'), {IN=>''}, {OUT=>''}, {EXIT=>1}, // {ERR=>$map_all_to_1}], + new_ucmd!() + .args(&["-cs", "[:cntrl:]", "X[Y*]"]) + .pipe_in("") + .fails() + .stderr_is("tr: when translating with complemented character classes,\nstring2 must map all characters in the domain to one"); +} + +#[test] +fn check_against_gnu_tr_tests_ross_1a() { // ['ross-1a', qw(-cs '[:upper:]' '[X*]'), // {IN=>'AMZamz123.-+AMZ'}, {OUT=>'AMZXAMZ'}], + new_ucmd!() + .args(&["-cs", "[:upper:]", "[X*]"]) + .pipe_in("AMZamz123.-+AMZ") + .succeeds() + .stdout_is("AMZXAMZ"); +} + +#[test] +fn check_against_gnu_tr_tests_ross_1b() { // ['ross-1b', qw(-cs '[:upper:][:digit:]' '[Z*]'), {IN=>''}, {OUT=>''}], + new_ucmd!() + .args(&["-cs", "[:upper:][:digit:]", "[Z*]"]) + .pipe_in("") + .succeeds() + .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_ross_2() { // ['ross-2', qw(-dcs '[:lower:]' n-rs-z), // {IN=>'amzAMZ123.-+amz'}, {OUT=>'amzamz'}], + new_ucmd!() + .args(&["-dcs", "[:lower:]", "n-rs-z"]) + .pipe_in("amzAMZ123.-+amz") + .succeeds() + .stdout_is("amzamz"); +} + +#[test] +fn check_against_gnu_tr_tests_ross_3() { // ['ross-3', qw(-ds '[:xdigit:]' '[:alnum:]'), // {IN=>'.ZABCDEFGzabcdefg.0123456788899.GG'}, {OUT=>'.ZGzg..G'}], + new_ucmd!() + .args(&["-ds", "[:xdigit:]", "[:alnum:]"]) + .pipe_in(".ZABCDEFGzabcdefg.0123456788899.GG") + .succeeds() + .stdout_is(".ZGzg..G"); +} + +#[test] +fn check_against_gnu_tr_tests_ross_4() { // ['ross-4', qw(-dcs '[:alnum:]' '[:digit:]'), {IN=>''}, {OUT=>''}], + new_ucmd!() + .args(&["-dcs", "[:alnum:]", "[:digit:]"]) + .pipe_in("") + .succeeds() + .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_ross_5() { // ['ross-5', qw(-dc '[:lower:]'), {IN=>''}, {OUT=>''}], + new_ucmd!() + .args(&["-dc", "[:lower:]"]) + .pipe_in("") + .succeeds() + .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_ross_6() { // 
['ross-6', qw(-dc '[:upper:]'), {IN=>''}, {OUT=>''}], - // + new_ucmd!() + .args(&["-dc", "[:upper:]"]) + .pipe_in("") + .succeeds() + .stdout_is(""); +} + +#[test] +fn check_against_gnu_tr_tests_empty_eq() { // # Ensure that these fail. // # Prior to 2.0.20, each would evoke a failed assertion. // ['empty-eq', qw('[==]' x), {IN=>''}, {OUT=>''}, {EXIT=>1}, // {ERR=>"$prog: missing equivalence class character '[==]'\n"}], + new_ucmd!() + .args(&["[==]", "x"]) + .pipe_in("") + .fails() + .stderr_is("tr: missing equivalence class character '[==]'\n"); +} + +#[test] +fn check_against_gnu_tr_tests_empty_cc() { // ['empty-cc', qw('[::]' x), {IN=>''}, {OUT=>''}, {EXIT=>1}, // {ERR=>"$prog: missing character class name '[::]'\n"}], - // + new_ucmd!() + .args(&["[::]", "x"]) + .pipe_in("") + .fails() + .stderr_is("tr: missing character class name '[::]'\n"); +} + +#[test] +fn check_against_gnu_tr_tests_repeat_bs_9() { // # Weird repeat counts. // ['repeat-bs-9', qw(abc '[b*\9]'), {IN=>'abcd'}, {OUT=>'[b*d'}], + new_ucmd!() + .args(&["abc", r"[b*\9]"]) + .pipe_in("abcd") + .succeeds() + .stdout_is("[b*d"); +} + +#[test] +fn check_against_gnu_tr_tests_repeat_0() { // ['repeat-0', qw(abc '[b*0]'), {IN=>'abcd'}, {OUT=>'bbbd'}], + new_ucmd!() + .args(&["abc", "[b*0]"]) + .pipe_in("abcd") + .succeeds() + .stdout_is("bbbd"); +} + +#[test] +fn check_against_gnu_tr_tests_repeat_zeros() { // ['repeat-zeros', qw(abc '[b*00000000000000000000]'), // {IN=>'abcd'}, {OUT=>'bbbd'}], + new_ucmd!() + .args(&["abc", "[b*00000000000000000000]"]) + .pipe_in("abcd") + .succeeds() + .stdout_is("bbbd"); +} + +#[test] +fn check_against_gnu_tr_tests_repeat_compl() { // ['repeat-compl', qw(-c '[a*65536]\n' '[b*]'), {IN=>'abcd'}, {OUT=>'abbb'}], + new_ucmd!() + .args(&["-c", "[a*65536]\n", "[b*]"]) + .pipe_in("abcd") + .succeeds() + .stdout_is("abbb"); +} + +#[test] +fn check_against_gnu_tr_tests_repeat_x_c() { // ['repeat-xC', qw(-C '[a*65536]\n' '[b*]'), {IN=>'abcd'}, {OUT=>'abbb'}], - // + 
new_ucmd!() + .args(&["-C", "[a*65536]\n", "[b*]"]) + .pipe_in("abcd") + .succeeds() + .stdout_is("abbb"); +} + +#[test] +#[ignore = "I think either clap-rs or uutils is parsing the '-H' as an argument..."] +fn check_against_gnu_tr_tests_fowler_1() { // # From Glenn Fowler. // ['fowler-1', qw(ah -H), {IN=>'aha'}, {OUT=>'-H-'}], - // + new_ucmd!() + .args(&["ah", "-H"]) + .pipe_in("aha") + .succeeds() + .stdout_is("-H-"); +} + +#[test] +fn check_against_gnu_tr_tests_no_abort_1() { // # Up to coreutils-6.9, this would provoke a failed assertion. // ['no-abort-1', qw(-c a '[b*256]'), {IN=>'abc'}, {OUT=>'abb'}], } From 3fa56eabce6bf7deb012649e8c25e3274bdc5b48 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 12:16:11 +0800 Subject: [PATCH 35/50] Fixed clippy issues Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 39 +++++++++++++--------------------- src/uu/tr/src/tr.rs | 6 +++--- src/uu/tr/src/unicode_table.rs | 4 ++-- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 71089385d..9660e594a 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -75,7 +75,7 @@ impl Sequence { Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)), Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')), Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')), - Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()), + Sequence::Blank => Box::new(unicode_table::BLANK.iter().cloned()), Sequence::Control => Box::new( (0..=31) .chain(std::iter::once(127)) @@ -113,7 +113,7 @@ impl Sequence { .chain(123..=126) .flat_map(char::from_u32), ), - Sequence::Space => Box::new(unicode_table::SPACES.into_iter().cloned()), + Sequence::Space => Box::new(unicode_table::SPACES.iter().cloned()), Sequence::Upper => Box::new('A'..='Z'), Sequence::Xdigit => Box::new(('0'..='9').chain('A'..='F').chain('a'..='f')), } @@ -129,12 
+129,7 @@ impl Sequence { let set1 = Sequence::from_str(set1_str)?; let set2 = Sequence::from_str(set2_str)?; - let is_char_star = |s: &&Sequence| -> bool { - match s { - Sequence::CharStar(_) => true, - _ => false, - } - }; + let is_char_star = |s: &&Sequence| -> bool { matches!(s, Sequence::CharStar(_)) }; let set1_star_count = set1.iter().filter(is_char_star).count(); if set1_star_count == 0 { let set2_star_count = set2.iter().filter(is_char_star).count(); @@ -143,10 +138,9 @@ impl Sequence { Sequence::CharStar(c) => Some(c), _ => None, }); - let mut partition = set2.as_slice().split(|s| match s { - Sequence::CharStar(_) => true, - _ => false, - }); + let mut partition = set2 + .as_slice() + .split(|s| matches!(s, Sequence::CharStar(_))); let set1_len = set1.iter().flat_map(Sequence::flatten).count(); let set2_len = set2 .iter() @@ -199,7 +193,7 @@ impl Sequence { if truncate_set1_flag { set1_solved.truncate(set2_solved.len()); } - return Ok((set1_solved, set2_solved)); + Ok((set1_solved, set2_solved)) } else { Err(BadSequence::MultipleCharRepeatInSet2) } @@ -211,7 +205,7 @@ impl Sequence { impl Sequence { pub fn from_str(input: &str) -> Result, BadSequence> { - let result = many0(alt(( + many0(alt(( alt(( Sequence::parse_char_range, Sequence::parse_char_star, @@ -244,8 +238,7 @@ impl Sequence { .map(|(_, r)| r) .unwrap() .into_iter() - .collect::, _>>(); - result + .collect::, _>>() } fn parse_backslash(input: &str) -> IResult<&str, char> { @@ -442,21 +435,19 @@ pub struct TranslateOperationStandard { impl TranslateOperationStandard { fn new(set1: Vec, set2: Vec) -> Result { - if let Some(fallback) = set2.last().map(|s| *s) { + if let Some(fallback) = set2.last().copied() { Ok(TranslateOperationStandard { translation_map: set1 .into_iter() .zip(set2.into_iter().chain(std::iter::repeat(fallback))) .collect::>(), }) + } else if set1.is_empty() && set2.is_empty() { + Ok(TranslateOperationStandard { + translation_map: HashMap::new(), + }) } else { - if 
set1.is_empty() && set2.is_empty() { - Ok(TranslateOperationStandard { - translation_map: HashMap::new(), - }) - } else { - Err("when not truncating set1, string2 must be non-empty".to_string()) - } + Err("when not truncating set1, string2 must be non-empty".to_string()) } } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index a7faffe56..872f894c2 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -101,7 +101,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } if let Some(first) = sets.get(0) { - if first.ends_with(r"\") { + if first.ends_with('\\') { show_error!("warning: an unescaped backslash at end of string is not portable"); } } @@ -130,7 +130,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut delete_buffer = vec![]; { let mut delete_writer = BufWriter::new(&mut delete_buffer); - let delete_op = DeleteOperation::new(set1.clone(), complement_flag); + let delete_op = DeleteOperation::new(set1, complement_flag); translate_input(&mut locked_stdin, &mut delete_writer, delete_op); } { @@ -150,7 +150,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut translate_buffer = vec![]; { let mut writer = BufWriter::new(&mut translate_buffer); - match TranslateOperation::new(set1.clone(), set2.clone(), complement_flag) { + match TranslateOperation::new(set1, set2.clone(), complement_flag) { Ok(op) => translate_input(&mut locked_stdin, &mut writer, op), Err(s) => { show_error!("{}", s); diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs index 1ec6a4fdb..781e4cdba 100644 --- a/src/uu/tr/src/unicode_table.rs +++ b/src/uu/tr/src/unicode_table.rs @@ -6,5 +6,5 @@ pub static VT: char = '\u{000B}'; pub static FF: char = '\u{000C}'; pub static CR: char = '\u{000D}'; pub static SPACE: char = '\u{0020}'; -pub static SPACES: &'static [char] = &[HT, LF, VT, FF, CR, SPACE]; -pub static BLANK: &'static [char] = &[SPACE, HT]; +pub static SPACES: &[char] = &[HT, LF, VT, FF, CR, SPACE]; +pub static BLANK: &[char] = &[SPACE, 
HT]; From d813e00588c578ce3780163a2de202308d48912c Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 12:32:35 +0800 Subject: [PATCH 36/50] Don't convert octal if its not valid character Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/convert.rs | 3 ++- src/uu/tr/src/operation.rs | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/uu/tr/src/convert.rs b/src/uu/tr/src/convert.rs index 0584a82f6..c143681e3 100644 --- a/src/uu/tr/src/convert.rs +++ b/src/uu/tr/src/convert.rs @@ -13,8 +13,9 @@ fn parse_octal(input: &str) -> IResult<&str, char> { preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), |out: &str| { u32::from_str_radix(out, 8) - .map(|u| char::from_u32(u).unwrap()) + .map(|u| char::from_u32(u)) .ok() + .flatten() }, )(input) } diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 9660e594a..9c6ca6b4a 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -22,6 +22,7 @@ pub enum BadSequence { MultipleCharRepeatInSet2, CharRepeatInSet1, InvalidRepeatCount(String), + EmptySet2WhenNotTruncatingSet1, } impl Display for BadSequence { @@ -42,6 +43,9 @@ impl Display for BadSequence { BadSequence::InvalidRepeatCount(count) => { writeln!(f, "invalid repeat count '{}' in [c*n] construct", count) } + BadSequence::EmptySet2WhenNotTruncatingSet1 => { + writeln!(f, "when not truncating set1, string2 must be non-empty") + } } } } @@ -434,7 +438,7 @@ pub struct TranslateOperationStandard { } impl TranslateOperationStandard { - fn new(set1: Vec, set2: Vec) -> Result { + fn new(set1: Vec, set2: Vec) -> Result { if let Some(fallback) = set2.last().copied() { Ok(TranslateOperationStandard { translation_map: set1 @@ -447,7 +451,7 @@ impl TranslateOperationStandard { translation_map: HashMap::new(), }) } else { - Err("when not truncating set1, string2 must be non-empty".to_string()) + Err(BadSequence::EmptySet2WhenNotTruncatingSet1) } } } @@ -473,7 +477,7 @@ impl 
TranslateOperation { set1: Vec, set2: Vec, complement: bool, - ) -> Result { + ) -> Result { if complement { Ok(TranslateOperation::Complement( TranslateOperationComplement::new(set1, set2), From d0b3a15994c222a5030fef07d486c9e343320d6c Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 12:33:07 +0800 Subject: [PATCH 37/50] Updated test ignore description Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 22d431c33..645a777d0 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -527,11 +527,11 @@ fn check_against_gnu_tr_tests_d() { } #[test] -#[ignore = "the character from \\0->\\5 is not printable (meaning that they wont even get piped in). So its kind of tricky to test them"] +#[ignore = "I cannot tell if this means that tr preserve the octal representation?"] fn check_against_gnu_tr_tests_e() { // ['e', qw(-s '[\0-\5]'), {IN=>"\0\0a\1\1b\2\2\2c\3\3\3d\4\4\4\4e\5\5"}, {OUT=>"\0a\1b\2c\3d\4e\5"}], new_ucmd!() - .args(&["-s", "[\\0-\\5]"]) + .args(&["-s", r"[\0-\5]"]) .pipe_in( "\u{0}\u{0}a\u{1}\u{1}b\u{2}\u{2}\u{2}c\u{3}\u{3}\u{3}d\u{4}\u{4}\u{4}\u{4}e\u{5}\u{5}", ) From 106ba4b77ddc6a9db55e01b8880edce15ebd3c1e Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 06:43:35 +0800 Subject: [PATCH 38/50] Added one last missing test Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 645a777d0..5bc4f065b 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -1094,4 +1094,9 @@ fn check_against_gnu_tr_tests_fowler_1() { fn check_against_gnu_tr_tests_no_abort_1() { // # Up to coreutils-6.9, this would provoke a failed assertion. 
// ['no-abort-1', qw(-c a '[b*256]'), {IN=>'abc'}, {OUT=>'abb'}], + new_ucmd!() + .args(&["-c", "a", "[b*256]"]) + .pipe_in("abc") + .succeeds() + .stdout_is("abb"); } From df7da4e907d9674f0f61a56f08a046335a34c000 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 20:50:41 +0800 Subject: [PATCH 39/50] Fixed clippy issues Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/convert.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/tr/src/convert.rs b/src/uu/tr/src/convert.rs index c143681e3..d1e925c2b 100644 --- a/src/uu/tr/src/convert.rs +++ b/src/uu/tr/src/convert.rs @@ -13,7 +13,7 @@ fn parse_octal(input: &str) -> IResult<&str, char> { preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), |out: &str| { u32::from_str_radix(out, 8) - .map(|u| char::from_u32(u)) + .map(char::from_u32) .ok() .flatten() }, From 0032f2c4a0ea348a6d3512e50ce510d805c281c1 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 1 Aug 2021 21:01:01 +0800 Subject: [PATCH 40/50] Fixed some spelling issues Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/convert.rs | 2 ++ src/uu/tr/src/operation.rs | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/uu/tr/src/convert.rs b/src/uu/tr/src/convert.rs index d1e925c2b..27f31491f 100644 --- a/src/uu/tr/src/convert.rs +++ b/src/uu/tr/src/convert.rs @@ -1,3 +1,5 @@ +// spell-checker:ignore (strings) anychar combinator + use nom::{ branch::alt, bytes::complete::tag, diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 9c6ca6b4a..73ec27c14 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,3 +1,5 @@ +// spell-checker:ignore (strings) anychar combinator Alnum Punct Xdigit alnum punct xdigit cntrl + use nom::{ branch::alt, bytes::complete::tag, @@ -234,7 +236,7 @@ impl Sequence { alt(( Sequence::error_parse_char_repeat, Sequence::error_parse_empty_bracket, - Sequence::error_parse_empty_equivalant_char, + 
Sequence::error_parse_empty_equivalent_char, )), // NOTE: This must be the last one map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))), @@ -379,7 +381,7 @@ impl Sequence { tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName))) } - fn error_parse_empty_equivalant_char( + fn error_parse_empty_equivalent_char( input: &str, ) -> IResult<&str, Result> { tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar))) From 9c6f2c765df233c35c8d86b61d6974994ecff8ca Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Mon, 2 Aug 2021 00:00:33 +0800 Subject: [PATCH 41/50] Removed bad test Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 73ec27c14..1f17809ec 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -605,19 +605,3 @@ where output_buf.clear(); } } - -#[test] -fn test_parse_octal() { - for a in '0'..='7' { - for b in '0'..='7' { - for c in '0'..='7' { - assert!( - Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()) - .unwrap() - .len() - == 1 - ); - } - } - } -} From 186886cd6991ffe00a69b26b3c95f77999df1ba2 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sat, 14 Aug 2021 19:10:17 +0800 Subject: [PATCH 42/50] Ignore 1 test that is failing only in Windows Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 5bc4f065b..47b097d9d 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -892,6 +892,7 @@ fn check_against_gnu_tr_tests_bs_055() { } #[test] +#[ignore = "Failing in Windows because it will not separate '\' and 'x' as separate arguments"] fn check_against_gnu_tr_tests_bs_at_end() { // ['bs-at-end', qw('\\' x), {IN=>"\\"}, {OUT=>'x'}, // {ERR=>"$prog: warning: an unescaped backslash at end of " From 
f464879b124c82d221b380764dd4a9f83ef3e019 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 19 Sep 2021 23:15:28 +0800 Subject: [PATCH 43/50] Reduce MSRV to 1.47.0 Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/convert.rs | 2 +- src/uu/tr/src/operation.rs | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/uu/tr/src/convert.rs b/src/uu/tr/src/convert.rs index 27f31491f..44ee67ad1 100644 --- a/src/uu/tr/src/convert.rs +++ b/src/uu/tr/src/convert.rs @@ -15,7 +15,7 @@ fn parse_octal(input: &str) -> IResult<&str, char> { preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))), |out: &str| { u32::from_str_radix(out, 8) - .map(char::from_u32) + .map(std::char::from_u32) .ok() .flatten() }, diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 1f17809ec..e22bc4276 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -76,7 +76,7 @@ impl Sequence { pub fn flatten(&self) -> Box> { match self { Sequence::Char(c) => Box::new(std::iter::once(*c)), - Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)), + Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(std::char::from_u32)), Sequence::CharStar(c) => Box::new(std::iter::repeat(*c)), Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)), Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')), @@ -85,7 +85,7 @@ impl Sequence { Sequence::Control => Box::new( (0..=31) .chain(std::iter::once(127)) - .flat_map(char::from_u32), + .flat_map(std::char::from_u32), ), Sequence::Digit => Box::new('0'..='9'), Sequence::Graph => Box::new( @@ -98,7 +98,7 @@ impl Sequence { .chain(91..=96) .chain(123..=126) .chain(std::iter::once(32)) // space - .flat_map(char::from_u32), + .flat_map(std::char::from_u32), ), Sequence::Lower => Box::new('a'..='z'), Sequence::Print => Box::new( @@ -110,14 +110,14 @@ impl Sequence { .chain(58..=64) .chain(91..=96) .chain(123..=126) - 
.flat_map(char::from_u32), + .flat_map(std::char::from_u32), ), Sequence::Punct => Box::new( (33..=47) .chain(58..=64) .chain(91..=96) .chain(123..=126) - .flat_map(char::from_u32), + .flat_map(std::char::from_u32), ), Sequence::Space => Box::new(unicode_table::SPACES.iter().cloned()), Sequence::Upper => Box::new('A'..='Z'), @@ -410,7 +410,11 @@ impl DeleteOperation { impl SymbolTranslator for DeleteOperation { fn translate(&mut self, current: char) -> Option { let found = self.set.iter().any(|sequence| sequence.eq(¤t)); - (self.complement_flag == found).then(|| current) + if self.complement_flag == found { + Some(current) + } else { + None + } } } @@ -466,7 +470,7 @@ pub enum TranslateOperation { impl TranslateOperation { fn next_complement_char(iter: u32, ignore_list: &[char]) -> (u32, char) { (iter..) - .filter_map(char::from_u32) + .filter_map(std::char::from_u32) .filter(|c| !ignore_list.iter().any(|s| s.eq(c))) .map(|c| (u32::from(c) + 1, c)) .next() @@ -498,7 +502,7 @@ impl SymbolTranslator for TranslateOperation { TranslateOperation::Standard(TranslateOperationStandard { translation_map }) => Some( translation_map .iter() - .find_map(|(l, r)| l.eq(¤t).then(|| *r)) + .find_map(|(l, r)| if l.eq(¤t) { Some(*r) } else { None }) .unwrap_or(current), ), TranslateOperation::Complement(TranslateOperationComplement { From e0d1bf9bbaec560367c93dc448cff289748f6cd9 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 19 Sep 2021 23:15:42 +0800 Subject: [PATCH 44/50] Ignore some spell checks Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 5f60f8d2a..fb0fb93eb 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -1,3 +1,4 @@ +// spell-checker:ignore abcdefabcdef dabcdef abcdefabcdef xyzzzzxyzzzz alnum abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz 
ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ PQRST ABCDEFGHIJKLMNOPQRS xycde xyyye abcdefghijklmnop aabbcc abcc xdigit acbdef wxyzz wxyzz abcdefghijk aabbaa ABCDEFZZ abcdefghijklmn upcase cclass cefgm cntrl Zamz AMZXAMZ bbbd Gzabcdefg ZABCDEF compl use crate::common::util::*; #[test] From 1dc438c9d969b96da2e5a572ac16a9b48c89a0b9 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 19 Sep 2021 23:26:47 +0800 Subject: [PATCH 45/50] Fix spell check ignore list Use this small script: ```shell | tr ' ' '\n' | sort | uniq | tr '\n' ' ' ``` Signed-off-by: Hanif Bin Ariffin --- tests/by-util/test_tr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index fb0fb93eb..417df6ec4 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -1,4 +1,4 @@ -// spell-checker:ignore abcdefabcdef dabcdef abcdefabcdef xyzzzzxyzzzz alnum abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUVWXYZ PQRST ABCDEFGHIJKLMNOPQRS xycde xyyye abcdefghijklmnop aabbcc abcc xdigit acbdef wxyzz wxyzz abcdefghijk aabbaa ABCDEFZZ abcdefghijklmn upcase cclass cefgm cntrl Zamz AMZXAMZ bbbd Gzabcdefg ZABCDEF compl +// spell-checker:ignore aabbaa aabbcc aabc abbb abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz use crate::common::util::*; #[test] From eec5ad8c76a0ffbd0f3b49e609de658bf42a5ad7 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Thu, 18 Nov 2021 18:45:14 +0800 Subject: [PATCH 46/50] Fixing incompatible 
Cargo version issue with CI/CD Signed-off-by: Hanif Bin Ariffin --- Cargo.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 005a3c125..46ef259a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -198,9 +198,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cc" -version = "1.0.71" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" +checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" [[package]] name = "cexpr" @@ -992,9 +992,9 @@ checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" [[package]] name = "libloading" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cf036d15402bea3c5d4de17b3fce76b3e4a56ebc1f577be0e7a72f7c607cf0" +checksum = "afe203d669ec979b7128619bae5a63b7b42e9203c1b29146079ee05e2f604b52" dependencies = [ "cfg-if 1.0.0", "winapi 0.3.9", @@ -1074,9 +1074,9 @@ dependencies = [ [[package]] name = "minimal-lexical" -version = "0.1.4" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "mio" @@ -1170,11 +1170,11 @@ dependencies = [ [[package]] name = "nom" -version = "7.0.0" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1" +checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" dependencies = [ - "memchr 2.4.0", + "memchr 2.4.1", "minimal-lexical", "version_check", ] @@ -3123,7 +3123,7 @@ name = "uu_tr" version = "0.0.8" dependencies = [ "clap", - "nom 7.0.0", + "nom 
7.1.0", "uucore", "uucore_procs", ] From 0d3fa51d1e536f0171a86cd342af3e46d82177cf Mon Sep 17 00:00:00 2001 From: Hanif Ariffin Date: Sat, 20 Nov 2021 17:04:28 +0800 Subject: [PATCH 47/50] Add license headers Signed-off-by: Hanif Ariffin --- src/uu/tr/src/operation.rs | 13 ++++++++++--- src/uu/tr/src/unicode_table.rs | 9 +++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index e22bc4276..775689a20 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,3 +1,12 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Gehring +// * (c) kwantam +// * (c) Sergey "Shnatsel" Davidoff +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + // spell-checker:ignore (strings) anychar combinator Alnum Punct Xdigit alnum punct xdigit cntrl use nom::{ @@ -30,9 +39,7 @@ pub enum BadSequence { impl Display for BadSequence { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - BadSequence::MissingCharClassName => { - writeln!(f, "missing character class name '[::]'") - } + BadSequence::MissingCharClassName => writeln!(f, "missing character class name '[::]'"), BadSequence::MissingEquivalentClassChar => { writeln!(f, "missing equivalence class character '[==]'") } diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs index 781e4cdba..98f2a99fb 100644 --- a/src/uu/tr/src/unicode_table.rs +++ b/src/uu/tr/src/unicode_table.rs @@ -1,3 +1,12 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Gehring +// * (c) kwantam +// * (c) Sergey "Shnatsel" Davidoff +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. 
+ pub static BEL: char = '\u{0007}'; pub static BS: char = '\u{0008}'; pub static HT: char = '\u{0009}'; From 0599e910ccee071b6c36c2fdbf79d46df766cdee Mon Sep 17 00:00:00 2001 From: Hanif Ariffin Date: Sat, 20 Nov 2021 17:05:35 +0800 Subject: [PATCH 48/50] Small bump to Cargo.lock Signed-off-by: Hanif Ariffin --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 005a3c125..d1b146759 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1174,7 +1174,7 @@ version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1" dependencies = [ - "memchr 2.4.0", + "memchr 2.4.1", "minimal-lexical", "version_check", ] From 6aa433c70a6080c426c021c9a739ca64fd138d41 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 19 Jan 2022 19:07:11 +0100 Subject: [PATCH 49/50] tr: adapt copyright to new guidelines --- src/uu/tr/src/convert.rs | 5 +++++ src/uu/tr/src/operation.rs | 4 ---- src/uu/tr/src/tr.rs | 5 ----- src/uu/tr/src/unicode_table.rs | 4 ---- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/uu/tr/src/convert.rs b/src/uu/tr/src/convert.rs index 44ee67ad1..4a7b97250 100644 --- a/src/uu/tr/src/convert.rs +++ b/src/uu/tr/src/convert.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + // spell-checker:ignore (strings) anychar combinator use nom::{ diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 775689a20..27d48b279 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -1,9 +1,5 @@ // * This file is part of the uutils coreutils package. 
// * -// * (c) Michael Gehring -// * (c) kwantam -// * (c) Sergey "Shnatsel" Davidoff -// * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 4510e9bd9..4dce1212f 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -1,10 +1,5 @@ // * This file is part of the uutils coreutils package. // * -// * (c) Michael Gehring -// * (c) kwantam -// * * 2015-04-28 ~ created `expand` module to eliminate most allocs during setup -// * (c) Sergey "Shnatsel" Davidoff -// * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs index 98f2a99fb..43d9fd6f4 100644 --- a/src/uu/tr/src/unicode_table.rs +++ b/src/uu/tr/src/unicode_table.rs @@ -1,9 +1,5 @@ // * This file is part of the uutils coreutils package. // * -// * (c) Michael Gehring -// * (c) kwantam -// * (c) Sergey "Shnatsel" Davidoff -// * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. 
From b51a6e8fe3343c4c59a7c66adb892ab7e517ea34 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 19 Jan 2022 20:52:06 +0100 Subject: [PATCH 50/50] tr: make parsing of sets more terse --- src/uu/tr/src/operation.rs | 159 +++++++++++-------------------------- 1 file changed, 48 insertions(+), 111 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 5bc0edf25..373dec0c2 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -8,9 +8,9 @@ use nom::{ branch::alt, bytes::complete::tag, - character::complete::{anychar, one_of}, - combinator::{map, recognize}, - multi::{many0, many1}, + character::complete::{anychar, digit1}, + combinator::{map, peek, value}, + multi::many0, sequence::{delimited, preceded, separated_pair}, IResult, }; @@ -24,7 +24,7 @@ use uucore::error::UError; use crate::unicode_table; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum BadSequence { MissingCharClassName, MissingEquivalentClassChar, @@ -220,32 +220,11 @@ impl Sequence { impl Sequence { pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> { many0(alt(( - alt(( - Sequence::parse_char_range, - Sequence::parse_char_star, - Sequence::parse_char_repeat, - )), - alt(( - Sequence::parse_alnum, - Sequence::parse_alpha, - Sequence::parse_blank, - Sequence::parse_control, - Sequence::parse_digit, - Sequence::parse_graph, - Sequence::parse_lower, - Sequence::parse_print, - Sequence::parse_punct, - Sequence::parse_space, - Sequence::parse_upper, - Sequence::parse_xdigit, - Sequence::parse_char_equal, - )), - // NOTE: Specific error cases - alt(( - Sequence::error_parse_char_repeat, - Sequence::error_parse_empty_bracket, - Sequence::error_parse_empty_equivalent_char, - )), + Sequence::parse_char_range, + Sequence::parse_char_star, + Sequence::parse_char_repeat, + Sequence::parse_class, + Sequence::parse_char_equal, // NOTE: This must be the last one map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))), )))(input) @@ -297,102
+276,60 @@ impl Sequence { fn parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { delimited( tag("["), - separated_pair( - Sequence::parse_backslash_or_char, - tag("*"), - recognize(many1(one_of("01234567"))), - ), + separated_pair(Sequence::parse_backslash_or_char, tag("*"), digit1), tag("]"), )(input) .map(|(l, (c, str))| { ( l, - match usize::from_str_radix(str, 8) - .expect("This should not fail because we only parse against 0-7") - { - 0 => Ok(Sequence::CharStar(c)), - count => Ok(Sequence::CharRepeat(c, count)), + match usize::from_str_radix(str, 8) { + Ok(0) => Ok(Sequence::CharStar(c)), + Ok(count) => Ok(Sequence::CharRepeat(c, count)), + Err(_) => Err(BadSequence::InvalidRepeatCount(str.to_string())), }, ) }) } - fn parse_alnum(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:alnum:]")(input).map(|(l, _)| (l, Ok(Sequence::Alnum))) - } - - fn parse_alpha(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:alpha:]")(input).map(|(l, _)| (l, Ok(Sequence::Alpha))) - } - - fn parse_blank(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:blank:]")(input).map(|(l, _)| (l, Ok(Sequence::Blank))) - } - - fn parse_control(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:cntrl:]")(input).map(|(l, _)| (l, Ok(Sequence::Control))) - } - - fn parse_digit(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:digit:]")(input).map(|(l, _)| (l, Ok(Sequence::Digit))) - } - - fn parse_graph(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:graph:]")(input).map(|(l, _)| (l, Ok(Sequence::Graph))) - } - - fn parse_lower(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:lower:]")(input).map(|(l, _)| (l, Ok(Sequence::Lower))) - } - - fn parse_print(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:print:]")(input).map(|(l, _)| (l, Ok(Sequence::Print))) - } - - fn parse_punct(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:punct:]")(input).map(|(l, _)| (l, Ok(Sequence::Punct))) - } - - fn parse_space(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:space:]")(input).map(|(l, _)| (l,
Ok(Sequence::Space))) - } - - fn parse_upper(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:upper:]")(input).map(|(l, _)| (l, Ok(Sequence::Upper))) - } - - fn parse_xdigit(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[:xdigit:]")(input).map(|(l, _)| (l, Ok(Sequence::Xdigit))) + fn parse_class(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { + delimited( + tag("[:"), + alt(( + map( + alt(( + value(Sequence::Alnum, tag("alnum")), + value(Sequence::Alpha, tag("alpha")), + value(Sequence::Blank, tag("blank")), + value(Sequence::Control, tag("cntrl")), + value(Sequence::Digit, tag("digit")), + value(Sequence::Graph, tag("graph")), + value(Sequence::Lower, tag("lower")), + value(Sequence::Print, tag("print")), + value(Sequence::Punct, tag("punct")), + value(Sequence::Space, tag("space")), + value(Sequence::Upper, tag("upper")), + value(Sequence::Xdigit, tag("xdigit")), + )), + Ok, + ), + value(Err(BadSequence::MissingCharClassName), tag("")), + )), + tag(":]"), + )(input) } fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - delimited(tag("[="), Sequence::parse_backslash_or_char, tag("=]"))(input) - .map(|(l, c)| (l, Ok(Sequence::Char(c)))) - } -} - -impl Sequence { - fn error_parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { delimited( - tag("["), - separated_pair( - Sequence::parse_backslash_or_char, - tag("*"), - recognize(many1(one_of("0123456789"))), - ), - tag("]"), + tag("[="), + alt(( + value( + Err(BadSequence::MissingEquivalentClassChar), + peek(tag("=]")), + ), + map(Sequence::parse_backslash_or_char, |c| Ok(Sequence::Char(c))), + )), + tag("=]"), )(input) - .map(|(l, (_, n))| (l, Err(BadSequence::InvalidRepeatCount(n.to_string())))) - } - - fn error_parse_empty_bracket(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName))) - } - - fn error_parse_empty_equivalent_char( - input: &str, - ) -> IResult<&str, Result<Sequence, BadSequence>> { - tag("[==]")(input).map(|(l, _)| (l,
Err(BadSequence::MissingEquivalentClassChar))) } }