From 4b45a2287cf3135d0e18880ffad559fbdc8a8c3b Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Tue, 20 Jul 2021 15:41:43 +0800 Subject: [PATCH] Implement some more parsers Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 89 +++++++++++++++++++++++++--------- src/uu/tr/src/tr.rs | 1 + src/uu/tr/src/unicode_table.rs | 8 +++ 3 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 src/uu/tr/src/unicode_table.rs diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 32a08c715..eae348370 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -2,7 +2,7 @@ use nom::{ branch::alt, bytes::complete::{tag, take_while1}, character::complete::{anychar, one_of}, - combinator::{map_opt, recognize}, + combinator::{map_opt, recognize, value}, multi::{many0, many_m_n}, sequence::{preceded, separated_pair, tuple}, IResult, @@ -13,6 +13,8 @@ use std::{ io::{BufRead, Write}, }; +use crate::unicode_table; + #[derive(Debug, PartialEq, Eq, Clone)] pub enum Sequence { Char(char), @@ -66,13 +68,13 @@ impl Sequence { fn parse_backslash(input: &str) -> IResult<&str, Sequence> { preceded(tag("\\"), anychar)(input).map(|(l, a)| { let c = match a { - 'a' => Sequence::Char('\u{0007}'), - 'b' => Sequence::Char('\u{0008}'), - 'f' => Sequence::Char('\u{000C}'), - 'n' => Sequence::Char('\u{000A}'), - 'r' => Sequence::Char('\u{000D}'), - 't' => Sequence::Char('\u{0009}'), - 'v' => Sequence::Char('\u{000B}'), + 'a' => Sequence::Char(unicode_table::BEL), + 'b' => Sequence::Char(unicode_table::BS), + 'f' => Sequence::Char(unicode_table::FF), + 'n' => Sequence::Char(unicode_table::LF), + 'r' => Sequence::Char(unicode_table::CR), + 't' => Sequence::Char(unicode_table::HT), + 'v' => Sequence::Char(unicode_table::VT), x => Sequence::Char(x), }; (l, c) @@ -129,32 +131,55 @@ impl Sequence { } fn parse_alpha(input: &str) -> IResult<&str, Sequence> { - tag("[:alpha:]")(input).map(|(l, _)| { - ( - l, - Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()), - ) - }) + value( + Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()), + tag("[:alpha:]"), + )(input) } fn parse_blank(input: &str) -> IResult<&str, Sequence> { - tag("[:blank:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange(vec![unicode_table::SPACE, unicode_table::HT]), + tag("[:blank:]"), + )(input) } fn parse_control(input: &str) -> IResult<&str, Sequence> { - tag("[:cntrl:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange( + (0..=31) + .chain(std::iter::once(127)) + .flat_map(char::from_u32) + .collect(), + ), + tag("[:cntrl:]"), + )(input) } fn parse_digit(input: &str) -> IResult<&str, Sequence> { - tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(('0'..='9').collect()))) + value(Sequence::CharRange(('0'..='9').collect()), tag("[:digit:]"))(input) } fn parse_graph(input: &str) -> IResult<&str, Sequence> { - tag("[:graph:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange( + (48..=57) // digit + .chain(65..=90) // uppercase + .chain(97..=122) // lowercase + // punctuations + .chain(33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32) + .collect(), + ), + tag("[:graph:]"), + )(input) } fn parse_lower(input: &str) -> IResult<&str, Sequence> { - tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(('a'..='z').collect()))) + value(Sequence::CharRange(('a'..='z').collect()), tag("[:lower:]"))(input) } fn parse_print(input: &str) -> IResult<&str, Sequence> { @@ -162,11 +187,31 @@ impl Sequence { } fn parse_punct(input: &str) -> IResult<&str, Sequence> { - tag("[:punct:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange( + (33..=47) + .chain(58..=64) + .chain(91..=96) + .chain(123..=126) + .flat_map(char::from_u32) + .collect(), + ), + tag("[:punct:]"), + )(input) } fn parse_space(input: &str) -> IResult<&str, Sequence> { - tag("[:space:]")(input).map(|(_, _)| todo!()) + value( + Sequence::CharRange(vec![ + unicode_table::HT, + unicode_table::LF, + unicode_table::VT, + unicode_table::FF, + unicode_table::CR, + unicode_table::SPACE, + ]), + tag("[:space:]"), + )(input) } fn parse_upper(input: &str) -> IResult<&str, Sequence> { @@ -177,7 +222,7 @@ impl Sequence { tag("[:xdigit:]")(input).map(|(l, _)| { ( l, - Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()), + Sequence::CharRange(('0'..='9').chain('A'..='F').chain('a'..='f').collect()), ) }) } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 5ba6cf611..581595385 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -15,6 +15,7 @@ extern crate uucore; extern crate nom; mod operation; +mod unicode_table; use clap::{crate_version, App, Arg}; use nom::AsBytes; diff --git a/src/uu/tr/src/unicode_table.rs b/src/uu/tr/src/unicode_table.rs new file mode 100644 index 000000000..9362be647 --- /dev/null +++ b/src/uu/tr/src/unicode_table.rs @@ -0,0 +1,8 @@ +pub static BEL: char = '\u{0007}'; +pub static BS: char = '\u{0008}'; +pub static HT: char = '\u{0009}'; +pub static LF: char = '\u{000A}'; +pub static VT: char = '\u{000B}'; +pub static FF: char = '\u{000C}'; +pub static CR: char = '\u{000D}'; +pub static SPACE: char = '\u{0020}';