mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
tr
: Reimplementing set expansion
Hopefully this will reach feature parity with GNU `tr`. Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com> Implemented a bit of the new expansion module. Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com> Implemented delete operation. Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com> Partially implemented delete operation. Will go through translate next. Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com> Fix formatting... Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com> Implemented translation feature. Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
f9559fea80
commit
840c6e7b91
6 changed files with 527 additions and 26 deletions
47
Cargo.lock
generated
47
Cargo.lock
generated
|
@ -119,9 +119,9 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.2.1"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
checksum = "2da1976d75adbe5fbc88130ecd119529cf1cc6a93ae1546d8696ee66f0d21af1"
|
||||
|
||||
[[package]]
|
||||
name = "bitvec"
|
||||
|
@ -200,7 +200,7 @@ version = "0.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db507a7679252d2276ed0dd8113c6875ec56d3089f9225b2b42c30cc1f8e5c89"
|
||||
dependencies = [
|
||||
"nom",
|
||||
"nom 6.1.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -645,9 +645,9 @@ checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499"
|
|||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.6.2"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5b29bf156f3f4b3c4f610a25ff69370616ae6e0657d416de22645483e72af0a"
|
||||
checksum = "ecae1c064e29fcabb6c2e9939e53dc7da72ed90234ae36ebfe03a478742efbd1"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
@ -937,6 +937,19 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "lexical-core"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"bitflags",
|
||||
"cfg-if 1.0.0",
|
||||
"ryu",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.85"
|
||||
|
@ -1084,6 +1097,17 @@ version = "0.1.14"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "5.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
|
||||
dependencies = [
|
||||
"lexical-core",
|
||||
"memchr 2.4.0",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "6.1.2"
|
||||
|
@ -1614,6 +1638,12 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
|
@ -1754,6 +1784,12 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
|
@ -2910,6 +2946,7 @@ dependencies = [
|
|||
"bit-set",
|
||||
"clap",
|
||||
"fnv",
|
||||
"nom 5.1.2",
|
||||
"uucore",
|
||||
"uucore_procs",
|
||||
]
|
||||
|
|
|
@ -15,7 +15,7 @@ edition = "2018"
|
|||
path = "src/hashsum.rs"
|
||||
|
||||
[dependencies]
|
||||
digest = "0.6.2"
|
||||
digest = "0.6.1"
|
||||
clap = { version = "2.33", features = ["wrap_help"] }
|
||||
hex = "0.2.0"
|
||||
libc = "0.2.42"
|
||||
|
|
|
@ -20,6 +20,7 @@ fnv = "1.0.5"
|
|||
clap = { version = "2.33", features = ["wrap_help"] }
|
||||
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }
|
||||
uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" }
|
||||
nom = "5.1.2"
|
||||
|
||||
[[bin]]
|
||||
name = "tr"
|
||||
|
|
409
src/uu/tr/src/operation.rs
Normal file
409
src/uu/tr/src/operation.rs
Normal file
|
@ -0,0 +1,409 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take, take_until},
|
||||
character::complete::one_of,
|
||||
multi::many0,
|
||||
sequence::{separated_pair, tuple},
|
||||
IResult,
|
||||
};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
io::{BufRead, Write},
|
||||
};
|
||||
|
||||
/// One parsed element of a `tr` set specification.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Sequence {
    /// A single character.
    Char(char),
    /// An already expanded, ordered list of characters (ranges, classes and
    /// repetitions all end up in this form).
    CharRange(Vec<char>),
}
|
||||
|
||||
impl Sequence {
|
||||
pub fn parse_set_string(input: &str) -> Vec<Sequence> {
|
||||
many0(alt((
|
||||
alt((
|
||||
Sequence::parse_octal,
|
||||
Sequence::parse_backslash,
|
||||
Sequence::parse_audible_bel,
|
||||
Sequence::parse_backspace,
|
||||
Sequence::parse_form_feed,
|
||||
Sequence::parse_newline,
|
||||
Sequence::parse_return,
|
||||
Sequence::parse_horizontal_tab,
|
||||
Sequence::parse_vertical_tab,
|
||||
)),
|
||||
alt((
|
||||
Sequence::parse_char_range,
|
||||
Sequence::parse_char_star,
|
||||
Sequence::parse_char_repeat,
|
||||
)),
|
||||
alt((
|
||||
Sequence::parse_alnum,
|
||||
Sequence::parse_alpha,
|
||||
Sequence::parse_blank,
|
||||
Sequence::parse_control,
|
||||
Sequence::parse_digit,
|
||||
Sequence::parse_graph,
|
||||
Sequence::parse_lower,
|
||||
Sequence::parse_print,
|
||||
Sequence::parse_punct,
|
||||
Sequence::parse_space,
|
||||
Sequence::parse_space,
|
||||
Sequence::parse_upper,
|
||||
Sequence::parse_xdigit,
|
||||
Sequence::parse_char_equal,
|
||||
Sequence::parse_char,
|
||||
)),
|
||||
)))(input)
|
||||
.map(|(_, r)| r)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn dissolve(self) -> Vec<char> {
|
||||
match self {
|
||||
Sequence::Char(c) => vec![c],
|
||||
Sequence::CharRange(r) => r,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sequence parsers
|
||||
|
||||
fn parse_char(input: &str) -> IResult<&str, Sequence> {
|
||||
take(1usize)(input).map(|(l, r)| (l, Sequence::Char(r.chars().next().unwrap())))
|
||||
}
|
||||
|
||||
fn parse_octal(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((
|
||||
tag("\\"),
|
||||
one_of("01234567"),
|
||||
one_of("01234567"),
|
||||
one_of("01234567"),
|
||||
))(input)
|
||||
.map(|(l, (_, a, b, c))| {
|
||||
(
|
||||
l,
|
||||
Sequence::Char(
|
||||
// SAFETY: All the values from \000 to \777 is valid based on a test below...
|
||||
std::char::from_u32(
|
||||
a.to_digit(8).unwrap() * 8 * 8
|
||||
+ b.to_digit(8).unwrap() * 8
|
||||
+ c.to_digit(8).unwrap(),
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_backslash(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("\\")))(input).map(|(l, _)| (l, Sequence::Char('\\')))
|
||||
}
|
||||
|
||||
fn parse_audible_bel(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("a")))(input).map(|(l, _)| (l, Sequence::Char('\u{0007}')))
|
||||
}
|
||||
|
||||
fn parse_backspace(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("b")))(input).map(|(l, _)| (l, Sequence::Char('\u{0008}')))
|
||||
}
|
||||
|
||||
fn parse_form_feed(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("f")))(input).map(|(l, _)| (l, Sequence::Char('\u{000C}')))
|
||||
}
|
||||
|
||||
fn parse_newline(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("n")))(input).map(|(l, _)| (l, Sequence::Char('\u{000A}')))
|
||||
}
|
||||
|
||||
fn parse_return(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("r")))(input).map(|(l, _)| (l, Sequence::Char('\u{000D}')))
|
||||
}
|
||||
|
||||
fn parse_horizontal_tab(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("t")))(input).map(|(l, _)| (l, Sequence::Char('\u{0009}')))
|
||||
}
|
||||
|
||||
fn parse_vertical_tab(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("\\"), tag("v")))(input).map(|(l, _)| (l, Sequence::Char('\u{000B}')))
|
||||
}
|
||||
|
||||
fn parse_char_range(input: &str) -> IResult<&str, Sequence> {
|
||||
separated_pair(take(1usize), tag("-"), take(1usize))(input).map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (
|
||||
u32::from(a.chars().next().unwrap()),
|
||||
u32::from(b.chars().next().unwrap()),
|
||||
);
|
||||
if (start >= 97 && start <= 122 && end >= 97 && end <= 122 && end > start)
|
||||
|| (start >= 65 && start <= 90 && end >= 65 && end <= 90 && end > start)
|
||||
|| (start >= 48 && start <= 57 && end >= 48 && end <= 57 && end > start)
|
||||
{
|
||||
Sequence::CharRange(
|
||||
(start..=end)
|
||||
.map(|c| std::char::from_u32(c).unwrap())
|
||||
.collect(),
|
||||
)
|
||||
} else {
|
||||
// This part is unchecked...not all `u32` => `char` is valid
|
||||
Sequence::CharRange(
|
||||
(start..=end)
|
||||
.map(|c| std::char::from_u32(c).unwrap())
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("["), take(1usize), tag("*"), tag("]")))(input).map(|(_, (_, _, _, _))| todo!())
|
||||
}
|
||||
|
||||
fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("["), take(1usize), tag("*"), take_until("]"), tag("]")))(input).map(
|
||||
|(l, (_, c, _, n, _))| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(
|
||||
std::iter::repeat(c.chars().next().unwrap())
|
||||
.take(n.parse().unwrap())
|
||||
.collect(),
|
||||
),
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_alnum(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:alnum:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(('a'..='z').chain('A'..'Z').chain('0'..'9').collect()),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:alpha:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(('a'..='z').chain('A'..'Z').collect()),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:blank:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:cntrl:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(('0'..='9').collect())))
|
||||
}
|
||||
|
||||
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:graph:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:lower:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:print:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:punct:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:space:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(('A'..='Z').collect())))
|
||||
}
|
||||
|
||||
fn parse_xdigit(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:xdigit:]")(input).map(|(_, _)| todo!())
|
||||
}
|
||||
|
||||
fn parse_char_equal(input: &str) -> IResult<&str, Sequence> {
|
||||
tuple((tag("[="), take(1usize), tag("=]")))(input).map(|(_, (_, _, _))| todo!())
|
||||
}
|
||||
}
|
||||
|
||||
/// A stateful, character-by-character transformation applied to the input.
pub trait SymbolTranslatorNew {
    /// Processes one input character.
    ///
    /// Returns `Some(c)` with the character to emit, or `None` when nothing
    /// should be emitted for `current`.
    fn translate(&mut self, current: char) -> Option<char>;
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DeleteOperationNew {
|
||||
set: Vec<Sequence>,
|
||||
complement_flag: bool,
|
||||
}
|
||||
|
||||
impl DeleteOperationNew {
|
||||
pub fn new(set: Vec<Sequence>, complement_flag: bool) -> DeleteOperationNew {
|
||||
DeleteOperationNew {
|
||||
set,
|
||||
complement_flag,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslatorNew for DeleteOperationNew {
|
||||
fn translate(&mut self, current: char) -> Option<char> {
|
||||
let found = self.set.iter().any(|sequence| match sequence {
|
||||
Sequence::Char(c) => c.eq(¤t),
|
||||
Sequence::CharRange(r) => r.iter().any(|c| c.eq(¤t)),
|
||||
});
|
||||
(self.complement_flag == found).then(|| current)
|
||||
}
|
||||
}
|
||||
|
||||
/// Translator that replaces characters according to two sets.
#[derive(Clone, Debug)]
pub enum TranslateOperationNew {
    /// Plain translation: a fixed source-to-target character table.
    Standard(HashMap<char, char>),
    /// Complemented translation. The fields are, in order: the characters of
    /// the first set, the not yet assigned characters of the second set, the
    /// assignments made so far, and the character used once the second set
    /// is exhausted.
    Complement(Vec<char>, Vec<char>, HashMap<char, char>, char),
}
|
||||
|
||||
impl TranslateOperationNew {
|
||||
pub fn new(
|
||||
set1: Vec<Sequence>,
|
||||
mut set2: Vec<Sequence>,
|
||||
truncate_set2: bool,
|
||||
complement: bool,
|
||||
) -> TranslateOperationNew {
|
||||
let fallback = set2.last().cloned().unwrap();
|
||||
if truncate_set2 {
|
||||
set2.truncate(set1.len());
|
||||
}
|
||||
if complement {
|
||||
TranslateOperationNew::Complement(
|
||||
set1.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.rev()
|
||||
.collect(),
|
||||
set2.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.rev()
|
||||
.collect(),
|
||||
HashMap::new(),
|
||||
// TODO: Check how `tr` actually handles this
|
||||
fallback.dissolve().first().cloned().unwrap(),
|
||||
)
|
||||
} else {
|
||||
TranslateOperationNew::Standard(
|
||||
set1.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.zip(
|
||||
set2.into_iter()
|
||||
.chain(std::iter::repeat(fallback))
|
||||
.flat_map(Sequence::dissolve),
|
||||
)
|
||||
.collect::<HashMap<_, _>>(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslatorNew for TranslateOperationNew {
|
||||
fn translate(&mut self, current: char) -> Option<char> {
|
||||
match self {
|
||||
TranslateOperationNew::Standard(map) => Some(
|
||||
map.iter()
|
||||
.find_map(|(l, r)| l.eq(¤t).then(|| *r))
|
||||
.unwrap_or(current),
|
||||
),
|
||||
TranslateOperationNew::Complement(set1, set2, mapped_characters, fallback) => {
|
||||
// First, see if we have already mapped this character.
|
||||
// If so, return it.
|
||||
// Else, check if current character is part of set1
|
||||
// If so, return it.
|
||||
// Else, consume from set2, create the translation pair, and return the mapped character
|
||||
match mapped_characters.get(¤t) {
|
||||
Some(k) => Some(*k),
|
||||
None => match set1.iter().any(|c| c.eq(&¤t)) {
|
||||
true => Some(current),
|
||||
false => {
|
||||
let popped = set2.pop().unwrap_or(*fallback);
|
||||
mapped_characters.insert(current, popped);
|
||||
Some(popped)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn translate_input_new<T>(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T)
|
||||
where
|
||||
T: SymbolTranslatorNew,
|
||||
{
|
||||
let mut buf = String::new();
|
||||
let mut output_buf = String::new();
|
||||
while let Ok(length) = input.read_line(&mut buf) {
|
||||
if length == 0 {
|
||||
break;
|
||||
} else {
|
||||
let filtered = buf.chars().filter_map(|c| translator.translate(c));
|
||||
output_buf.extend(filtered);
|
||||
output.write_all(output_buf.as_bytes()).unwrap();
|
||||
}
|
||||
buf.clear();
|
||||
output_buf.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Ranges expand to every character between their endpoints; anything that is
// not a recognised construct is parsed one character at a time.
#[test]
fn test_parse_char_range() {
    let lowercase: Vec<char> = ('a'..='z').collect();
    let uppercase: Vec<char> = ('A'..='Z').collect();

    assert_eq!(Sequence::parse_set_string(""), vec![]);
    assert_eq!(
        Sequence::parse_set_string("a-z"),
        vec![Sequence::CharRange(lowercase.clone())]
    );
    assert_eq!(
        Sequence::parse_set_string("a-zA-Z"),
        vec![
            Sequence::CharRange(lowercase),
            Sequence::CharRange(uppercase)
        ]
    );

    // Multi-byte characters are handled as single characters.
    let literal = ", ┬─┬";
    let expected: Vec<Sequence> = literal.chars().map(Sequence::Char).collect();
    assert_eq!(Sequence::parse_set_string(literal), expected);
}
|
||||
|
||||
// Every three-digit octal escape from \000 to \777 parses to exactly one
// sequence.
#[test]
fn test_parse_octal() {
    for value in 0..=0o777u32 {
        let escape = format!("\\{:03o}", value);
        assert!(Sequence::parse_set_string(escape.as_str()).len() == 1);
    }
}
|
|
@ -12,15 +12,18 @@
|
|||
|
||||
#[macro_use]
|
||||
extern crate uucore;
|
||||
extern crate nom;
|
||||
|
||||
mod expand;
|
||||
mod operation;
|
||||
|
||||
use bit_set::BitSet;
|
||||
use clap::{crate_version, App, Arg};
|
||||
use fnv::FnvHashMap;
|
||||
use operation::{translate_input_new, Sequence, TranslateOperationNew};
|
||||
use std::io::{stdin, stdout, BufRead, BufWriter, Write};
|
||||
|
||||
use crate::expand::ExpandSet;
|
||||
use crate::{expand::ExpandSet, operation::DeleteOperationNew};
|
||||
use uucore::InvalidEncodingHandling;
|
||||
|
||||
static ABOUT: &str = "translate or delete characters";
|
||||
|
@ -31,7 +34,7 @@ mod options {
|
|||
pub const COMPLEMENT: &str = "complement";
|
||||
pub const DELETE: &str = "delete";
|
||||
pub const SQUEEZE: &str = "squeeze-repeats";
|
||||
pub const TRUNCATE: &str = "truncate";
|
||||
pub const TRUNCATE_SET1: &str = "truncate-set1";
|
||||
pub const SETS: &str = "sets";
|
||||
}
|
||||
|
||||
|
@ -44,15 +47,6 @@ struct DeleteOperation {
|
|||
complement: bool,
|
||||
}
|
||||
|
||||
impl DeleteOperation {
|
||||
fn new(set: ExpandSet, complement: bool) -> DeleteOperation {
|
||||
DeleteOperation {
|
||||
bset: set.map(|c| c as usize).collect(),
|
||||
complement,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for DeleteOperation {
|
||||
fn translate(&self, c: char, _prev_c: char) -> Option<char> {
|
||||
let uc = c as usize;
|
||||
|
@ -254,7 +248,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
let delete_flag = matches.is_present(options::DELETE);
|
||||
let complement_flag = matches.is_present(options::COMPLEMENT) || matches.is_present("C");
|
||||
let squeeze_flag = matches.is_present(options::SQUEEZE);
|
||||
let truncate_flag = matches.is_present(options::TRUNCATE);
|
||||
let truncate_set1_flag = matches.is_present(options::TRUNCATE_SET1);
|
||||
|
||||
let sets = matches
|
||||
.values_of(options::SETS)
|
||||
|
@ -291,21 +285,26 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
let op = DeleteAndSqueezeOperation::new(set1, set2, complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let op = DeleteOperation::new(set1, complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
} else if squeeze_flag {
|
||||
if sets.len() < 2 {
|
||||
let op = SqueezeOperation::new(set1, complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let op = TranslateAndSqueezeOperation::new(sets, truncate_flag, complement_flag);
|
||||
let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
} else {
|
||||
let mut set2 = ExpandSet::new(sets[1].as_ref());
|
||||
let op = TranslateOperation::new(set1, &mut set2, truncate_flag, complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
let op = TranslateOperationNew::new(
|
||||
Sequence::parse_set_string(&sets[0]),
|
||||
Sequence::parse_set_string(&sets[1]),
|
||||
truncate_set1_flag,
|
||||
complement_flag,
|
||||
);
|
||||
println!("op:{:#?}", op);
|
||||
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
|
||||
0
|
||||
|
@ -344,8 +343,8 @@ pub fn uu_app() -> App<'static, 'static> {
|
|||
),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name(options::TRUNCATE)
|
||||
.long(options::TRUNCATE)
|
||||
Arg::with_name(options::TRUNCATE_SET1)
|
||||
.long(options::TRUNCATE_SET1)
|
||||
.short("t")
|
||||
.help("first truncate SET1 to length of SET2"),
|
||||
)
|
||||
|
|
|
@ -292,3 +292,58 @@ fn test_more_than_2_sets() {
|
|||
.pipe_in("hello world")
|
||||
.fails();
|
||||
}
|
||||
|
||||
// SET2 is padded with its last character, and the repeated 'd' in SET1 takes
// the mapping of its last occurrence.
#[test]
fn test_basic_translation() {
    let sets = ["dabcdef", "xyz"];
    new_ucmd!()
        .args(&sets)
        .pipe_in("abcdefabcdef")
        .succeeds()
        .stdout_is("yzzzzzyzzzzz");
}
|
||||
|
||||
// A class after literal characters overrides their earlier mappings.
#[test]
fn test_basic_translation_with_alnum_1() {
    let sets = ["dabcdef[:alnum:]", "xyz"];
    new_ucmd!()
        .args(&sets)
        .pipe_in("abcdefabcdef")
        .succeeds()
        .stdout_is("zzzzzzzzzzzz");
}
|
||||
|
||||
// Literal characters after a class override the mappings the class produced.
#[test]
fn test_basic_translation_with_alnum_2() {
    let sets = ["[:alnum:]abc", "xyz"];
    new_ucmd!()
        .args(&sets)
        .pipe_in("abcdefabcdef")
        .succeeds()
        .stdout_is("zzzzzzzzzzzz");
}
|
||||
|
||||
// When a character is repeated in SET1, its last pairing wins.
#[test]
fn test_translation_override_pair() {
    let sets = ["aaa", "xyz"];
    new_ucmd!()
        .args(&sets)
        .pipe_in("aaa")
        .succeeds()
        .stdout_is("zzz");
}
|
||||
|
||||
// Lower-to-upper conversion gives the same result whether the sets are
// spelled out, written as ranges, or written as character classes.
#[test]
fn test_translation_case_conversion_works() {
    let set_pairs = [
        ["abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"],
        ["a-z", "A-Z"],
        ["[:lower:]", "[:upper:]"],
    ];
    for sets in &set_pairs {
        new_ucmd!()
            .args(sets)
            .pipe_in("abcdefghijklmnopqrstuvwxyz")
            .succeeds()
            .stdout_is("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    }
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue