mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
tr: Add ambiguous octal escape warning (#6886)
* tr: Add ambiguous octal escape warning, issue #6821
* tr: Make code cleaner
This commit is contained in:
parent
dea0afb2a5
commit
75de5a0613
2 changed files with 42 additions and 13 deletions
|
@ -16,6 +16,7 @@ use nom::{
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
use std::{
|
use std::{
|
||||||
|
char,
|
||||||
collections::{HashMap, HashSet},
|
collections::{HashMap, HashSet},
|
||||||
error::Error,
|
error::Error,
|
||||||
fmt::{Debug, Display},
|
fmt::{Debug, Display},
|
||||||
|
@ -23,6 +24,7 @@ use std::{
|
||||||
ops::Not,
|
ops::Not,
|
||||||
};
|
};
|
||||||
use uucore::error::UError;
|
use uucore::error::UError;
|
||||||
|
use uucore::show_warning;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum BadSequence {
|
pub enum BadSequence {
|
||||||
|
@ -293,7 +295,9 @@ impl Sequence {
|
||||||
Self::parse_class,
|
Self::parse_class,
|
||||||
Self::parse_char_equal,
|
Self::parse_char_equal,
|
||||||
// NOTE: This must be the last one
|
// NOTE: This must be the last one
|
||||||
map(Self::parse_backslash_or_char, |s| Ok(Self::Char(s))),
|
map(Self::parse_backslash_or_char_with_warning, |s| {
|
||||||
|
Ok(Self::Char(s))
|
||||||
|
}),
|
||||||
)))(input)
|
)))(input)
|
||||||
.map(|(_, r)| r)
|
.map(|(_, r)| r)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -302,10 +306,16 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> {
|
fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
|
// For `parse_char_range`, `parse_char_star`, `parse_char_repeat`, `parse_char_equal`.
|
||||||
|
// because in these patterns there are no ambiguous cases.
|
||||||
|
preceded(tag("\\"), Self::parse_octal_up_to_three_digits)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_octal_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
preceded(
|
preceded(
|
||||||
tag("\\"),
|
tag("\\"),
|
||||||
alt((
|
alt((
|
||||||
Self::parse_octal_up_to_three_digits,
|
Self::parse_octal_up_to_three_digits_with_warning,
|
||||||
// Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be
|
// Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be
|
||||||
// parsed as a byte
|
// parsed as a byte
|
||||||
// See test `test_multibyte_octal_sequence`
|
// See test `test_multibyte_octal_sequence`
|
||||||
|
@ -319,16 +329,29 @@ impl Sequence {
|
||||||
recognize(many_m_n(1, 3, one_of("01234567"))),
|
recognize(many_m_n(1, 3, one_of("01234567"))),
|
||||||
|out: &[u8]| {
|
|out: &[u8]| {
|
||||||
let str_to_parse = std::str::from_utf8(out).unwrap();
|
let str_to_parse = std::str::from_utf8(out).unwrap();
|
||||||
|
u8::from_str_radix(str_to_parse, 8).ok()
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
match u8::from_str_radix(str_to_parse, 8) {
|
fn parse_octal_up_to_three_digits_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
Ok(ue) => Some(ue),
|
map_opt(
|
||||||
Err(_pa) => {
|
recognize(many_m_n(1, 3, one_of("01234567"))),
|
||||||
// TODO
|
|out: &[u8]| {
|
||||||
// A warning needs to be printed here
|
let str_to_parse = std::str::from_utf8(out).unwrap();
|
||||||
// See https://github.com/uutils/coreutils/issues/6821
|
let result = u8::from_str_radix(str_to_parse, 8).ok();
|
||||||
None
|
if result.is_none() {
|
||||||
}
|
let origin_octal: &str = std::str::from_utf8(input).unwrap();
|
||||||
|
let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap();
|
||||||
|
let outstand_char: char = char::from_u32(input[2] as u32).unwrap();
|
||||||
|
show_warning!(
|
||||||
|
"the ambiguous octal escape \\{} is being\n interpreted as the 2-byte sequence \\0{}, {}",
|
||||||
|
origin_octal,
|
||||||
|
actual_octal_tail,
|
||||||
|
outstand_char
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
result
|
||||||
},
|
},
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
@ -360,6 +383,14 @@ impl Sequence {
|
||||||
alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input)
|
alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_backslash_or_char_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
|
alt((
|
||||||
|
Self::parse_octal_with_warning,
|
||||||
|
Self::parse_backslash,
|
||||||
|
Self::single_char,
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
fn single_char(input: &[u8]) -> IResult<&[u8], u8> {
|
fn single_char(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
take(1usize)(input).map(|(l, a)| (l, a[0]))
|
take(1usize)(input).map(|(l, a)| (l, a[0]))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1494,9 +1494,7 @@ fn test_multibyte_octal_sequence() {
|
||||||
.args(&["-d", r"\501"])
|
.args(&["-d", r"\501"])
|
||||||
.pipe_in("(1Ł)")
|
.pipe_in("(1Ł)")
|
||||||
.succeeds()
|
.succeeds()
|
||||||
// TODO
|
.stderr_is("tr: warning: the ambiguous octal escape \\501 is being\n interpreted as the 2-byte sequence \\050, 1\n")
|
||||||
// A warning needs to be printed here
|
|
||||||
// See https://github.com/uutils/coreutils/issues/6821
|
|
||||||
.stdout_is("Ł)");
|
.stdout_is("Ł)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue