1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

tr: Add ambiguous octal escape warning (#6886)

* tr: Add ambiguous octal escape warning, issue #6821

* tr: Make code cleaner
This commit is contained in:
Peng Zijun 2024-11-28 16:14:16 +08:00 committed by GitHub
parent dea0afb2a5
commit 75de5a0613
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 42 additions and 13 deletions

View file

@ -16,6 +16,7 @@ use nom::{
IResult, IResult,
}; };
use std::{ use std::{
char,
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
error::Error, error::Error,
fmt::{Debug, Display}, fmt::{Debug, Display},
@ -23,6 +24,7 @@ use std::{
ops::Not, ops::Not,
}; };
use uucore::error::UError; use uucore::error::UError;
use uucore::show_warning;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum BadSequence { pub enum BadSequence {
@ -293,7 +295,9 @@ impl Sequence {
Self::parse_class, Self::parse_class,
Self::parse_char_equal, Self::parse_char_equal,
// NOTE: This must be the last one // NOTE: This must be the last one
map(Self::parse_backslash_or_char, |s| Ok(Self::Char(s))), map(Self::parse_backslash_or_char_with_warning, |s| {
Ok(Self::Char(s))
}),
)))(input) )))(input)
.map(|(_, r)| r) .map(|(_, r)| r)
.unwrap() .unwrap()
@ -302,10 +306,16 @@ impl Sequence {
} }
fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> { fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> {
// Used by `parse_char_range`, `parse_char_star`, `parse_char_repeat`, and `parse_char_equal`.
// In these patterns there are no ambiguous cases, so the variant without a warning is used.
preceded(tag("\\"), Self::parse_octal_up_to_three_digits)(input)
}
fn parse_octal_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
preceded( preceded(
tag("\\"), tag("\\"),
alt(( alt((
Self::parse_octal_up_to_three_digits, Self::parse_octal_up_to_three_digits_with_warning,
// Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be // Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be
// parsed as a byte // parsed as a byte
// See test `test_multibyte_octal_sequence` // See test `test_multibyte_octal_sequence`
@ -319,16 +329,29 @@ impl Sequence {
recognize(many_m_n(1, 3, one_of("01234567"))), recognize(many_m_n(1, 3, one_of("01234567"))),
|out: &[u8]| { |out: &[u8]| {
let str_to_parse = std::str::from_utf8(out).unwrap(); let str_to_parse = std::str::from_utf8(out).unwrap();
u8::from_str_radix(str_to_parse, 8).ok()
},
)(input)
}
match u8::from_str_radix(str_to_parse, 8) { fn parse_octal_up_to_three_digits_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
Ok(ue) => Some(ue), map_opt(
Err(_pa) => { recognize(many_m_n(1, 3, one_of("01234567"))),
// TODO |out: &[u8]| {
// A warning needs to be printed here let str_to_parse = std::str::from_utf8(out).unwrap();
// See https://github.com/uutils/coreutils/issues/6821 let result = u8::from_str_radix(str_to_parse, 8).ok();
None if result.is_none() {
} let origin_octal: &str = std::str::from_utf8(input).unwrap();
let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap();
let outstand_char: char = char::from_u32(input[2] as u32).unwrap();
show_warning!(
"the ambiguous octal escape \\{} is being\n interpreted as the 2-byte sequence \\0{}, {}",
origin_octal,
actual_octal_tail,
outstand_char
);
} }
result
}, },
)(input) )(input)
} }
@ -360,6 +383,14 @@ impl Sequence {
alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input) alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input)
} }
/// Parses one byte from `input`, using the octal parser that emits the
/// ambiguous-escape warning.
///
/// The alternatives are attempted in this order: `parse_octal_with_warning`,
/// then `parse_backslash`, then `single_char`.
fn parse_backslash_or_char_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
    let mut byte_parser = alt((
        Self::parse_octal_with_warning,
        Self::parse_backslash,
        Self::single_char,
    ));
    byte_parser(input)
}
fn single_char(input: &[u8]) -> IResult<&[u8], u8> { fn single_char(input: &[u8]) -> IResult<&[u8], u8> {
take(1usize)(input).map(|(l, a)| (l, a[0])) take(1usize)(input).map(|(l, a)| (l, a[0]))
} }

View file

@ -1494,9 +1494,7 @@ fn test_multibyte_octal_sequence() {
.args(&["-d", r"\501"]) .args(&["-d", r"\501"])
.pipe_in("(1Ł)") .pipe_in("(1Ł)")
.succeeds() .succeeds()
// TODO .stderr_is("tr: warning: the ambiguous octal escape \\501 is being\n interpreted as the 2-byte sequence \\050, 1\n")
// A warning needs to be printed here
// See https://github.com/uutils/coreutils/issues/6821
.stdout_is("Ł)"); .stdout_is("Ł)");
} }