From b83c30b12e88a8549e2f9367d80664eb73090187 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sat, 11 Feb 2023 21:14:12 +0100 Subject: [PATCH 1/5] tail: improve GNU compatibility --- src/uu/tail/src/args.rs | 23 +++- src/uu/tail/src/parse.rs | 171 +++++++++++---------------- tests/by-util/test_tail.rs | 236 +++++++++++++++++++++++++++++++++++++ 3 files changed, 325 insertions(+), 105 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 6cb77757c..aa4b1c867 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -64,7 +64,12 @@ impl FilterMode { let mode = if let Some(arg) = matches.get_one::(options::BYTES) { match parse_num(arg) { Ok(signum) => Self::Bytes(signum), - Err(e) => return Err(UUsageError::new(1, format!("invalid number of bytes: {e}"))), + Err(e) => { + return Err(USimpleError::new( + 1, + format!("invalid number of bytes: {e}"), + )) + } } } else if let Some(arg) = matches.get_one::(options::LINES) { match parse_num(arg) { @@ -72,7 +77,12 @@ impl FilterMode { let delimiter = if zero_term { 0 } else { b'\n' }; Self::Lines(signum, delimiter) } - Err(e) => return Err(UUsageError::new(1, format!("invalid number of lines: {e}"))), + Err(e) => { + return Err(USimpleError::new( + 1, + format!("invalid number of lines: {e}"), + )) + } } } else if zero_term { Self::default_zero() @@ -307,14 +317,19 @@ pub fn arg_iterate<'a>( if let Some(s) = second.to_str() { match parse::parse_obsolete(s) { Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), - Some(Err(e)) => Err(UUsageError::new( + Some(Err(e)) => Err(USimpleError::new( 1, match e { - parse::ParseError::Syntax => format!("bad argument format: {}", s.quote()), parse::ParseError::Overflow => format!( "invalid argument: {} Value too large for defined datatype", s.quote() ), + parse::ParseError::Context => { + format!( + "option used in invalid context -- {}", + s.chars().nth(1).unwrap_or_default() + ) + } }, )), None => Ok(Box::new(vec![first, second].into_iter().chain(args))), diff --git a/src/uu/tail/src/parse.rs b/src/uu/tail/src/parse.rs index 2129d8e29..15c770bc5 100644 --- a/src/uu/tail/src/parse.rs +++ b/src/uu/tail/src/parse.rs @@ -7,92 +7,67 @@ use std::ffi::OsString; #[derive(PartialEq, Eq, Debug)] pub enum ParseError { - Syntax, Overflow, + Context, } /// Parses obsolete syntax -/// tail -NUM\[kmzv\] // spell-checker:disable-line +/// tail -\[NUM\]\[bl\]\[f\] and tail +\[NUM\]\[bcl\]\[f\] // spell-checker:disable-line pub fn parse_obsolete(src: &str) -> Option, ParseError>> { - let mut chars = src.char_indices(); - if let Some((_, '-')) = chars.next() { - let mut num_end = 0usize; - let mut has_num = false; - let mut last_char = 0 as char; - for (n, c) in &mut chars { - if c.is_ascii_digit() { - has_num = true; - num_end = n; - } else { - last_char = c; - break; - } - } - if has_num { - match src[1..=num_end].parse::() { - Ok(num) => { - let mut quiet = false; - let mut verbose = false; - let mut zero_terminated = false; - let mut multiplier = None; - let mut c = last_char; - loop { - // not that here, we only match lower case 'k', 'c', and 'm' - match c { - // we want to preserve order - // this also saves us 1 heap allocation - 'q' => { - quiet = true; - verbose = false; - } - 'v' => { - verbose = true; - quiet = false; - } - 'z' => zero_terminated = true, - 'c' => multiplier = Some(1), - 'b' => multiplier = Some(512), - 'k' => multiplier = Some(1024), - 'm' => multiplier = Some(1024 * 1024), - '\0' => {} - _ => return Some(Err(ParseError::Syntax)), - } - if let Some((_, next)) = chars.next() { - c = next; - } else { - break; - } - } - let mut options = Vec::new(); - if quiet { - options.push(OsString::from("-q")); - } - if verbose { - options.push(OsString::from("-v")); - } - if zero_terminated { - options.push(OsString::from("-z")); - } - if let Some(n) = multiplier { - options.push(OsString::from("-c")); - let num = match num.checked_mul(n) { - Some(n) => n, - None => return Some(Err(ParseError::Overflow)), - }; - options.push(OsString::from(format!("{num}"))); - } else { - options.push(OsString::from("-n")); - options.push(OsString::from(format!("{num}"))); - } - Some(Ok(options.into_iter())) - } - Err(_) => Some(Err(ParseError::Overflow)), - } + let mut chars = src.chars(); + let sign = chars.next()?; + if sign != '+' && sign != '-' { + return None; + } + + let numbers: String = chars.clone().take_while(|&c| c.is_ascii_digit()).collect(); + let has_num = !numbers.is_empty(); + let num: usize = if has_num { + if let Ok(num) = numbers.parse() { + num } else { - None + return Some(Err(ParseError::Overflow)); } } else { - None + 10 + }; + + let mut follow = false; + let mut mode = None; + let mut first_char = true; + for char in chars.skip_while(|&c| c.is_ascii_digit()) { + if sign == '-' && char == 'c' && !has_num { + // special case: -c should be handled by clap (is ambiguous) + return None; + } else if char == 'f' { + follow = true; + } else if first_char && (char == 'b' || char == 'c' || char == 'l') { + mode = Some(char); + } else if has_num && sign == '-' { + return Some(Err(ParseError::Context)); + } else { + return None; + } + first_char = false; } + + let mut options = Vec::new(); + if follow { + options.push(OsString::from("-f")); + } + let mode = mode.unwrap_or('l'); + if mode == 'b' || mode == 'c' { + options.push(OsString::from("-c")); + let n = if mode == 'b' { 512 } else { 1 }; + let num = match num.checked_mul(n) { + Some(n) => n, + None => return Some(Err(ParseError::Overflow)), + }; + options.push(OsString::from(format!("{sign}{num}"))); + } else { + options.push(OsString::from("-n")); + options.push(OsString::from(format!("{sign}{num}"))); + } + Some(Ok(options.into_iter())) } #[cfg(test)] @@ -113,40 +88,35 @@ mod tests { } #[test] fn test_parse_numbers_obsolete() { - assert_eq!(obsolete("-5"), obsolete_result(&["-n", "5"])); - assert_eq!(obsolete("-100"), obsolete_result(&["-n", "100"])); - assert_eq!(obsolete("-5m"), obsolete_result(&["-c", "5242880"])); - assert_eq!(obsolete("-1k"), obsolete_result(&["-c", "1024"])); - assert_eq!(obsolete("-2b"), obsolete_result(&["-c", "1024"])); - assert_eq!(obsolete("-1mmk"), obsolete_result(&["-c", "1024"])); - assert_eq!(obsolete("-1vz"), obsolete_result(&["-v", "-z", "-n", "1"])); - assert_eq!( - obsolete("-1vzqvq"), // spell-checker:disable-line - obsolete_result(&["-q", "-z", "-n", "1"]) - ); - assert_eq!(obsolete("-1vzc"), obsolete_result(&["-v", "-z", "-c", "1"])); - assert_eq!( - obsolete("-105kzm"), - obsolete_result(&["-z", "-c", "110100480"]) - ); + assert_eq!(obsolete("+2c"), obsolete_result(&["-c", "+2"])); + assert_eq!(obsolete("-5"), obsolete_result(&["-n", "-5"])); + assert_eq!(obsolete("-100"), obsolete_result(&["-n", "-100"])); + assert_eq!(obsolete("-2b"), obsolete_result(&["-c", "-1024"])); } #[test] fn test_parse_errors_obsolete() { - assert_eq!(obsolete("-5n"), Some(Err(ParseError::Syntax))); - assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Syntax))); + assert_eq!(obsolete("-5n"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-1vzc"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-5m"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-1k"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-1mmk"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-105kzm"), Some(Err(ParseError::Context))); + assert_eq!(obsolete("-1vz"), Some(Err(ParseError::Context))); + assert_eq!( + obsolete("-1vzqvq"), // spell-checker:disable-line + Some(Err(ParseError::Context)) + ); } #[test] fn test_parse_obsolete_no_match() { assert_eq!(obsolete("-k"), None); assert_eq!(obsolete("asd"), None); + assert_eq!(obsolete("-cc"), None); } #[test] #[cfg(target_pointer_width = "64")] fn test_parse_obsolete_overflow_x64() { - assert_eq!( - obsolete("-1000000000000000m"), - Some(Err(ParseError::Overflow)) - ); assert_eq!( obsolete("-10000000000000000000000"), Some(Err(ParseError::Overflow)) @@ -156,6 +126,5 @@ mod tests { #[cfg(target_pointer_width = "32")] fn test_parse_obsolete_overflow_x32() { assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow))); - assert_eq!(obsolete("-42949672k"), Some(Err(ParseError::Overflow))); } } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index b2ec0e7bd..4644cbc02 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -4475,3 +4475,239 @@ fn test_args_sleep_interval_when_illegal_argument_then_usage_error(#[case] sleep .usage_error(format!("invalid number of seconds: '{sleep_interval}'")) .code_is(1); } + +#[test] +fn test_gnu_args_plus_c() { + let scene = TestScenario::new(util_name!()); + + // obs-plus-c1 + scene + .ucmd() + .arg("+2c") + .pipe_in("abcd") + .succeeds() + .stdout_only("bcd"); + // obs-plus-c2 + scene + .ucmd() + .arg("+8c") + .pipe_in("abcd") + .succeeds() + .stdout_only(""); + // obs-plus-x1: same as +10c + scene + .ucmd() + .arg("+c") + .pipe_in(format!("x{}z", "y".repeat(10))) + .succeeds() + .stdout_only("yyz"); +} + +#[test] +fn test_gnu_args_c() { + let scene = TestScenario::new(util_name!()); + + // obs-c3 + scene + .ucmd() + .arg("-1c") + .pipe_in("abcd") + .succeeds() + .stdout_only("d"); + // obs-c4 + scene + .ucmd() + .arg("-9c") + .pipe_in("abcd") + .succeeds() + .stdout_only("abcd"); + // obs-c5 + scene + .ucmd() + .arg("-12c") + .pipe_in(format!("x{}z", "y".repeat(12))) + .succeeds() + .stdout_only(&format!("{}z", "y".repeat(11))); +} + +#[test] +fn test_gnu_args_l() { + let scene = TestScenario::new(util_name!()); + + // obs-l1 + scene + .ucmd() + .arg("-1l") + .pipe_in("x") + .succeeds() + .stdout_only("x"); + // obs-l2 + scene + .ucmd() + .arg("-1l") + .pipe_in("x\ny\n") + .succeeds() + .stdout_only("y\n"); + // obs-l3 + scene + .ucmd() + .arg("-1l") + .pipe_in("x\ny") + .succeeds() + .stdout_only("y"); + // obs-l: same as -10l + scene + .ucmd() + .arg("-l") + .pipe_in(format!("x{}z", "y\n".repeat(10))) + .succeeds() + .stdout_only(&format!("{}z", "y\n".repeat(9))); +} + +#[test] +fn test_gnu_args_plus_l() { + let scene = TestScenario::new(util_name!()); + + // obs-plus-l4 + scene + .ucmd() + .arg("+1l") + .pipe_in("x\ny\n") + .succeeds() + .stdout_only("x\ny\n"); + // ops-plus-l5 + scene + .ucmd() + .arg("+2l") + .pipe_in("x\ny\n") + .succeeds() + .stdout_only("y\n"); + // obs-plus-x2: same as +10l + scene + .ucmd() + .arg("+l") + .pipe_in(format!("x\n{}z", "y\n".repeat(10))) + .succeeds() + .stdout_only("y\ny\nz"); +} + +#[test] +fn test_gnu_args_number() { + let scene = TestScenario::new(util_name!()); + + // obs-1 + scene + .ucmd() + .arg("-1") + .pipe_in("x") + .succeeds() + .stdout_only("x"); + // obs-2 + scene + .ucmd() + .arg("-1") + .pipe_in("x\ny\n") + .succeeds() + .stdout_only("y\n"); + // obs-3 + scene + .ucmd() + .arg("-1") + .pipe_in("x\ny") + .succeeds() + .stdout_only("y"); +} + +#[test] +fn test_gnu_args_plus_number() { + let scene = TestScenario::new(util_name!()); + + // obs-plus-4 + scene + .ucmd() + .arg("+1") + .pipe_in("x\ny\n") + .succeeds() + .stdout_only("x\ny\n"); + // ops-plus-5 + scene + .ucmd() + .arg("+2") + .pipe_in("x\ny\n") + .succeeds() + .stdout_only("y\n"); +} + +#[test] +fn test_gnu_args_b() { + let scene = TestScenario::new(util_name!()); + + // obs-b + scene + .ucmd() + .arg("-b") + .pipe_in("x\n".repeat(512 * 10 / 2 + 1)) + .succeeds() + .stdout_only(&"x\n".repeat(512 * 10 / 2)); +} + +#[test] +fn test_gnu_args_err() { + let scene = TestScenario::new(util_name!()); + + // err-1 + scene + .ucmd() + .arg("+cl") + .fails() + .no_stdout() + .stderr_is("tail: cannot open '+cl' for reading: No such file or directory\n") + .code_is(1); + // err-2 + scene + .ucmd() + .arg("-cl") + .fails() + .no_stdout() + .stderr_is("tail: invalid number of bytes: 'l'\n") + .code_is(1); + // err-3 + scene + .ucmd() + .arg("+2cz") + .fails() + .no_stdout() + .stderr_is("tail: cannot open '+2cz' for reading: No such file or directory\n") + .code_is(1); + // err-4 + scene + .ucmd() + .arg("-2cX") + .fails() + .no_stdout() + .stderr_is("tail: option used in invalid context -- 2\n") + .code_is(1); + // err-5 + scene + .ucmd() + .arg("-c99999999999999999999") + .fails() + .no_stdout() + .stderr_is("tail: invalid number of bytes: '99999999999999999999'\n") + .code_is(1); + // err-6 + scene + .ucmd() + .arg("-c --") + .fails() + .no_stdout() + .stderr_is("tail: invalid number of bytes: '-'\n") + .code_is(1); + scene + .ucmd() + .arg("-5cz") + .fails() + .no_stdout() + .stderr_is("tail: option used in invalid context -- 5\n") + .code_is(1); +} From dc34e89d5044f545740b14fd8d3a092b1338ab11 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Thu, 16 Feb 2023 21:16:21 +0100 Subject: [PATCH 2/5] tail: skip clap for obsolete args --- src/uu/tail/src/args.rs | 124 +++++++++++++++++++++--------- src/uu/tail/src/parse.rs | 153 ++++++++++++++++++++----------------- tests/by-util/test_tail.rs | 108 +++++++++++++++++++++++++- 3 files changed, 278 insertions(+), 107 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index aa4b1c867..f18f99d1b 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -13,7 +13,6 @@ use fundu::DurationParser; use is_terminal::IsTerminal; use same_file::Handle; use std::collections::VecDeque; -use std::ffi::OsString; use std::time::Duration; use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::parse_size::{parse_size, ParseSizeError}; @@ -59,6 +58,19 @@ pub enum FilterMode { } impl FilterMode { + fn from_obsolete_args(args: &parse::ObsoleteArgs) -> Self { + let signum = if args.plus { + Signum::Positive(args.num) + } else { + Signum::Negative(args.num) + }; + if args.lines { + Self::Lines(signum, b'\n') + } else { + Self::Bytes(signum) + } + } + fn from(matches: &ArgMatches) -> UResult { let zero_term = matches.get_flag(options::ZERO_TERM); let mode = if let Some(arg) = matches.get_one::(options::BYTES) { @@ -132,6 +144,29 @@ pub struct Settings { } impl Settings { + pub fn from_obsolete_args(args: &parse::ObsoleteArgs, name: Option<&str>) -> Self { + let mut settings: Self = Self { + sleep_sec: Duration::from_secs_f32(1.0), + max_unchanged_stats: 5, + ..Default::default() + }; + if args.follow { + settings.follow = if name.is_some() { + Some(FollowMode::Name) + } else { + Some(FollowMode::Descriptor) + }; + } + settings.mode = FilterMode::from_obsolete_args(args); + let input = if let Some(name) = name { + Input::from(name.to_string()) + } else { + Input::default() + }; + settings.inputs.push_back(input); + settings + } + pub fn from(matches: &clap::ArgMatches) -> UResult { let mut settings: Self = Self { sleep_sec: Duration::from_secs_f32(1.0), @@ -308,37 +343,24 @@ impl Settings { } } -pub fn arg_iterate<'a>( - mut args: impl uucore::Args + 'a, -) -> UResult + 'a>> { - // argv[0] is always present - let first = args.next().unwrap(); - if let Some(second) = args.next() { - if let Some(s) = second.to_str() { - match parse::parse_obsolete(s) { - Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), - Some(Err(e)) => Err(USimpleError::new( - 1, - match e { - parse::ParseError::Overflow => format!( - "invalid argument: {} Value too large for defined datatype", - s.quote() - ), - parse::ParseError::Context => { - format!( - "option used in invalid context -- {}", - s.chars().nth(1).unwrap_or_default() - ) - } - }, - )), - None => Ok(Box::new(vec![first, second].into_iter().chain(args))), - } - } else { - Err(UUsageError::new(1, "bad argument encoding".to_owned())) - } - } else { - Ok(Box::new(vec![first].into_iter())) +pub fn parse_obsolete(args: &str) -> UResult> { + match parse::parse_obsolete(args) { + Some(Ok(args)) => Ok(Some(args)), + None => Ok(None), + Some(Err(e)) => Err(USimpleError::new( + 1, + match e { + parse::ParseError::OutOfRange => format!( + "invalid number: {}: Numerical result out of range", + args.quote() + ), + parse::ParseError::Overflow => format!("invalid number: {}", args.quote()), + parse::ParseError::Context => format!( + "option used in invalid context -- {}", + args.chars().nth(1).unwrap_or_default() + ), + }, + )), } } @@ -366,9 +388,29 @@ fn parse_num(src: &str) -> Result { }) } -pub fn parse_args(args: impl uucore::Args) -> UResult { - let matches = uu_app().try_get_matches_from(arg_iterate(args)?)?; - Settings::from(&matches) +pub fn parse_args(mut args: impl uucore::Args) -> UResult { + let first = args.next().unwrap(); + let second = match args.next() { + Some(second) => second, + None => return Settings::from(&uu_app().try_get_matches_from(vec![first])?), + }; + let second_str = match second.to_str() { + Some(second_str) => second_str, + None => { + let second_string = second.to_string_lossy(); + return Err(USimpleError::new( + 1, + format!("bad argument encoding: '{second_string}'"), + )); + } + }; + match parse_obsolete(second_str)? { + Some(obsolete_args) => Ok(Settings::from_obsolete_args(&obsolete_args, args.next())), + None => { + let args = vec![first, second].into_iter().chain(args); + Settings::from(&uu_app().try_get_matches_from(args)?) + } + } } pub fn uu_app() -> Command { @@ -497,6 +539,8 @@ pub fn uu_app() -> Command { #[cfg(test)] mod tests { + use crate::parse::ObsoleteArgs; + use super::*; #[test] @@ -528,4 +572,14 @@ mod tests { assert!(result.is_ok()); assert_eq!(result.unwrap(), Signum::Negative(1)); } + + #[test] + fn test_parse_obsolete_settings_f() { + let args = ObsoleteArgs { follow: true, ..Default::default() }; + let result = Settings::from_obsolete_args(&args, None); + assert_eq!(result.follow, Some(FollowMode::Descriptor)); + + let result = Settings::from_obsolete_args(&args, Some("test".into())); + assert_eq!(result.follow, Some(FollowMode::Name)); + } } diff --git a/src/uu/tail/src/parse.rs b/src/uu/tail/src/parse.rs index 15c770bc5..2f4ebb62e 100644 --- a/src/uu/tail/src/parse.rs +++ b/src/uu/tail/src/parse.rs @@ -3,16 +3,34 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -use std::ffi::OsString; +#[derive(PartialEq, Eq, Debug, Copy, Clone)] +pub struct ObsoleteArgs { + pub num: u64, + pub plus: bool, + pub lines: bool, + pub follow: bool, +} + +impl Default for ObsoleteArgs { + fn default() -> Self { + Self { + num: 10, + plus: false, + lines: true, + follow: false, + } + } +} #[derive(PartialEq, Eq, Debug)] pub enum ParseError { + OutOfRange, Overflow, Context, } /// Parses obsolete syntax /// tail -\[NUM\]\[bl\]\[f\] and tail +\[NUM\]\[bcl\]\[f\] // spell-checker:disable-line -pub fn parse_obsolete(src: &str) -> Option, ParseError>> { +pub fn parse_obsolete(src: &str) -> Option> { let mut chars = src.chars(); let sign = chars.next()?; if sign != '+' && sign != '-' { @@ -21,27 +39,27 @@ pub fn parse_obsolete(src: &str) -> Option let numbers: String = chars.clone().take_while(|&c| c.is_ascii_digit()).collect(); let has_num = !numbers.is_empty(); - let num: usize = if has_num { + let num: u64 = if has_num { if let Ok(num) = numbers.parse() { num } else { - return Some(Err(ParseError::Overflow)); + return Some(Err(ParseError::OutOfRange)); } } else { 10 }; let mut follow = false; - let mut mode = None; + let mut mode = 'l'; let mut first_char = true; for char in chars.skip_while(|&c| c.is_ascii_digit()) { - if sign == '-' && char == 'c' && !has_num { - // special case: -c should be handled by clap (is ambiguous) + if !has_num && first_char && sign == '-' && (char == 'c' || char == 'f') { + // special cases: -c, -f should be handled by clap (are ambiguous) return None; } else if char == 'f' { follow = true; } else if first_char && (char == 'b' || char == 'c' || char == 'l') { - mode = Some(char); + mode = char; } else if has_num && sign == '-' { return Some(Err(ParseError::Context)); } else { @@ -49,82 +67,81 @@ pub fn parse_obsolete(src: &str) -> Option } first_char = false; } + let multiplier = if mode == 'b' { 512 } else { 1 }; + let num = match num.checked_mul(multiplier) { + Some(n) => n, + None => return Some(Err(ParseError::Overflow)), + }; - let mut options = Vec::new(); - if follow { - options.push(OsString::from("-f")); - } - let mode = mode.unwrap_or('l'); - if mode == 'b' || mode == 'c' { - options.push(OsString::from("-c")); - let n = if mode == 'b' { 512 } else { 1 }; - let num = match num.checked_mul(n) { - Some(n) => n, - None => return Some(Err(ParseError::Overflow)), - }; - options.push(OsString::from(format!("{sign}{num}"))); - } else { - options.push(OsString::from("-n")); - options.push(OsString::from(format!("{sign}{num}"))); - } - Some(Ok(options.into_iter())) + Some(Ok(ObsoleteArgs { + num, + plus: sign == '+', + lines: mode == 'l', + follow, + })) } #[cfg(test)] mod tests { use super::*; - fn obsolete(src: &str) -> Option, ParseError>> { - let r = parse_obsolete(src); - match r { - Some(s) => match s { - Ok(v) => Some(Ok(v.map(|s| s.to_str().unwrap().to_owned()).collect())), - Err(e) => Some(Err(e)), - }, - None => None, - } - } - fn obsolete_result(src: &[&str]) -> Option, ParseError>> { - Some(Ok(src.iter().map(|s| s.to_string()).collect())) - } #[test] fn test_parse_numbers_obsolete() { - assert_eq!(obsolete("+2c"), obsolete_result(&["-c", "+2"])); - assert_eq!(obsolete("-5"), obsolete_result(&["-n", "-5"])); - assert_eq!(obsolete("-100"), obsolete_result(&["-n", "-100"])); - assert_eq!(obsolete("-2b"), obsolete_result(&["-c", "-1024"])); + assert_eq!( + parse_obsolete("+2c"), + Some(Ok(ObsoleteArgs { + num: 2, + plus: true, + lines: false, + follow: false, + })) + ); + assert_eq!( + parse_obsolete("-5"), + Some(Ok(ObsoleteArgs { + num: 5, + plus: false, + lines: true, + follow: false, + })) + ); + assert_eq!( + parse_obsolete("+100f"), + Some(Ok(ObsoleteArgs { + num: 100, + plus: true, + lines: true, + follow: true, + })) + ); + assert_eq!( + parse_obsolete("-2b"), + Some(Ok(ObsoleteArgs { + num: 1024, + plus: false, + lines: false, + follow: false, + })) + ); } #[test] fn test_parse_errors_obsolete() { - assert_eq!(obsolete("-5n"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-1vzc"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-5m"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-1k"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-1mmk"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-105kzm"), Some(Err(ParseError::Context))); - assert_eq!(obsolete("-1vz"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-5n"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-5c5"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-1vzc"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-5m"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-1k"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-1mmk"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-105kzm"), Some(Err(ParseError::Context))); + assert_eq!(parse_obsolete("-1vz"), Some(Err(ParseError::Context))); assert_eq!( - obsolete("-1vzqvq"), // spell-checker:disable-line + parse_obsolete("-1vzqvq"), // spell-checker:disable-line Some(Err(ParseError::Context)) ); } #[test] fn test_parse_obsolete_no_match() { - assert_eq!(obsolete("-k"), None); - assert_eq!(obsolete("asd"), None); - assert_eq!(obsolete("-cc"), None); - } - #[test] - #[cfg(target_pointer_width = "64")] - fn test_parse_obsolete_overflow_x64() { - assert_eq!( - obsolete("-10000000000000000000000"), - Some(Err(ParseError::Overflow)) - ); - } - #[test] - #[cfg(target_pointer_width = "32")] - fn test_parse_obsolete_overflow_x32() { - assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow))); + assert_eq!(parse_obsolete("-k"), None); + assert_eq!(parse_obsolete("asd"), None); + assert_eq!(parse_obsolete("-cc"), None); } } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 4644cbc02..d1e5ba632 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -47,6 +47,13 @@ static FOLLOW_NAME_EXP: &str = "follow_name.expected"; #[cfg(not(windows))] const DEFAULT_SLEEP_INTERVAL_MILLIS: u64 = 1000; +// The binary integer "10000000" is *not* a valid UTF-8 encoding +// of a character: https://en.wikipedia.org/wiki/UTF-8#Encoding +#[cfg(unix)] +const INVALID_UTF8: u8 = 0x80; +#[cfg(windows)] +const INVALID_UTF16: u16 = 0xD800; + #[test] fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1); @@ -469,16 +476,13 @@ fn test_follow_non_utf8_bytes() { // Now append some bytes that are not valid UTF-8. // - // The binary integer "10000000" is *not* a valid UTF-8 encoding - // of a character: https://en.wikipedia.org/wiki/UTF-8#Encoding - // // We also write the newline character because our implementation // of `tail` is attempting to read a line of input, so the // presence of a newline character will force the `follow()` // function to conclude reading input bytes and start writing them // to output. The newline character is not fundamental to this // test, it is just a requirement of the current implementation. - let expected = [0b10000000, b'\n']; + let expected = [INVALID_UTF8, b'\n']; at.append_bytes(FOOBAR_TXT, &expected); child @@ -4710,4 +4714,100 @@ fn test_gnu_args_err() { .no_stdout() .stderr_is("tail: option used in invalid context -- 5\n") .code_is(1); + scene + .ucmd() + .arg("-9999999999999999999b") + .fails() + .no_stdout() + .stderr_is("tail: invalid number: '-9999999999999999999b'\n") + .code_is(1); + scene + .ucmd() + .arg("-999999999999999999999b") + .fails() + .no_stdout() + .stderr_is( + "tail: invalid number: '-999999999999999999999b': Numerical result out of range\n", + ) + .code_is(1); +} + +#[test] +fn test_gnu_args_f() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let mut p = scene + .ucmd() + .set_stdin(Stdio::piped()) + .arg("+f") + .run_no_wait(); + p.make_assertion_with_delay(500).is_alive(); + p.kill() + .make_assertion() + .with_all_output() + .no_stderr() + .no_stdout(); + + let source = "file"; + at.touch(source); + let mut p = scene.ucmd().args(&["+f", source]).run_no_wait(); + p.make_assertion_with_delay(500).is_alive(); + p.kill() + .make_assertion() + .with_all_output() + .no_stderr() + .no_stdout(); +} + +#[test] +#[cfg(unix)] +fn test_obsolete_encoding_unix() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let scene = TestScenario::new(util_name!()); + let invalid_utf8_arg = OsStr::from_bytes(&[b'-', INVALID_UTF8, b'b']); + let valid_utf8_arg = OsStr::from_bytes(&[b'-', b'b']); + + scene + .ucmd() + .arg(invalid_utf8_arg) + .fails() + .no_stdout() + .stderr_is("tail: bad argument encoding: '-�b'\n") + .code_is(1); + scene + .ucmd() + .args(&[valid_utf8_arg, invalid_utf8_arg]) + .fails() + .no_stdout() + .stderr_is("tail: bad argument encoding\n") + .code_is(1); +} + +#[test] +#[cfg(windows)] +fn test_obsolete_encoding_windows() { + use std::ffi::OsString; + use std::os::windows::ffi::OsStringExt; + + let scene = TestScenario::new(util_name!()); + let invalid_utf16_arg = OsString::from_wide(&['-' as u16, INVALID_UTF16, 'b' as u16]); + let valid_utf16_arg = OsString::from("-b"); + + scene + .ucmd() + .arg(&invalid_utf16_arg) + .fails() + .no_stdout() + .stderr_is("tail: bad argument encoding: '-�b'\n") + .code_is(1); + scene + .ucmd() + .args(&[&valid_utf16_arg, &invalid_utf16_arg]) + .fails() + .no_stdout() + .stderr_is("tail: bad argument encoding\n") + .code_is(1); } From f6edea2d0565d476d6725f57f20bb4b4abf16f20 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sat, 18 Feb 2023 15:44:49 +0100 Subject: [PATCH 3/5] tail: enable non-utf8 paths --- src/uu/tail/src/args.rs | 12 ++++++++---- src/uu/tail/src/paths.rs | 13 +++++++------ tests/by-util/test_tail.rs | 16 ---------------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index f18f99d1b..7bf06f853 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -13,6 +13,7 @@ use fundu::DurationParser; use is_terminal::IsTerminal; use same_file::Handle; use std::collections::VecDeque; +use std::ffi::OsString; use std::time::Duration; use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::parse_size::{parse_size, ParseSizeError}; @@ -144,7 +145,7 @@ pub struct Settings { } impl Settings { - pub fn from_obsolete_args(args: &parse::ObsoleteArgs, name: Option<&str>) -> Self { + pub fn from_obsolete_args(args: &parse::ObsoleteArgs, name: Option) -> Self { let mut settings: Self = Self { sleep_sec: Duration::from_secs_f32(1.0), max_unchanged_stats: 5, @@ -159,7 +160,7 @@ impl Settings { } settings.mode = FilterMode::from_obsolete_args(args); let input = if let Some(name) = name { - Input::from(name.to_string()) + Input::from(&name) } else { Input::default() }; @@ -249,7 +250,7 @@ impl Settings { let mut inputs: VecDeque = matches .get_many::(options::ARG_FILES) - .map(|v| v.map(|string| Input::from(string.clone())).collect()) + .map(|v| v.map(|string| Input::from(&string)).collect()) .unwrap_or_default(); // apply default and add '-' to inputs if none is present @@ -575,7 +576,10 @@ mod tests { #[test] fn test_parse_obsolete_settings_f() { - let args = ObsoleteArgs { follow: true, ..Default::default() }; + let args = ObsoleteArgs { + follow: true, + ..Default::default() + }; let result = Settings::from_obsolete_args(&args, None); assert_eq!(result.follow, Some(FollowMode::Descriptor)); diff --git a/src/uu/tail/src/paths.rs b/src/uu/tail/src/paths.rs index d8e6ece9a..b1cbc9bb8 100644 --- a/src/uu/tail/src/paths.rs +++ b/src/uu/tail/src/paths.rs @@ -6,6 +6,7 @@ // spell-checker:ignore tailable seekable stdlib (stdlib) use crate::text; +use std::ffi::OsStr; use std::fs::{File, Metadata}; use std::io::{Seek, SeekFrom}; #[cfg(unix)] @@ -26,21 +27,21 @@ pub struct Input { } impl Input { - // TODO: from &str may be the better choice - pub fn from(string: String) -> Self { - let kind = if string == text::DASH { + pub fn from>(string: &T) -> Self { + let valid_string = string.as_ref().to_str(); + let kind = if valid_string.is_some() && valid_string.unwrap() == text::DASH { InputKind::Stdin } else { - InputKind::File(PathBuf::from(&string)) + InputKind::File(PathBuf::from(string.as_ref())) }; let display_name = match kind { - InputKind::File(_) => string, + InputKind::File(_) => string.as_ref().to_string_lossy().to_string(), InputKind::Stdin => { if cfg!(unix) { text::STDIN_HEADER.to_string() } else { - string + string.as_ref().to_string_lossy().to_string() } } }; diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index d1e5ba632..b450d303b 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -4768,7 +4768,6 @@ fn test_obsolete_encoding_unix() { let scene = TestScenario::new(util_name!()); let invalid_utf8_arg = OsStr::from_bytes(&[b'-', INVALID_UTF8, b'b']); - let valid_utf8_arg = OsStr::from_bytes(&[b'-', b'b']); scene .ucmd() @@ -4777,13 +4776,6 @@ fn test_obsolete_encoding_unix() { .no_stdout() .stderr_is("tail: bad argument encoding: '-�b'\n") .code_is(1); - scene - .ucmd() - .args(&[valid_utf8_arg, invalid_utf8_arg]) - .fails() - .no_stdout() - .stderr_is("tail: bad argument encoding\n") - .code_is(1); } #[test] @@ -4794,7 +4786,6 @@ fn test_obsolete_encoding_windows() { let scene = TestScenario::new(util_name!()); let invalid_utf16_arg = OsString::from_wide(&['-' as u16, INVALID_UTF16, 'b' as u16]); - let valid_utf16_arg = OsString::from("-b"); scene .ucmd() @@ -4803,11 +4794,4 @@ fn test_obsolete_encoding_windows() { .no_stdout() .stderr_is("tail: bad argument encoding: '-�b'\n") .code_is(1); - scene - .ucmd() - .args(&[&valid_utf16_arg, &invalid_utf16_arg]) - .fails() - .no_stdout() - .stderr_is("tail: bad argument encoding\n") - .code_is(1); } From 9b49f368c7cf7fb91054f2b609432715efc3fbdc Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sun, 19 Feb 2023 14:24:07 +0100 Subject: [PATCH 4/5] tail: parse default before obsolete --- src/uu/tail/src/args.rs | 119 +++++++++++++++++++++++-------------- tests/by-util/test_tail.rs | 16 ++--- 2 files changed, 84 insertions(+), 51 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 7bf06f853..9d0d6db7c 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -130,7 +130,7 @@ pub enum VerificationResult { NoOutput, } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Settings { pub follow: Option, pub max_unchanged_stats: u32, @@ -144,13 +144,26 @@ pub struct Settings { pub inputs: VecDeque, } -impl Settings { - pub fn from_obsolete_args(args: &parse::ObsoleteArgs, name: Option) -> Self { - let mut settings: Self = Self { - sleep_sec: Duration::from_secs_f32(1.0), +impl Default for Settings { + fn default() -> Self { + Self { max_unchanged_stats: 5, - ..Default::default() - }; + sleep_sec: Duration::from_secs_f32(1.0), + follow: Default::default(), + mode: Default::default(), + pid: Default::default(), + retry: Default::default(), + use_polling: Default::default(), + verbose: Default::default(), + presume_input_pipe: Default::default(), + inputs: Default::default(), + } + } +} + +impl Settings { + pub fn from_obsolete_args(args: &parse::ObsoleteArgs, name: Option<&OsString>) -> Self { + let mut settings: Self = Default::default(); if args.follow { settings.follow = if name.is_some() { Some(FollowMode::Name) @@ -170,25 +183,25 @@ impl Settings { pub fn from(matches: &clap::ArgMatches) -> UResult { let mut settings: Self = Self { - sleep_sec: Duration::from_secs_f32(1.0), - max_unchanged_stats: 5, + follow: if matches.get_flag(options::FOLLOW_RETRY) { + Some(FollowMode::Name) + } else if matches.value_source(options::FOLLOW) != Some(ValueSource::CommandLine) { + None + } else if matches.get_one::(options::FOLLOW) + == Some(String::from("name")).as_ref() + { + Some(FollowMode::Name) + } else { + Some(FollowMode::Descriptor) + }, + retry: matches.get_flag(options::RETRY) || matches.get_flag(options::FOLLOW_RETRY), + use_polling: matches.get_flag(options::USE_POLLING), + mode: FilterMode::from(matches)?, + verbose: matches.get_flag(options::verbosity::VERBOSE), + presume_input_pipe: matches.get_flag(options::PRESUME_INPUT_PIPE), ..Default::default() }; - settings.follow = if matches.get_flag(options::FOLLOW_RETRY) { - Some(FollowMode::Name) - } else if matches.value_source(options::FOLLOW) != Some(ValueSource::CommandLine) { - None - } else if matches.get_one::(options::FOLLOW) == Some(String::from("name")).as_ref() - { - Some(FollowMode::Name) - } else { - Some(FollowMode::Descriptor) - }; - - settings.retry = - matches.get_flag(options::RETRY) || matches.get_flag(options::FOLLOW_RETRY); - if let Some(source) = matches.get_one::(options::SLEEP_INT) { // Advantage of `fundu` over `Duration::(try_)from_secs_f64(source.parse().unwrap())`: // * doesn't panic on errors like `Duration::from_secs_f64` would. @@ -205,8 +218,6 @@ impl Settings { })?; } - settings.use_polling = matches.get_flag(options::USE_POLLING); - if let Some(s) = matches.get_one::(options::MAX_UNCHANGED_STATS) { settings.max_unchanged_stats = match s.parse::() { Ok(s) => s, @@ -246,8 +257,6 @@ impl Settings { } } - settings.mode = FilterMode::from(matches)?; - let mut inputs: VecDeque = matches .get_many::(options::ARG_FILES) .map(|v| v.map(|string| Input::from(&string)).collect()) @@ -258,13 +267,10 @@ impl Settings { inputs.push_front(Input::default()); } - settings.verbose = (matches.get_flag(options::verbosity::VERBOSE) || inputs.len() > 1) - && !matches.get_flag(options::verbosity::QUIET); + settings.verbose = inputs.len() > 1 && !matches.get_flag(options::verbosity::QUIET); settings.inputs = inputs; - settings.presume_input_pipe = matches.get_flag(options::PRESUME_INPUT_PIPE); - Ok(settings) } @@ -342,6 +348,19 @@ impl Settings { VerificationResult::Ok } + + pub fn is_default(&self) -> bool { + let default = Self::default(); + self.max_unchanged_stats == default.max_unchanged_stats + && self.sleep_sec == default.sleep_sec + && self.follow == default.follow + && self.mode == default.mode + && self.pid == default.pid + && self.retry == default.retry + && self.use_polling == default.use_polling + && (self.verbose == default.verbose || self.inputs.len() > 1) + && self.presume_input_pipe == default.presume_input_pipe + } } pub fn parse_obsolete(args: &str) -> UResult> { @@ -389,28 +408,42 @@ fn parse_num(src: &str) -> Result { }) } -pub fn parse_args(mut args: impl uucore::Args) -> UResult { - let first = args.next().unwrap(); - let second = match args.next() { +pub fn parse_args(args: impl uucore::Args) -> UResult { + let args_vec: Vec = args.collect(); + let clap_result = match uu_app().try_get_matches_from(args_vec.clone()) { + Ok(matches) => { + let settings = Settings::from(&matches)?; + if !settings.is_default() { + // non-default settings can't have obsolete arguments + return Ok(settings); + } + Ok(settings) + } + Err(err) => Err(err.into()), + }; + + // clap parsing failed or resulted to default -> check for obsolete/deprecated args + // argv[0] is always present + let second = match args_vec.get(1) { Some(second) => second, - None => return Settings::from(&uu_app().try_get_matches_from(vec![first])?), + None => return clap_result, }; let second_str = match second.to_str() { Some(second_str) => second_str, None => { - let second_string = second.to_string_lossy(); + let invalid_string = second.to_string_lossy(); return Err(USimpleError::new( 1, - format!("bad argument encoding: '{second_string}'"), + format!("bad argument encoding: '{invalid_string}'"), )); } }; match parse_obsolete(second_str)? { - Some(obsolete_args) => Ok(Settings::from_obsolete_args(&obsolete_args, args.next())), - None => { - let args = vec![first, second].into_iter().chain(args); - Settings::from(&uu_app().try_get_matches_from(args)?) - } + Some(obsolete_args) => Ok(Settings::from_obsolete_args( + &obsolete_args, + args_vec.get(2), + )), + None => clap_result, } } @@ -583,7 +616,7 @@ mod tests { let result = Settings::from_obsolete_args(&args, None); assert_eq!(result.follow, Some(FollowMode::Descriptor)); - let result = Settings::from_obsolete_args(&args, Some("test".into())); + let result = Settings::from_obsolete_args(&args, Some(&"file".into())); assert_eq!(result.follow, Some(FollowMode::Name)); } } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index b450d303b..eb18e858f 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -4737,11 +4737,9 @@ fn test_gnu_args_f() { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; - let mut p = scene - .ucmd() - .set_stdin(Stdio::piped()) - .arg("+f") - .run_no_wait(); + let source = "file"; + at.touch(source); + let mut p = scene.ucmd().args(&["+f", source]).run_no_wait(); p.make_assertion_with_delay(500).is_alive(); p.kill() .make_assertion() @@ -4749,9 +4747,11 @@ fn test_gnu_args_f() { .no_stderr() .no_stdout(); - let source = "file"; - at.touch(source); - let mut p = scene.ucmd().args(&["+f", source]).run_no_wait(); + let mut p = scene + .ucmd() + .set_stdin(Stdio::piped()) + .arg("+f") + .run_no_wait(); p.make_assertion_with_delay(500).is_alive(); p.kill() .make_assertion() From 26a945f3b54d3e3d86dccda3dafd08f71092d198 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sun, 19 Feb 2023 15:16:58 +0100 Subject: [PATCH 5/5] tail: simplify obsolete parsing --- src/uu/tail/src/args.rs | 117 ++++++++++++++++++------------------- src/uu/tail/src/parse.rs | 122 ++++++++++++++++++++++++++------------- src/uu/tail/src/paths.rs | 3 +- 3 files changed, 140 insertions(+), 102 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 9d0d6db7c..8e039b5f4 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -348,39 +348,33 @@ impl Settings { VerificationResult::Ok } - - pub fn is_default(&self) -> bool { - let default = Self::default(); - self.max_unchanged_stats == default.max_unchanged_stats - && self.sleep_sec == default.sleep_sec - && self.follow == default.follow - && self.mode == default.mode - && self.pid == default.pid - && self.retry == default.retry - && self.use_polling == default.use_polling - && (self.verbose == default.verbose || self.inputs.len() > 1) - && self.presume_input_pipe == default.presume_input_pipe - } } -pub fn parse_obsolete(args: &str) -> UResult> { - match parse::parse_obsolete(args) { - Some(Ok(args)) => Ok(Some(args)), +pub fn parse_obsolete(arg: &OsString, input: Option<&OsString>) -> UResult> { + match parse::parse_obsolete(arg) { + Some(Ok(args)) => Ok(Some(Settings::from_obsolete_args(&args, input))), None => Ok(None), - Some(Err(e)) => Err(USimpleError::new( - 1, - match e { - parse::ParseError::OutOfRange => format!( - "invalid number: {}: Numerical result out of range", - args.quote() - ), - parse::ParseError::Overflow => format!("invalid number: {}", args.quote()), - parse::ParseError::Context => format!( - "option used in invalid context -- {}", - args.chars().nth(1).unwrap_or_default() - ), - }, - )), + Some(Err(e)) => { + let arg_str = arg.to_string_lossy(); + Err(USimpleError::new( + 1, + match e { + parse::ParseError::OutOfRange => format!( + "invalid number: {}: Numerical result out of range", + arg_str.quote() + ), + parse::ParseError::Overflow => format!("invalid number: {}", arg_str.quote()), + // this ensures compatibility to GNU's error message (as tested in misc/tail) + parse::ParseError::Context => format!( + "option used in invalid context -- {}", + arg_str.chars().nth(1).unwrap_or_default() + ), + parse::ParseError::InvalidEncoding => { + format!("bad argument encoding: '{arg_str}'") + } + }, + )) + } } } @@ -410,39 +404,41 @@ fn parse_num(src: &str) -> Result { pub fn parse_args(args: impl uucore::Args) -> UResult { let args_vec: Vec = args.collect(); - let clap_result = match uu_app().try_get_matches_from(args_vec.clone()) { - Ok(matches) => { - let settings = Settings::from(&matches)?; - if !settings.is_default() { - // non-default settings can't have obsolete arguments - return Ok(settings); - } - Ok(settings) - } + let clap_args = uu_app().try_get_matches_from(args_vec.clone()); + let clap_result = match clap_args { + Ok(matches) => Ok(Settings::from(&matches)?), Err(err) => Err(err.into()), }; - // clap parsing failed or resulted to default -> check for obsolete/deprecated args - // argv[0] is always present - let second = match args_vec.get(1) { - Some(second) => second, - None => return clap_result, - }; - let second_str = match second.to_str() { - Some(second_str) => second_str, - None => { - let invalid_string = second.to_string_lossy(); - return Err(USimpleError::new( - 1, - format!("bad argument encoding: '{invalid_string}'"), - )); - } - }; - match parse_obsolete(second_str)? { - Some(obsolete_args) => Ok(Settings::from_obsolete_args( - &obsolete_args, - args_vec.get(2), - )), + // clap isn't able to handle obsolete syntax. + // therefore, we want to check further for obsolete arguments. + // argv[0] is always present, argv[1] might be obsolete arguments + // argv[2] might contain an input file, argv[3] isn't allowed in obsolete mode + if args_vec.len() != 2 && args_vec.len() != 3 { + return clap_result; + } + + // At this point, there are a few possible cases: + // + // 1. clap has succeeded and the arguments would be invalid for the obsolete syntax. + // 2. The case of `tail -c 5` is ambiguous. clap parses this as `tail -c5`, + // but it could also be interpreted as valid obsolete syntax (tail -c on file '5'). + // GNU chooses to interpret this as `tail -c5`, like clap. + // 3. `tail -f foo` is also ambiguous, but has the same effect in both cases. We can safely + // use the clap result here. + // 4. clap succeeded for obsolete arguments starting with '+', but misinterprets them as + // input files (e.g. 'tail +f'). + // 5. clap failed because of unknown flags, but possibly valid obsolete arguments + // (e.g. tail -l; tail -10c). + // + // In cases 4 & 5, we want to try parsing the obsolete arguments, which corresponds to + // checking whether clap succeeded or the first argument starts with '+'. + let possible_obsolete_args = &args_vec[1]; + if clap_result.is_ok() && !possible_obsolete_args.to_string_lossy().starts_with('+') { + return clap_result; + } + match parse_obsolete(possible_obsolete_args, args_vec.get(2))? { + Some(settings) => Ok(settings), None => clap_result, } } @@ -477,6 +473,7 @@ pub fn uu_app() -> Command { .num_args(0..=1) .require_equals(true) .value_parser(["descriptor", "name"]) + .overrides_with(options::FOLLOW) .help("Print the file as it grows"), ) .arg( diff --git a/src/uu/tail/src/parse.rs b/src/uu/tail/src/parse.rs index 2f4ebb62e..96cf1e918 100644 --- a/src/uu/tail/src/parse.rs +++ b/src/uu/tail/src/parse.rs @@ -3,6 +3,8 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. +use std::ffi::OsString; + #[derive(PartialEq, Eq, Debug, Copy, Clone)] pub struct ObsoleteArgs { pub num: u64, @@ -27,20 +29,31 @@ pub enum ParseError { OutOfRange, Overflow, Context, + InvalidEncoding, } /// Parses obsolete syntax -/// tail -\[NUM\]\[bl\]\[f\] and tail +\[NUM\]\[bcl\]\[f\] // spell-checker:disable-line -pub fn parse_obsolete(src: &str) -> Option> { - let mut chars = src.chars(); - let sign = chars.next()?; - if sign != '+' && sign != '-' { +/// tail -\[NUM\]\[bcl\]\[f\] and tail +\[NUM\]\[bcl\]\[f\] // spell-checker:disable-line +pub fn parse_obsolete(src: &OsString) -> Option> { + let mut rest = match src.to_str() { + Some(src) => src, + None => return Some(Err(ParseError::InvalidEncoding)), + }; + let sign = if let Some(r) = rest.strip_prefix('-') { + rest = r; + '-' + } else if let Some(r) = rest.strip_prefix('+') { + rest = r; + '+' + } else { return None; - } + }; - let numbers: String = chars.clone().take_while(|&c| c.is_ascii_digit()).collect(); - let has_num = !numbers.is_empty(); + let end_num = rest + .find(|c: char| !c.is_ascii_digit()) + .unwrap_or(rest.len()); + let has_num = !rest[..end_num].is_empty(); let num: u64 = if has_num { - if let Ok(num) = numbers.parse() { + if let Ok(num) = rest[..end_num].parse() { num } else { return Some(Err(ParseError::OutOfRange)); @@ -48,25 +61,30 @@ pub fn parse_obsolete(src: &str) -> Option> { } else { 10 }; + rest = &rest[end_num..]; - let mut follow = false; - let mut mode = 'l'; - let mut first_char = true; - for char in chars.skip_while(|&c| c.is_ascii_digit()) { - if !has_num && first_char && sign == '-' && (char == 'c' || char == 'f') { - // special cases: -c, -f should be handled by clap (are ambiguous) - return None; - } else if char == 'f' { - follow = true; - } else if first_char && (char == 'b' || char == 'c' || char == 'l') { - mode = char; - } else if has_num && sign == '-' { + let mode = if let Some(r) = rest.strip_prefix('l') { + rest = r; + 'l' + } else if let Some(r) = rest.strip_prefix('c') { + rest = r; + 'c' + } else if let Some(r) = rest.strip_prefix('b') { + rest = r; + 'b' + } else { + 'l' + }; + + let follow = rest.contains('f'); + if !rest.chars().all(|f| f == 'f') { + // GNU allows an arbitrary amount of following fs, but nothing else + if sign == '-' && has_num { return Some(Err(ParseError::Context)); - } else { - return None; } - first_char = false; + return None; } + let multiplier = if mode == 'b' { 512 } else { 1 }; let num = match num.checked_mul(multiplier) { Some(n) => n, @@ -87,7 +105,7 @@ mod tests { #[test] fn test_parse_numbers_obsolete() { assert_eq!( - parse_obsolete("+2c"), + parse_obsolete(&OsString::from("+2c")), Some(Ok(ObsoleteArgs { num: 2, plus: true, @@ -96,7 +114,7 @@ mod tests { })) ); assert_eq!( - parse_obsolete("-5"), + parse_obsolete(&OsString::from("-5")), Some(Ok(ObsoleteArgs { num: 5, plus: false, @@ -105,7 +123,7 @@ mod tests { })) ); assert_eq!( - parse_obsolete("+100f"), + parse_obsolete(&OsString::from("+100f")), Some(Ok(ObsoleteArgs { num: 100, plus: true, @@ -114,7 +132,7 @@ mod tests { })) ); assert_eq!( - parse_obsolete("-2b"), + parse_obsolete(&OsString::from("-2b")), Some(Ok(ObsoleteArgs { num: 1024, plus: false, @@ -125,23 +143,47 @@ mod tests { } #[test] fn test_parse_errors_obsolete() { - assert_eq!(parse_obsolete("-5n"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-5c5"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-1vzc"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-5m"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-1k"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-1mmk"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-105kzm"), Some(Err(ParseError::Context))); - assert_eq!(parse_obsolete("-1vz"), Some(Err(ParseError::Context))); assert_eq!( - parse_obsolete("-1vzqvq"), // spell-checker:disable-line + parse_obsolete(&OsString::from("-5n")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-5c5")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-1vzc")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-5m")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-1k")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-1mmk")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-105kzm")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-1vz")), + Some(Err(ParseError::Context)) + ); + assert_eq!( + parse_obsolete(&OsString::from("-1vzqvq")), // spell-checker:disable-line Some(Err(ParseError::Context)) ); } #[test] fn test_parse_obsolete_no_match() { - assert_eq!(parse_obsolete("-k"), None); - assert_eq!(parse_obsolete("asd"), None); - assert_eq!(parse_obsolete("-cc"), None); + assert_eq!(parse_obsolete(&OsString::from("-k")), None); + assert_eq!(parse_obsolete(&OsString::from("asd")), None); + assert_eq!(parse_obsolete(&OsString::from("-cc")), None); } } diff --git a/src/uu/tail/src/paths.rs b/src/uu/tail/src/paths.rs index b1cbc9bb8..4badd6866 100644 --- a/src/uu/tail/src/paths.rs +++ b/src/uu/tail/src/paths.rs @@ -28,8 +28,7 @@ pub struct Input { impl Input { pub fn from>(string: &T) -> Self { - let valid_string = string.as_ref().to_str(); - let kind = if valid_string.is_some() && valid_string.unwrap() == text::DASH { + let kind = if string.as_ref() == Path::new(text::DASH) { InputKind::Stdin } else { InputKind::File(PathBuf::from(string.as_ref()))