From d466b6702bb182a68eabcf52f6cae6b36baed122 Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Thu, 10 Apr 2025 22:05:12 -0400 Subject: [PATCH 1/4] head: fix overflow errors --- src/uu/head/src/head.rs | 16 ++--- src/uu/head/src/parse.rs | 137 ++++++++++++++++++++------------------- 2 files changed, 74 insertions(+), 79 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 9d2a841db..970b94a2f 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -191,16 +191,10 @@ fn arg_iterate<'a>( if let Some(s) = second.to_str() { match parse::parse_obsolete(s) { Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), - Some(Err(e)) => match e { - parse::ParseError::Syntax => Err(HeadError::ParseError(format!( - "bad argument format: {}", - s.quote() - ))), - parse::ParseError::Overflow => Err(HeadError::ParseError(format!( - "invalid argument: {} Value too large for defined datatype", - s.quote() - ))), - }, + Some(Err(parse::ParseError)) => Err(HeadError::ParseError(format!( + "bad argument format: {}", + s.quote() + ))), None => Ok(Box::new(vec![first, second].into_iter().chain(args))), } } else { @@ -668,7 +662,7 @@ mod tests { //test that bad obsoletes are an error assert!(arg_outputs("head -123FooBar").is_err()); //test overflow - assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err()); + assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_ok()); //test that empty args remain unchanged assert_eq!(arg_outputs("head"), Ok("head".to_owned())); } diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 097599bc2..a59a62c45 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -7,30 +7,34 @@ use std::ffi::OsString; use uucore::parser::parse_size::{ParseSizeError, parse_size_u64}; #[derive(PartialEq, Eq, Debug)] -pub enum ParseError { - Syntax, - Overflow, -} +pub struct ParseError; /// Parses obsolete syntax /// head -NUM\[kmzv\] // spell-checker:disable-line pub fn parse_obsolete(src: &str) -> Option, ParseError>> { let mut chars = src.char_indices(); - if let Some((_, '-')) = chars.next() { - let mut num_end = 0usize; + if let Some((mut num_start, '-')) = chars.next() { + num_start += 1; + let mut num_end = src.len(); let mut has_num = false; + let mut plus_possible = false; let mut last_char = 0 as char; for (n, c) in &mut chars { if c.is_ascii_digit() { has_num = true; - num_end = n; + plus_possible = false; + } else if c == '+' && plus_possible { + plus_possible = false; + num_start += 1; + continue; } else { + num_end = n; last_char = c; break; } } if has_num { - process_num_block(&src[1..=num_end], last_char, &mut chars) + process_num_block(&src[num_start..num_end], last_char, &mut chars) } else { None } @@ -45,64 +49,61 @@ fn process_num_block( last_char: char, chars: &mut std::str::CharIndices, ) -> Option, ParseError>> { - match src.parse::() { - Ok(num) => { - let mut quiet = false; - let mut verbose = false; - let mut zero_terminated = false; - let mut multiplier = None; - let mut c = last_char; - loop { - // note that here, we only match lower case 'k', 'c', and 'm' - match c { - // we want to preserve order - // this also saves us 1 heap allocation - 'q' => { - quiet = true; - verbose = false; - } - 'v' => { - verbose = true; - quiet = false; - } - 'z' => zero_terminated = true, - 'c' => multiplier = Some(1), - 'b' => multiplier = Some(512), - 'k' => multiplier = Some(1024), - 'm' => multiplier = Some(1024 * 1024), - '\0' => {} - _ => return Some(Err(ParseError::Syntax)), - } - if let Some((_, next)) = chars.next() { - c = next; - } else { - break; - } + let num = src.chars().fold(0u32, |acc, ch| { + acc.saturating_mul(10) + .saturating_add(ch.to_digit(10).unwrap()) + }); + let mut quiet = false; + let mut verbose = false; + let mut zero_terminated = false; + let mut multiplier = None; + let mut c = last_char; + loop { + // note that here, we only match lower case 'k', 'c', and 'm' + match c { + // we want to preserve order + // this also saves us 1 heap allocation + 'q' => { + quiet = true; + verbose = false; } - let mut options = Vec::new(); - if quiet { - options.push(OsString::from("-q")); + 'v' => { + verbose = true; + quiet = false; } - if verbose { - options.push(OsString::from("-v")); - } - if zero_terminated { - options.push(OsString::from("-z")); - } - if let Some(n) = multiplier { - options.push(OsString::from("-c")); - let Some(num) = num.checked_mul(n) else { - return Some(Err(ParseError::Overflow)); - }; - options.push(OsString::from(format!("{num}"))); - } else { - options.push(OsString::from("-n")); - options.push(OsString::from(format!("{num}"))); - } - Some(Ok(options)) + 'z' => zero_terminated = true, + 'c' => multiplier = Some(1), + 'b' => multiplier = Some(512), + 'k' => multiplier = Some(1024), + 'm' => multiplier = Some(1024 * 1024), + '\0' => {} + _ => return Some(Err(ParseError)), + } + if let Some((_, next)) = chars.next() { + c = next; + } else { + break; } - Err(_) => Some(Err(ParseError::Overflow)), } + let mut options = Vec::new(); + if quiet { + options.push(OsString::from("-q")); + } + if verbose { + options.push(OsString::from("-v")); + } + if zero_terminated { + options.push(OsString::from("-z")); + } + if let Some(n) = multiplier { + options.push(OsString::from("-c")); + let num = num.saturating_mul(n); + options.push(OsString::from(format!("{num}"))); + } else { + options.push(OsString::from("-n")); + options.push(OsString::from(format!("{num}"))); + } + Some(Ok(options)) } /// Parses an -c or -n argument, @@ -177,8 +178,8 @@ mod tests { #[test] fn test_parse_errors_obsolete() { - assert_eq!(obsolete("-5n"), Some(Err(ParseError::Syntax))); - assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Syntax))); + assert_eq!(obsolete("-5n"), Some(Err(ParseError))); + assert_eq!(obsolete("-5c5"), Some(Err(ParseError))); } #[test] @@ -192,18 +193,18 @@ mod tests { fn test_parse_obsolete_overflow_x64() { assert_eq!( obsolete("-1000000000000000m"), - Some(Err(ParseError::Overflow)) + obsolete_result(&["-c", "4294967295"]) ); assert_eq!( obsolete("-10000000000000000000000"), - Some(Err(ParseError::Overflow)) + obsolete_result(&["-n", "4294967295"]) ); } #[test] #[cfg(target_pointer_width = "32")] fn test_parse_obsolete_overflow_x32() { - assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow))); - assert_eq!(obsolete("-42949672k"), Some(Err(ParseError::Overflow))); + assert_eq!(obsolete("-42949672960"), Some(Err(ParseError))); + assert_eq!(obsolete("-42949672k"), Some(Err(ParseError))); } } From 1ffaf2d6295f2fde1bd715c923dd96ceaf933dcc Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Thu, 10 Apr 2025 22:39:13 -0400 Subject: [PATCH 2/4] head: update 32-bit tests --- src/uu/head/src/parse.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index a59a62c45..0dbac6fe2 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -204,7 +204,13 @@ mod tests { #[test] #[cfg(target_pointer_width = "32")] fn test_parse_obsolete_overflow_x32() { - assert_eq!(obsolete("-42949672960"), Some(Err(ParseError))); - assert_eq!(obsolete("-42949672k"), Some(Err(ParseError))); + assert_eq!( + obsolete("-42949672960"), + obsolete_result(&["-n", "4294967295"]) + ); + assert_eq!( + obsolete("-42949672k"), + obsolete_result(&["-c", "4294967295"]) + ); } } From cc1112660a48de0ce372ca5e0f6fb65d50f75bcc Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Fri, 11 Apr 2025 22:01:49 -0400 Subject: [PATCH 3/4] head: return to parse:: and switch to parse_size_u64_max --- src/uu/head/src/parse.rs | 17 +++++++++-------- tests/by-util/test_head.rs | 28 ++++++++++------------------ 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 0dbac6fe2..a4ce6e710 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -4,7 +4,7 @@ // file that was distributed with this source code. use std::ffi::OsString; -use uucore::parser::parse_size::{ParseSizeError, parse_size_u64}; +use uucore::parser::parse_size::{ParseSizeError, parse_size_u64_max}; #[derive(PartialEq, Eq, Debug)] pub struct ParseError; @@ -49,10 +49,11 @@ fn process_num_block( last_char: char, chars: &mut std::str::CharIndices, ) -> Option, ParseError>> { - let num = src.chars().fold(0u32, |acc, ch| { - acc.saturating_mul(10) - .saturating_add(ch.to_digit(10).unwrap()) - }); + let num = match src.parse::() { + Ok(n) => n, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + _ => return Some(Err(ParseError)), + }; let mut quiet = false; let mut verbose = false; let mut zero_terminated = false; @@ -129,7 +130,7 @@ pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> { if trimmed_string.is_empty() { Ok((0, all_but_last)) } else { - parse_size_u64(trimmed_string).map(|n| (n, all_but_last)) + parse_size_u64_max(trimmed_string).map(|n| (n, all_but_last)) } } @@ -193,11 +194,11 @@ mod tests { fn test_parse_obsolete_overflow_x64() { assert_eq!( obsolete("-1000000000000000m"), - obsolete_result(&["-c", "4294967295"]) + obsolete_result(&["-c", "18446744073709551615"]) ); assert_eq!( obsolete("-10000000000000000000000"), - obsolete_result(&["-n", "4294967295"]) + obsolete_result(&["-n", "18446744073709551615"]) ); } diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 6c73936f3..9cd690c73 100644 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -321,24 +321,20 @@ fn test_bad_utf8_lines() { fn test_head_invalid_num() { new_ucmd!() .args(&["-c", "1024R", "emptyfile.txt"]) - .fails() - .stderr_is( - "head: invalid number of bytes: '1024R': Value too large for defined data type\n", - ); + .succeeds() + .no_output(); new_ucmd!() .args(&["-n", "1024R", "emptyfile.txt"]) - .fails() - .stderr_is( - "head: invalid number of lines: '1024R': Value too large for defined data type\n", - ); + .succeeds() + .no_output(); new_ucmd!() .args(&["-c", "1Y", "emptyfile.txt"]) - .fails() - .stderr_is("head: invalid number of bytes: '1Y': Value too large for defined data type\n"); + .succeeds() + .no_output(); new_ucmd!() .args(&["-n", "1Y", "emptyfile.txt"]) - .fails() - .stderr_is("head: invalid number of lines: '1Y': Value too large for defined data type\n"); + .succeeds() + .no_output(); #[cfg(target_pointer_width = "32")] { let sizes = ["1000G", "10T"]; @@ -350,10 +346,7 @@ fn test_head_invalid_num() { { let sizes = ["-1000G", "-10T"]; for size in &sizes { - new_ucmd!() - .args(&["-c", size]) - .fails() - .stderr_is("head: out of range integral type conversion attempted: number of -bytes or -lines is too large\n"); + new_ucmd!().args(&["-c", size]).succeeds().no_output(); } } new_ucmd!() @@ -778,8 +771,7 @@ fn test_value_too_large() { new_ucmd!() .args(&["-n", format!("{MAX}0").as_str(), "lorem_ipsum.txt"]) - .fails() - .stderr_contains("Value too large for defined data type"); + .succeeds(); } #[test] From 37eee0e1e6ddd16fdf44151d7806ebbe5686508d Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Fri, 11 Apr 2025 22:48:50 -0400 Subject: [PATCH 4/4] head: saturate to max int size on 32 bit platforms --- src/uu/head/src/head.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 970b94a2f..573926a7b 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -282,13 +282,7 @@ fn read_n_lines(input: &mut impl io::BufRead, n: u64, separator: u8) -> io::Resu } fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option { - match usize::try_from(n) { - Ok(value) => Some(value), - Err(e) => { - show!(HeadError::NumTooLarge(e)); - None - } - } + usize::try_from(n).ok() } fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> io::Result {