1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge pull request #7721 from Qelxiros/7166-head-maxint

head: fix overflow errors
This commit is contained in:
Sylvestre Ledru 2025-04-13 16:37:13 +02:00 committed by GitHub
commit 4c796caba5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 94 additions and 106 deletions

View file

@ -191,16 +191,10 @@ fn arg_iterate<'a>(
if let Some(s) = second.to_str() { if let Some(s) = second.to_str() {
match parse::parse_obsolete(s) { match parse::parse_obsolete(s) {
Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))),
Some(Err(e)) => match e { Some(Err(parse::ParseError)) => Err(HeadError::ParseError(format!(
parse::ParseError::Syntax => Err(HeadError::ParseError(format!( "bad argument format: {}",
"bad argument format: {}", s.quote()
s.quote() ))),
))),
parse::ParseError::Overflow => Err(HeadError::ParseError(format!(
"invalid argument: {} Value too large for defined datatype",
s.quote()
))),
},
None => Ok(Box::new(vec![first, second].into_iter().chain(args))), None => Ok(Box::new(vec![first, second].into_iter().chain(args))),
} }
} else { } else {
@ -288,13 +282,7 @@ fn read_n_lines(input: &mut impl io::BufRead, n: u64, separator: u8) -> io::Resu
} }
fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> { fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> {
match usize::try_from(n) { usize::try_from(n).ok()
Ok(value) => Some(value),
Err(e) => {
show!(HeadError::NumTooLarge(e));
None
}
}
} }
fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> io::Result<u64> { fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> io::Result<u64> {
@ -668,7 +656,7 @@ mod tests {
//test that bad obsoletes are an error //test that bad obsoletes are an error
assert!(arg_outputs("head -123FooBar").is_err()); assert!(arg_outputs("head -123FooBar").is_err());
//test overflow //test overflow
assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err()); assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_ok());
//test that empty args remain unchanged //test that empty args remain unchanged
assert_eq!(arg_outputs("head"), Ok("head".to_owned())); assert_eq!(arg_outputs("head"), Ok("head".to_owned()));
} }

View file

@ -4,33 +4,37 @@
// file that was distributed with this source code. // file that was distributed with this source code.
use std::ffi::OsString; use std::ffi::OsString;
use uucore::parser::parse_size::{ParseSizeError, parse_size_u64}; use uucore::parser::parse_size::{ParseSizeError, parse_size_u64_max};
#[derive(PartialEq, Eq, Debug)] #[derive(PartialEq, Eq, Debug)]
pub enum ParseError { pub struct ParseError;
Syntax,
Overflow,
}
/// Parses obsolete syntax /// Parses obsolete syntax
/// head -NUM\[kmzv\] // spell-checker:disable-line /// head -NUM\[kmzv\] // spell-checker:disable-line
pub fn parse_obsolete(src: &str) -> Option<Result<Vec<OsString>, ParseError>> { pub fn parse_obsolete(src: &str) -> Option<Result<Vec<OsString>, ParseError>> {
let mut chars = src.char_indices(); let mut chars = src.char_indices();
if let Some((_, '-')) = chars.next() { if let Some((mut num_start, '-')) = chars.next() {
let mut num_end = 0usize; num_start += 1;
let mut num_end = src.len();
let mut has_num = false; let mut has_num = false;
let mut plus_possible = false;
let mut last_char = 0 as char; let mut last_char = 0 as char;
for (n, c) in &mut chars { for (n, c) in &mut chars {
if c.is_ascii_digit() { if c.is_ascii_digit() {
has_num = true; has_num = true;
num_end = n; plus_possible = false;
} else if c == '+' && plus_possible {
plus_possible = false;
num_start += 1;
continue;
} else { } else {
num_end = n;
last_char = c; last_char = c;
break; break;
} }
} }
if has_num { if has_num {
process_num_block(&src[1..=num_end], last_char, &mut chars) process_num_block(&src[num_start..num_end], last_char, &mut chars)
} else { } else {
None None
} }
@ -45,64 +49,62 @@ fn process_num_block(
last_char: char, last_char: char,
chars: &mut std::str::CharIndices, chars: &mut std::str::CharIndices,
) -> Option<Result<Vec<OsString>, ParseError>> { ) -> Option<Result<Vec<OsString>, ParseError>> {
match src.parse::<usize>() { let num = match src.parse::<usize>() {
Ok(num) => { Ok(n) => n,
let mut quiet = false; Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX,
let mut verbose = false; _ => return Some(Err(ParseError)),
let mut zero_terminated = false; };
let mut multiplier = None; let mut quiet = false;
let mut c = last_char; let mut verbose = false;
loop { let mut zero_terminated = false;
// note that here, we only match lower case 'k', 'c', and 'm' let mut multiplier = None;
match c { let mut c = last_char;
// we want to preserve order loop {
// this also saves us 1 heap allocation // note that here, we only match lower case 'k', 'c', and 'm'
'q' => { match c {
quiet = true; // we want to preserve order
verbose = false; // this also saves us 1 heap allocation
} 'q' => {
'v' => { quiet = true;
verbose = true; verbose = false;
quiet = false;
}
'z' => zero_terminated = true,
'c' => multiplier = Some(1),
'b' => multiplier = Some(512),
'k' => multiplier = Some(1024),
'm' => multiplier = Some(1024 * 1024),
'\0' => {}
_ => return Some(Err(ParseError::Syntax)),
}
if let Some((_, next)) = chars.next() {
c = next;
} else {
break;
}
} }
let mut options = Vec::new(); 'v' => {
if quiet { verbose = true;
options.push(OsString::from("-q")); quiet = false;
} }
if verbose { 'z' => zero_terminated = true,
options.push(OsString::from("-v")); 'c' => multiplier = Some(1),
} 'b' => multiplier = Some(512),
if zero_terminated { 'k' => multiplier = Some(1024),
options.push(OsString::from("-z")); 'm' => multiplier = Some(1024 * 1024),
} '\0' => {}
if let Some(n) = multiplier { _ => return Some(Err(ParseError)),
options.push(OsString::from("-c")); }
let Some(num) = num.checked_mul(n) else { if let Some((_, next)) = chars.next() {
return Some(Err(ParseError::Overflow)); c = next;
}; } else {
options.push(OsString::from(format!("{num}"))); break;
} else {
options.push(OsString::from("-n"));
options.push(OsString::from(format!("{num}")));
}
Some(Ok(options))
} }
Err(_) => Some(Err(ParseError::Overflow)),
} }
let mut options = Vec::new();
if quiet {
options.push(OsString::from("-q"));
}
if verbose {
options.push(OsString::from("-v"));
}
if zero_terminated {
options.push(OsString::from("-z"));
}
if let Some(n) = multiplier {
options.push(OsString::from("-c"));
let num = num.saturating_mul(n);
options.push(OsString::from(format!("{num}")));
} else {
options.push(OsString::from("-n"));
options.push(OsString::from(format!("{num}")));
}
Some(Ok(options))
} }
/// Parses an -c or -n argument, /// Parses an -c or -n argument,
@ -128,7 +130,7 @@ pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> {
if trimmed_string.is_empty() { if trimmed_string.is_empty() {
Ok((0, all_but_last)) Ok((0, all_but_last))
} else { } else {
parse_size_u64(trimmed_string).map(|n| (n, all_but_last)) parse_size_u64_max(trimmed_string).map(|n| (n, all_but_last))
} }
} }
@ -177,8 +179,8 @@ mod tests {
#[test] #[test]
fn test_parse_errors_obsolete() { fn test_parse_errors_obsolete() {
assert_eq!(obsolete("-5n"), Some(Err(ParseError::Syntax))); assert_eq!(obsolete("-5n"), Some(Err(ParseError)));
assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Syntax))); assert_eq!(obsolete("-5c5"), Some(Err(ParseError)));
} }
#[test] #[test]
@ -192,18 +194,24 @@ mod tests {
fn test_parse_obsolete_overflow_x64() { fn test_parse_obsolete_overflow_x64() {
assert_eq!( assert_eq!(
obsolete("-1000000000000000m"), obsolete("-1000000000000000m"),
Some(Err(ParseError::Overflow)) obsolete_result(&["-c", "18446744073709551615"])
); );
assert_eq!( assert_eq!(
obsolete("-10000000000000000000000"), obsolete("-10000000000000000000000"),
Some(Err(ParseError::Overflow)) obsolete_result(&["-n", "18446744073709551615"])
); );
} }
#[test] #[test]
#[cfg(target_pointer_width = "32")] #[cfg(target_pointer_width = "32")]
fn test_parse_obsolete_overflow_x32() { fn test_parse_obsolete_overflow_x32() {
assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow))); assert_eq!(
assert_eq!(obsolete("-42949672k"), Some(Err(ParseError::Overflow))); obsolete("-42949672960"),
obsolete_result(&["-n", "4294967295"])
);
assert_eq!(
obsolete("-42949672k"),
obsolete_result(&["-c", "4294967295"])
);
} }
} }

View file

@ -321,24 +321,20 @@ fn test_bad_utf8_lines() {
fn test_head_invalid_num() { fn test_head_invalid_num() {
new_ucmd!() new_ucmd!()
.args(&["-c", "1024R", "emptyfile.txt"]) .args(&["-c", "1024R", "emptyfile.txt"])
.fails() .succeeds()
.stderr_is( .no_output();
"head: invalid number of bytes: '1024R': Value too large for defined data type\n",
);
new_ucmd!() new_ucmd!()
.args(&["-n", "1024R", "emptyfile.txt"]) .args(&["-n", "1024R", "emptyfile.txt"])
.fails() .succeeds()
.stderr_is( .no_output();
"head: invalid number of lines: '1024R': Value too large for defined data type\n",
);
new_ucmd!() new_ucmd!()
.args(&["-c", "1Y", "emptyfile.txt"]) .args(&["-c", "1Y", "emptyfile.txt"])
.fails() .succeeds()
.stderr_is("head: invalid number of bytes: '1Y': Value too large for defined data type\n"); .no_output();
new_ucmd!() new_ucmd!()
.args(&["-n", "1Y", "emptyfile.txt"]) .args(&["-n", "1Y", "emptyfile.txt"])
.fails() .succeeds()
.stderr_is("head: invalid number of lines: '1Y': Value too large for defined data type\n"); .no_output();
#[cfg(target_pointer_width = "32")] #[cfg(target_pointer_width = "32")]
{ {
let sizes = ["1000G", "10T"]; let sizes = ["1000G", "10T"];
@ -350,10 +346,7 @@ fn test_head_invalid_num() {
{ {
let sizes = ["-1000G", "-10T"]; let sizes = ["-1000G", "-10T"];
for size in &sizes { for size in &sizes {
new_ucmd!() new_ucmd!().args(&["-c", size]).succeeds().no_output();
.args(&["-c", size])
.fails()
.stderr_is("head: out of range integral type conversion attempted: number of -bytes or -lines is too large\n");
} }
} }
new_ucmd!() new_ucmd!()
@ -778,8 +771,7 @@ fn test_value_too_large() {
new_ucmd!() new_ucmd!()
.args(&["-n", format!("{MAX}0").as_str(), "lorem_ipsum.txt"]) .args(&["-n", format!("{MAX}0").as_str(), "lorem_ipsum.txt"])
.fails() .succeeds();
.stderr_contains("Value too large for defined data type");
} }
#[test] #[test]