Merge pull request #7721 from Qelxiros/7166-head-maxint

head: fix overflow errors
2025-09-14 02:57:57 +00:00 · 2025-04-13 16:37:13 +02:00 · 2025-04-13 16:37:13 +02:00 · 4c796caba5
commit 4c796caba5
parent 80b6a2155c 37eee0e1e6
3 changed files with 94 additions and 106 deletions
--- a/src/uu/head/src/head.rs
+++ b/src/uu/head/src/head.rs
@ -191,16 +191,10 @@ fn arg_iterate<'a>(
        if let Some(s) = second.to_str() {
            match parse::parse_obsolete(s) {
                Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))),
-                Some(Err(e)) => match e {
+                Some(Err(parse::ParseError)) => Err(HeadError::ParseError(format!(
-                    parse::ParseError::Syntax => Err(HeadError::ParseError(format!(
+                    "bad argument format: {}",
-                        "bad argument format: {}",
+                    s.quote()
-                        s.quote()
+                ))),
                    ))),
                    parse::ParseError::Overflow => Err(HeadError::ParseError(format!(
                        "invalid argument: {} Value too large for defined datatype",
                        s.quote()
                    ))),
                },
                None => Ok(Box::new(vec![first, second].into_iter().chain(args))),
            }
        } else {
@ -288,13 +282,7 @@ fn read_n_lines(input: &mut impl io::BufRead, n: u64, separator: u8) -> io::Resu
 }
 fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> {
-    match usize::try_from(n) {
+    usize::try_from(n).ok()
        Ok(value) => Some(value),
        Err(e) => {
            show!(HeadError::NumTooLarge(e));
            None
        }
    }
 }
 fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> io::Result<u64> {
@ -668,7 +656,7 @@ mod tests {
        //test that bad obsoletes are an error
        assert!(arg_outputs("head -123FooBar").is_err());
        //test overflow
-        assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err());
+        assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_ok());
        //test that empty args remain unchanged
        assert_eq!(arg_outputs("head"), Ok("head".to_owned()));
    }
--- a/src/uu/head/src/parse.rs
+++ b/src/uu/head/src/parse.rs
@ -4,33 +4,37 @@
 // file that was distributed with this source code.
 use std::ffi::OsString;
-use uucore::parser::parse_size::{ParseSizeError, parse_size_u64};
+use uucore::parser::parse_size::{ParseSizeError, parse_size_u64_max};
 #[derive(PartialEq, Eq, Debug)]
-pub enum ParseError {
+pub struct ParseError;
    Syntax,
    Overflow,
 }
 /// Parses obsolete syntax
 /// head -NUM\[kmzv\] // spell-checker:disable-line
 pub fn parse_obsolete(src: &str) -> Option<Result<Vec<OsString>, ParseError>> {
    let mut chars = src.char_indices();
-    if let Some((_, '-')) = chars.next() {
+    if let Some((mut num_start, '-')) = chars.next() {
-        let mut num_end = 0usize;
+        num_start += 1;
        let mut num_end = src.len();
        let mut has_num = false;
        let mut plus_possible = false;
        let mut last_char = 0 as char;
        for (n, c) in &mut chars {
            if c.is_ascii_digit() {
                has_num = true;
-                num_end = n;
+                plus_possible = false;
            } else if c == '+' && plus_possible {
                plus_possible = false;
                num_start += 1;
                continue;
            } else {
                num_end = n;
                last_char = c;
                break;
            }
        }
        if has_num {
-            process_num_block(&src[1..=num_end], last_char, &mut chars)
+            process_num_block(&src[num_start..num_end], last_char, &mut chars)
        } else {
            None
        }
@ -45,64 +49,62 @@ fn process_num_block(
    last_char: char,
    chars: &mut std::str::CharIndices,
 ) -> Option<Result<Vec<OsString>, ParseError>> {
-    match src.parse::<usize>() {
+    let num = match src.parse::<usize>() {
-        Ok(num) => {
+        Ok(n) => n,
-            let mut quiet = false;
+        Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX,
-            let mut verbose = false;
+        _ => return Some(Err(ParseError)),
-            let mut zero_terminated = false;
+    };
-            let mut multiplier = None;
+    let mut quiet = false;
-            let mut c = last_char;
+    let mut verbose = false;
-            loop {
+    let mut zero_terminated = false;
-                // note that here, we only match lower case 'k', 'c', and 'm'
+    let mut multiplier = None;
-                match c {
+    let mut c = last_char;
-                    // we want to preserve order
+    loop {
-                    // this also saves us 1 heap allocation
+        // note that here, we only match lower case 'k', 'c', and 'm'
-                    'q' => {
+        match c {
-                        quiet = true;
+            // we want to preserve order
-                        verbose = false;
+            // this also saves us 1 heap allocation
-                    }
+            'q' => {
-                    'v' => {
+                quiet = true;
-                        verbose = true;
+                verbose = false;
                        quiet = false;
                    }
                    'z' => zero_terminated = true,
                    'c' => multiplier = Some(1),
                    'b' => multiplier = Some(512),
                    'k' => multiplier = Some(1024),
                    'm' => multiplier = Some(1024 * 1024),
                    '\0' => {}
                    _ => return Some(Err(ParseError::Syntax)),
                }
                if let Some((_, next)) = chars.next() {
                    c = next;
                } else {
                    break;
                }
            }
-            let mut options = Vec::new();
+            'v' => {
-            if quiet {
+                verbose = true;
-                options.push(OsString::from("-q"));
+                quiet = false;
            }
-            if verbose {
+            'z' => zero_terminated = true,
-                options.push(OsString::from("-v"));
+            'c' => multiplier = Some(1),
-            }
+            'b' => multiplier = Some(512),
-            if zero_terminated {
+            'k' => multiplier = Some(1024),
-                options.push(OsString::from("-z"));
+            'm' => multiplier = Some(1024 * 1024),
-            }
+            '\0' => {}
-            if let Some(n) = multiplier {
+            _ => return Some(Err(ParseError)),
-                options.push(OsString::from("-c"));
+        }
-                let Some(num) = num.checked_mul(n) else {
+        if let Some((_, next)) = chars.next() {
-                    return Some(Err(ParseError::Overflow));
+            c = next;
-                };
+        } else {
-                options.push(OsString::from(format!("{num}")));
+            break;
            } else {
                options.push(OsString::from("-n"));
                options.push(OsString::from(format!("{num}")));
            }
            Some(Ok(options))
        }
        Err(_) => Some(Err(ParseError::Overflow)),
    }
    let mut options = Vec::new();
    if quiet {
        options.push(OsString::from("-q"));
    }
    if verbose {
        options.push(OsString::from("-v"));
    }
    if zero_terminated {
        options.push(OsString::from("-z"));
    }
    if let Some(n) = multiplier {
        options.push(OsString::from("-c"));
        let num = num.saturating_mul(n);
        options.push(OsString::from(format!("{num}")));
    } else {
        options.push(OsString::from("-n"));
        options.push(OsString::from(format!("{num}")));
    }
    Some(Ok(options))
 }
 /// Parses an -c or -n argument,
@ -128,7 +130,7 @@ pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> {
    if trimmed_string.is_empty() {
        Ok((0, all_but_last))
    } else {
-        parse_size_u64(trimmed_string).map(|n| (n, all_but_last))
+        parse_size_u64_max(trimmed_string).map(|n| (n, all_but_last))
    }
 }
@ -177,8 +179,8 @@ mod tests {
    #[test]
    fn test_parse_errors_obsolete() {
-        assert_eq!(obsolete("-5n"), Some(Err(ParseError::Syntax)));
+        assert_eq!(obsolete("-5n"), Some(Err(ParseError)));
-        assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Syntax)));
+        assert_eq!(obsolete("-5c5"), Some(Err(ParseError)));
    }
    #[test]
@ -192,18 +194,24 @@ mod tests {
    fn test_parse_obsolete_overflow_x64() {
        assert_eq!(
            obsolete("-1000000000000000m"),
-            Some(Err(ParseError::Overflow))
+            obsolete_result(&["-c", "18446744073709551615"])
        );
        assert_eq!(
            obsolete("-10000000000000000000000"),
-            Some(Err(ParseError::Overflow))
+            obsolete_result(&["-n", "18446744073709551615"])
        );
    }
    #[test]
    #[cfg(target_pointer_width = "32")]
    fn test_parse_obsolete_overflow_x32() {
-        assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow)));
+        assert_eq!(
-        assert_eq!(obsolete("-42949672k"), Some(Err(ParseError::Overflow)));
+            obsolete("-42949672960"),
            obsolete_result(&["-n", "4294967295"])
        );
        assert_eq!(
            obsolete("-42949672k"),
            obsolete_result(&["-c", "4294967295"])
        );
    }
 }
--- a/tests/by-util/test_head.rs
+++ b/tests/by-util/test_head.rs
@ -321,24 +321,20 @@ fn test_bad_utf8_lines() {
 fn test_head_invalid_num() {
    new_ucmd!()
        .args(&["-c", "1024R", "emptyfile.txt"])
-        .fails()
+        .succeeds()
-        .stderr_is(
+        .no_output();
            "head: invalid number of bytes: '1024R': Value too large for defined data type\n",
        );
    new_ucmd!()
        .args(&["-n", "1024R", "emptyfile.txt"])
-        .fails()
+        .succeeds()
-        .stderr_is(
+        .no_output();
            "head: invalid number of lines: '1024R': Value too large for defined data type\n",
        );
    new_ucmd!()
        .args(&["-c", "1Y", "emptyfile.txt"])
-        .fails()
+        .succeeds()
-        .stderr_is("head: invalid number of bytes: '1Y': Value too large for defined data type\n");
+        .no_output();
    new_ucmd!()
        .args(&["-n", "1Y", "emptyfile.txt"])
-        .fails()
+        .succeeds()
-        .stderr_is("head: invalid number of lines: '1Y': Value too large for defined data type\n");
+        .no_output();
    #[cfg(target_pointer_width = "32")]
    {
        let sizes = ["1000G", "10T"];
@ -350,10 +346,7 @@ fn test_head_invalid_num() {
    {
        let sizes = ["-1000G", "-10T"];
        for size in &sizes {
-            new_ucmd!()
+            new_ucmd!().args(&["-c", size]).succeeds().no_output();
                .args(&["-c", size])
                .fails()
                .stderr_is("head: out of range integral type conversion attempted: number of -bytes or -lines is too large\n");
        }
    }
    new_ucmd!()
@ -778,8 +771,7 @@ fn test_value_too_large() {
    new_ucmd!()
        .args(&["-n", format!("{MAX}0").as_str(), "lorem_ipsum.txt"])
-        .fails()
+        .succeeds();
        .stderr_contains("Value too large for defined data type");
 }
 #[test]