From b6c952c46ebd09774c3f3d55b3b3440aec531c97 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 15:55:17 +0100 Subject: [PATCH 01/12] Fix `parse_size` to use u64 rather than usize for better 32-bit support Using usize limits 32-bit platforms to operate only on sizes of 4GiB or less. While 32-bit platforms only have 4GiB of addressable memory, not all operations require the data to be entirely in memory, so this limitation can be lifted if we use u64 instead of usize. This only fixes the core function, further commits fix the utilities making use of this function. --- src/uucore/src/lib/parser/parse_size.rs | 44 +++++++++---------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 35a03ea32..e68c04e5c 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -33,14 +33,14 @@ use crate::display::Quotable; /// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 /// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 /// ``` -pub fn parse_size(size: &str) -> Result { +pub fn parse_size(size: &str) -> Result { if size.is_empty() { return Err(ParseSizeError::parse_failure(size)); } // Get the numeric part of the size argument. For example, if the // argument is "123K", then the numeric part is "123". let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect(); - let number: usize = if !numeric_string.is_empty() { + let number: u64 = if !numeric_string.is_empty() { match numeric_string.parse() { Ok(n) => n, Err(_) => return Err(ParseSizeError::parse_failure(size)), @@ -75,7 +75,7 @@ pub fn parse_size(size: &str) -> Result { "YB" | "yB" => (1000, 8), _ => return Err(ParseSizeError::parse_failure(size)), }; - let factor = match usize::try_from(base.pow(exponent)) { + let factor = match u64::try_from(base.pow(exponent)) { Ok(n) => n, Err(_) => return Err(ParseSizeError::size_too_big(size)), }; @@ -181,7 +181,7 @@ mod tests { #[test] fn all_suffixes() { - // Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000). + // Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000). // Binary prefixes can be used, too: KiB=K, MiB=M, and so on. 
let suffixes = [ ('K', 1u32), @@ -190,31 +190,30 @@ mod tests { ('T', 4u32), ('P', 5u32), ('E', 6u32), - #[cfg(target_pointer_width = "128")] - ('Z', 7u32), // ParseSizeError::SizeTooBig on x64 - #[cfg(target_pointer_width = "128")] - ('Y', 8u32), // ParseSizeError::SizeTooBig on x64 + // The following will always result ParseSizeError::SizeTooBig as they cannot fit in u64 + // ('Z', 7u32), + // ('Y', 8u32), ]; for &(c, exp) in &suffixes { let s = format!("2{}B", c); // KB - assert_eq!(Ok((2 * (1000_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1000_u128).pow(exp)) as u64), parse_size(&s)); let s = format!("2{}", c); // K - assert_eq!(Ok((2 * (1024_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u64), parse_size(&s)); let s = format!("2{}iB", c); // KiB - assert_eq!(Ok((2 * (1024_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u64), parse_size(&s)); let s = format!("2{}iB", c.to_lowercase()); // kiB - assert_eq!(Ok((2 * (1024_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u64), parse_size(&s)); // suffix only let s = format!("{}B", c); // KB - assert_eq!(Ok(((1000_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1000_u128).pow(exp)) as u64), parse_size(&s)); let s = format!("{}", c); // K - assert_eq!(Ok(((1024_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as u64), parse_size(&s)); let s = format!("{}iB", c); // KiB - assert_eq!(Ok(((1024_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as u64), parse_size(&s)); let s = format!("{}iB", c.to_lowercase()); // kiB - assert_eq!(Ok(((1024_u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as u64), parse_size(&s)); } } @@ -239,19 +238,6 @@ mod tests { ); } - #[test] - #[cfg(target_pointer_width = "32")] - fn overflow_x32() { - assert!(variant_eq( - &parse_size("1T").unwrap_err(), - &ParseSizeError::SizeTooBig(String::new()) - )); - assert!(variant_eq( - &parse_size("1000G").unwrap_err(), - &ParseSizeError::SizeTooBig(String::new()) - )); - } - #[test] fn invalid_syntax() { let test_strings = [ From 88dfb8d374e2e9f94f9cfbda9681122116ad9655 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 21:21:07 +0100 Subject: [PATCH 02/12] Fix type-error when calling `parse_size` from dd --- src/uu/dd/src/datastructures.rs | 4 +- src/uu/dd/src/dd.rs | 61 +++++++++++++--------------- src/uu/dd/src/parseargs.rs | 70 +++++++++++++++++++++++---------- 3 files changed, 80 insertions(+), 55 deletions(-) diff --git a/src/uu/dd/src/datastructures.rs b/src/uu/dd/src/datastructures.rs index c9c89e858..067058bbe 100644 --- a/src/uu/dd/src/datastructures.rs +++ b/src/uu/dd/src/datastructures.rs @@ -83,8 +83,8 @@ pub struct OFlags { /// then becomes Bytes(N) #[derive(Debug, PartialEq)] pub enum CountType { - Reads(usize), - Bytes(usize), + Reads(u64), + Bytes(u64), } #[derive(Debug)] diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 7cc6fb082..d8bc3acd3 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -37,9 +37,8 @@ use std::time; use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use gcd::Gcd; use uucore::display::Quotable; -use uucore::error::{FromIo, UResult, USimpleError}; -use uucore::show_error; -use uucore::InvalidEncodingHandling; +use uucore::error::{FromIo, UResult}; +use uucore::{show_error, InvalidEncodingHandling}; const ABOUT: &str = "copy, and 
optionally convert, a file system resource"; const BUF_INIT_BYTE: u8 = 0xDD; @@ -75,11 +74,13 @@ impl Input { }; if let Some(amt) = skip { - let num_bytes_read = i - .force_fill(amt.try_into().unwrap()) - .map_err_context(|| "failed to read input".to_string())?; - if num_bytes_read < amt { - show_error!("'standard input': cannot skip to specified offset"); + if let Err(e) = i.read_skip(amt) { + if let io::ErrorKind::UnexpectedEof = e.kind() { + show_error!("'standard input': cannot skip to specified offset"); + } else { + return io::Result::Err(e) + .map_err_context(|| "I/O error while skipping".to_string()); + } } } @@ -148,9 +149,6 @@ impl Input { }; if let Some(amt) = skip { - let amt: u64 = amt - .try_into() - .map_err(|_| USimpleError::new(1, "failed to parse seek amount"))?; src.seek(io::SeekFrom::Start(amt)) .map_err_context(|| "failed to seek in input file".to_string())?; } @@ -262,19 +260,18 @@ impl Input { }) } - /// Read the specified number of bytes from this reader. - /// - /// On success, this method returns the number of bytes read. If - /// this reader has fewer than `n` bytes available, then it reads - /// as many as possible. In that case, this method returns a - /// number less than `n`. - /// - /// # Errors - /// - /// If there is a problem reading. - fn force_fill(&mut self, n: u64) -> std::io::Result { - let mut buf = vec![]; - self.take(n).read_to_end(&mut buf) + /// Skips amount_to_read bytes from the Input by copying into a sink + fn read_skip(&mut self, amount_to_read: u64) -> std::io::Result<()> { + let copy_result = io::copy(&mut self.src.by_ref().take(amount_to_read), &mut io::sink()); + if let Ok(n) = copy_result { + if n != amount_to_read { + io::Result::Err(io::Error::new(io::ErrorKind::UnexpectedEof, "")) + } else { + Ok(()) + } + } else { + io::Result::Err(copy_result.unwrap_err()) + } } } @@ -301,8 +298,7 @@ impl OutputTrait for Output { // stdout is not seekable, so we just write null bytes. if let Some(amt) = seek { - let bytes = vec![b'\0'; amt]; - dst.write_all(&bytes) + io::copy(&mut io::repeat(0u8).take(amt as u64), &mut dst) .map_err_context(|| String::from("write error"))?; } @@ -526,7 +522,7 @@ impl OutputTrait for Output { // Instead, we suppress the error by calling // `Result::ok()`. This matches the behavior of GNU `dd` // when given the command-line argument `of=/dev/null`. 
- let i = seek.unwrap_or(0).try_into().unwrap(); + let i = seek.unwrap_or(0); if !cflags.notrunc { dst.set_len(i).ok(); } @@ -658,15 +654,14 @@ fn calc_loop_bsize( ) -> usize { match count { Some(CountType::Reads(rmax)) => { - let rmax: u64 = (*rmax).try_into().unwrap(); let rsofar = rstat.reads_complete + rstat.reads_partial; - let rremain: usize = (rmax - rsofar).try_into().unwrap(); - cmp::min(ideal_bsize, rremain * ibs) + let rremain = rmax - rsofar; + cmp::min(ideal_bsize as u64, rremain * ibs as u64) as usize } Some(CountType::Bytes(bmax)) => { let bmax: u128 = (*bmax).try_into().unwrap(); - let bremain: usize = (bmax - wstat.bytes_total).try_into().unwrap(); - cmp::min(ideal_bsize, bremain) + let bremain: u128 = bmax - wstat.bytes_total; + cmp::min(ideal_bsize as u128, bremain as u128) as usize } None => ideal_bsize, } @@ -677,7 +672,7 @@ fn calc_loop_bsize( fn below_count_limit(count: &Option, rstat: &ReadStat, wstat: &WriteStat) -> bool { match count { Some(CountType::Reads(n)) => { - let n = (*n).try_into().unwrap(); + let n = *n; rstat.reads_complete + rstat.reads_partial <= n } Some(CountType::Bytes(n)) => { diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index 7a0bad851..c8324c4ca 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -31,6 +31,10 @@ pub enum ParseError { BlockUnblockWithoutCBS, StatusLevelNotRecognized(String), Unimplemented(String), + BsOutOfRange, + IbsOutOfRange, + ObsOutOfRange, + CbsOutOfRange, } impl ParseError { @@ -48,6 +52,10 @@ impl ParseError { Self::BlockUnblockWithoutCBS => Self::BlockUnblockWithoutCBS, Self::StatusLevelNotRecognized(_) => Self::StatusLevelNotRecognized(s), Self::Unimplemented(_) => Self::Unimplemented(s), + Self::BsOutOfRange => Self::BsOutOfRange, + Self::IbsOutOfRange => Self::IbsOutOfRange, + Self::ObsOutOfRange => Self::ObsOutOfRange, + Self::CbsOutOfRange => Self::CbsOutOfRange, } } } @@ -92,6 +100,18 @@ impl std::fmt::Display for ParseError { Self::StatusLevelNotRecognized(arg) => { write!(f, "status=LEVEL not recognized -> {}", arg) } + ParseError::BsOutOfRange => { + write!(f, "bs=N cannot fit into memory") + } + ParseError::IbsOutOfRange => { + write!(f, "ibs=N cannot fit into memory") + } + ParseError::ObsOutOfRange => { + write!(f, "ibs=N cannot fit into memory") + } + ParseError::CbsOutOfRange => { + write!(f, "cbs=N cannot fit into memory") + } Self::Unimplemented(arg) => { write!(f, "feature not implemented on this system -> {}", arg) } @@ -334,7 +354,7 @@ fn show_zero_multiplier_warning() { } /// Parse bytes using str::parse, then map error if needed. -fn parse_bytes_only(s: &str) -> Result { +fn parse_bytes_only(s: &str) -> Result { s.parse() .map_err(|_| ParseError::MultiplierStringParseFailure(s.to_string())) } @@ -364,7 +384,7 @@ fn parse_bytes_only(s: &str) -> Result { /// assert_eq!(parse_bytes_no_x("2b").unwrap(), 2 * 512); /// assert_eq!(parse_bytes_no_x("2k").unwrap(), 2 * 1024); /// ``` -fn parse_bytes_no_x(s: &str) -> Result { +fn parse_bytes_no_x(s: &str) -> Result { let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) { (None, None, None) => match uucore::parse_size::parse_size(s) { Ok(n) => (n, 1), @@ -387,7 +407,7 @@ fn parse_bytes_no_x(s: &str) -> Result { /// Parse byte and multiplier like 512, 5KiB, or 1G. /// Uses uucore::parse_size, and adds the 'w' and 'c' suffixes which are mentioned /// in dd's info page. 
-fn parse_bytes_with_opt_multiplier(s: &str) -> Result { +fn parse_bytes_with_opt_multiplier(s: &str) -> Result { // TODO On my Linux system, there seems to be a maximum block size of 4096 bytes: // // $ printf "%0.sa" {1..10000} | dd bs=4095 count=1 status=none | wc -c @@ -420,9 +440,27 @@ fn parse_bytes_with_opt_multiplier(s: &str) -> Result { pub fn parse_ibs(matches: &Matches) -> Result { if let Some(mixed_str) = matches.value_of(options::BS) { - parse_bytes_with_opt_multiplier(mixed_str) + parse_bytes_with_opt_multiplier(mixed_str)? + .try_into() + .map_err(|_| ParseError::BsOutOfRange) } else if let Some(mixed_str) = matches.value_of(options::IBS) { - parse_bytes_with_opt_multiplier(mixed_str) + parse_bytes_with_opt_multiplier(mixed_str)? + .try_into() + .map_err(|_| ParseError::IbsOutOfRange) + } else { + Ok(512) + } +} + +pub fn parse_obs(matches: &Matches) -> Result { + if let Some(mixed_str) = matches.value_of("bs") { + parse_bytes_with_opt_multiplier(mixed_str)? + .try_into() + .map_err(|_| ParseError::BsOutOfRange) + } else if let Some(mixed_str) = matches.value_of("obs") { + parse_bytes_with_opt_multiplier(mixed_str)? + .try_into() + .map_err(|_| ParseError::ObsOutOfRange) } else { Ok(512) } @@ -430,7 +468,9 @@ pub fn parse_ibs(matches: &Matches) -> Result { fn parse_cbs(matches: &Matches) -> Result, ParseError> { if let Some(s) = matches.value_of(options::CBS) { - let bytes = parse_bytes_with_opt_multiplier(s)?; + let bytes = parse_bytes_with_opt_multiplier(s)? + .try_into() + .map_err(|_| ParseError::CbsOutOfRange)?; Ok(Some(bytes)) } else { Ok(None) @@ -447,16 +487,6 @@ pub(crate) fn parse_status_level(matches: &Matches) -> Result Result { - if let Some(mixed_str) = matches.value_of("bs") { - parse_bytes_with_opt_multiplier(mixed_str) - } else if let Some(mixed_str) = matches.value_of("obs") { - parse_bytes_with_opt_multiplier(mixed_str) - } else { - Ok(512) - } -} - fn parse_ctable(fmt: Option, case: Option) -> Option<&'static ConversionTable> { fn parse_conv_and_case_table( fmt: &ConvFlag, @@ -715,13 +745,13 @@ pub fn parse_skip_amt( ibs: &usize, iflags: &IFlags, matches: &Matches, -) -> Result, ParseError> { +) -> Result, ParseError> { if let Some(amt) = matches.value_of(options::SKIP) { let n = parse_bytes_with_opt_multiplier(amt)?; if iflags.skip_bytes { Ok(Some(n)) } else { - Ok(Some(ibs * n)) + Ok(Some(*ibs as u64 * n)) } } else { Ok(None) @@ -733,13 +763,13 @@ pub fn parse_seek_amt( obs: &usize, oflags: &OFlags, matches: &Matches, -) -> Result, ParseError> { +) -> Result, ParseError> { if let Some(amt) = matches.value_of(options::SEEK) { let n = parse_bytes_with_opt_multiplier(amt)?; if oflags.seek_bytes { Ok(Some(n)) } else { - Ok(Some(obs * n)) + Ok(Some(*obs as u64 * n)) } } else { Ok(None) From 0fe6017006e45885a4878d0de83b6de4297d7433 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 21:21:46 +0100 Subject: [PATCH 03/12] Fix type-error when calling `parse_size` from du --- src/uu/du/src/du.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 34580f0ee..0bb1abf4a 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -12,7 +12,6 @@ use chrono::prelude::DateTime; use chrono::Local; use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use std::collections::HashSet; -use std::convert::TryFrom; use std::env; use std::fs; #[cfg(not(windows))] @@ -248,7 +247,7 @@ fn get_file_info(path: &Path) -> Option { result } -fn read_block_size(s: Option<&str>) -> usize { +fn 
read_block_size(s: Option<&str>) -> u64 { if let Some(s) = s { parse_size(s) .unwrap_or_else(|e| crash!(1, "{}", format_error_message(&e, s, options::BLOCK_SIZE))) @@ -483,7 +482,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { show_warning!("options --apparent-size and -b are ineffective with --inodes"); } - let block_size = u64::try_from(read_block_size(matches.value_of(options::BLOCK_SIZE))).unwrap(); + let block_size = read_block_size(matches.value_of(options::BLOCK_SIZE)); let threshold = matches.value_of(options::THRESHOLD).map(|s| { Threshold::from_str(s) @@ -807,7 +806,7 @@ impl FromStr for Threshold { fn from_str(s: &str) -> std::result::Result { let offset = if s.starts_with(&['-', '+'][..]) { 1 } else { 0 }; - let size = u64::try_from(parse_size(&s[offset..])?).unwrap(); + let size = parse_size(&s[offset..])?; if s.starts_with('-') { Ok(Self::Upper(size)) From 8d8e25880e14b81a0df7e53a491bd67b951d5b15 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 21:21:52 +0100 Subject: [PATCH 04/12] Fix type-error when calling `parse_size` from head --- src/uu/head/src/head.rs | 52 +++++++++++++++++++++++++++------------- src/uu/head/src/parse.rs | 2 +- src/uu/head/src/take.rs | 4 ++-- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 6691b3a6d..2dc0a682e 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -106,10 +106,10 @@ pub fn uu_app<'a>() -> App<'a> { #[derive(Debug, PartialEq)] enum Mode { - FirstLines(usize), - AllButLastLines(usize), - FirstBytes(usize), - AllButLastBytes(usize), + FirstLines(u64), + AllButLastLines(u64), + FirstBytes(u64), + AllButLastBytes(u64), } impl Default for Mode { @@ -199,12 +199,12 @@ impl HeadOptions { } } -fn read_n_bytes(input: R, n: usize) -> std::io::Result<()> +fn read_n_bytes(input: R, n: u64) -> std::io::Result<()> where R: Read, { // Read the first `n` bytes from the `input` reader. - let mut reader = input.take(n as u64); + let mut reader = input.take(n); // Write those bytes to `stdout`. let stdout = std::io::stdout(); @@ -215,7 +215,7 @@ where Ok(()) } -fn read_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std::io::Result<()> { +fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, zero: bool) -> std::io::Result<()> { // Read the first `n` lines from the `input` reader. 
let separator = if zero { b'\0' } else { b'\n' }; let mut reader = take_lines(input, n, separator); @@ -233,8 +233,9 @@ fn read_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std: fn read_but_last_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> { if n == 0 { //prints everything - return read_n_bytes(input, std::usize::MAX); + return read_n_bytes(input, std::u64::MAX); } + let stdout = std::io::stdout(); let mut stdout = stdout.lock(); @@ -337,17 +338,18 @@ fn read_but_last_n_lines( /// assert_eq!(find_nth_line_from_end(&mut input, 4, false).unwrap(), 0); /// assert_eq!(find_nth_line_from_end(&mut input, 1000, false).unwrap(), 0); /// ``` -fn find_nth_line_from_end(input: &mut R, n: usize, zeroed: bool) -> std::io::Result +fn find_nth_line_from_end(input: &mut R, n: u64, zeroed: bool) -> std::io::Result where R: Read + Seek, { let size = input.seek(SeekFrom::End(0))?; - let size = usize::try_from(size).unwrap(); let mut buffer = [0u8; BUF_SIZE]; - let buffer = &mut buffer[..BUF_SIZE.min(size)]; - let mut i = 0usize; - let mut lines = 0usize; + let buf_size: usize = (BUF_SIZE as u64).min(size).try_into().unwrap(); + let buffer = &mut buffer[..buf_size]; + + let mut i = 0u64; + let mut lines = 0u64; loop { // the casts here are ok, `buffer.len()` should never be above a few k @@ -382,7 +384,7 @@ where fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { match options.mode { Mode::AllButLastBytes(n) => { - let size = input.metadata()?.len().try_into().unwrap(); + let size = input.metadata()?.len(); if n >= size { return Ok(()); } else { @@ -431,12 +433,30 @@ fn uu_head(options: &HeadOptions) -> UResult<()> { } let stdin = std::io::stdin(); let mut stdin = stdin.lock(); + + // Outputting "all-but-last" requires us to use a ring buffer with size n, so n + // must be converted from u64 to usize to fit in memory. If such conversion fails, + // it means the platform doesn't have enough memory to hold the buffer, so we fail. 
+ if let Mode::AllButLastLines(n) | Mode::AllButLastBytes(n) = options.mode { + if let Err(n) = usize::try_from(n) { + show!(USimpleError::new( + 1, + format!("{}: number of bytes is too large", n) + )); + continue; + }; + }; + match options.mode { Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n), - Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n), + // unwrap is guaranteed to succeed because we checked the value of n above + Mode::AllButLastBytes(n) => { + read_but_last_n_bytes(&mut stdin, n.try_into().unwrap()) + } Mode::FirstLines(n) => read_n_lines(&mut stdin, n, options.zeroed), + // unwrap is guaranteed to succeed because we checked the value of n above Mode::AllButLastLines(n) => { - read_but_last_n_lines(&mut stdin, n, options.zeroed) + read_but_last_n_lines(&mut stdin, n.try_into().unwrap(), options.zeroed) } } } diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index b44a8b69d..ee543fe06 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -97,7 +97,7 @@ pub fn parse_obsolete(src: &str) -> Option } /// Parses an -c or -n argument, /// the bool specifies whether to read from the end -pub fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { +pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> { let mut size_string = src.trim(); let mut all_but_last = false; diff --git a/src/uu/head/src/take.rs b/src/uu/head/src/take.rs index a003f9328..47beba8a4 100644 --- a/src/uu/head/src/take.rs +++ b/src/uu/head/src/take.rs @@ -69,7 +69,7 @@ where /// details. pub struct TakeLines { inner: T, - limit: usize, + limit: u64, separator: u8, } @@ -103,7 +103,7 @@ impl Read for TakeLines { /// /// The `separator` defines the character to interpret as the line /// ending. For the usual notion of "line", set this to `b'\n'`. -pub fn take_lines(reader: R, limit: usize, separator: u8) -> TakeLines { +pub fn take_lines(reader: R, limit: u64, separator: u8) -> TakeLines { TakeLines { inner: reader, limit, From 5d861df9614e62a12d870aec4ee5a9540603c7ac Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 21:21:59 +0100 Subject: [PATCH 05/12] Fix type-error when calling `parse_size` from tail --- src/uu/tail/src/tail.rs | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 00db47c0e..27153117c 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -22,6 +22,7 @@ use chunks::ReverseChunks; use clap::{App, AppSettings, Arg}; use std::collections::VecDeque; +use std::convert::TryInto; use std::ffi::OsString; use std::fmt; use std::fs::{File, Metadata}; @@ -66,8 +67,8 @@ pub mod options { #[derive(Debug)] enum FilterMode { - Bytes(usize), - Lines(usize, u8), // (number of lines, delimiter) + Bytes(u64), + Lines(u64, u8), // (number of lines, delimiter) } impl Default for FilterMode { @@ -440,7 +441,7 @@ fn follow(readers: &mut [(T, &String)], settings: &Settings) -> URes /// ``` fn forwards_thru_file( reader: &mut R, - num_delimiters: usize, + num_delimiters: u64, delimiter: u8, ) -> std::io::Result where @@ -471,7 +472,7 @@ where /// Iterate over bytes in the file, in reverse, until we find the /// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the /// position just after that delimiter. 
-fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) { +fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { // This variable counts the number of delimiters found in the file // so far (reading from the end of the file toward the beginning). let mut counter = 0; @@ -541,6 +542,18 @@ fn bounded_tail(file: &mut File, mode: &FilterMode, beginning: bool) { std::io::copy(file, &mut stdout).unwrap(); } +/// An alternative to [`Iterator::skip`] with u64 instead of usize. This is +/// necessary because the usize limit doesn't make sense when iterating over +/// something that's not in memory. For example, a very large file. This allows +/// us to skip data larger than 4 GiB even on 32-bit platforms. +fn skip_u64(iter: &mut impl Iterator, num: u64) { + for _ in 0..num { + if iter.next().is_none() { + break; + } + } +} + /// Collect the last elements of an iterator into a `VecDeque`. /// /// This function returns a [`VecDeque`] containing either the last @@ -553,10 +566,10 @@ fn bounded_tail(file: &mut File, mode: &FilterMode, beginning: bool) { /// /// If any element of `iter` is an [`Err`], then this function panics. fn unbounded_tail_collect( - iter: impl Iterator>, - count: usize, + mut iter: impl Iterator>, + count: u64, beginning: bool, -) -> VecDeque +) -> UResult> where E: fmt::Debug, { @@ -564,9 +577,13 @@ where // GNU `tail` seems to index bytes and lines starting at 1, not // at 0. It seems to treat `+0` and `+1` as the same thing. let i = count.max(1) - 1; - iter.skip(i as usize).map(|r| r.unwrap()).collect() + skip_u64(&mut iter, i); + Ok(iter.map(|r| r.unwrap()).collect()) } else { - RingBuffer::from_iter(iter.map(|r| r.unwrap()), count as usize).data + let count: usize = count + .try_into() + .map_err(|_| USimpleError::new(1, "Insufficient addressable memory"))?; + Ok(RingBuffer::from_iter(iter.map(|r| r.unwrap()), count).data) } } @@ -577,14 +594,14 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UR match settings.mode { FilterMode::Lines(count, sep) => { let mut stdout = stdout(); - for line in unbounded_tail_collect(lines(reader, sep), count, settings.beginning) { + for line in unbounded_tail_collect(lines(reader, sep), count, settings.beginning)? { stdout .write_all(&line) .map_err_context(|| String::from("IO error"))?; } } FilterMode::Bytes(count) => { - for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning) { + for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning)? 
{ if let Err(err) = stdout().write(&[byte]) { return Err(USimpleError::new(1, err.to_string())); } @@ -600,7 +617,7 @@ fn is_seekable(file: &mut T) -> bool { && file.seek(SeekFrom::Start(0)).is_ok() } -fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { +fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> { let mut size_string = src.trim(); let mut starting_with = false; From 8535cd41e0b9e765b23215ee9db53307876ee615 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Sat, 5 Feb 2022 13:35:37 +0200 Subject: [PATCH 06/12] Fix type-error when calling `parse_size` from sort --- src/uu/sort/src/sort.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index a2c636321..1c118b15a 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -33,6 +33,7 @@ use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoPa use rand::{thread_rng, Rng}; use rayon::prelude::*; use std::cmp::Ordering; +use std::convert::TryFrom; use std::env; use std::error::Error; use std::ffi::{OsStr, OsString}; @@ -354,7 +355,13 @@ impl GlobalSettings { } else if size_string.ends_with('b') { size_string.pop(); } - parse_size(&size_string) + let size = parse_size(&size_string)?; + usize::try_from(size).map_err(|_| { + ParseSizeError::SizeTooBig(format!( + "Buffer size {} does not fit in address space", + size + )) + }) } else { Err(ParseSizeError::ParseFailure("invalid suffix".to_string())) } From 159a1dc1db6b3211c6ac1b99f1d4280b4ec0efea Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Sat, 5 Feb 2022 13:45:18 +0200 Subject: [PATCH 07/12] Fix type-error when calling `parse_size` from split --- src/uu/split/src/split.rs | 72 +++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index ca59dfb6e..90a853efa 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -14,9 +14,11 @@ mod platform; use crate::filenames::FilenameIterator; use crate::filenames::SuffixType; use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; +use std::convert::TryInto; use std::env; use std::fmt; use std::fs::{metadata, File}; +use std::io; use std::io::{stdin, BufReader, BufWriter, ErrorKind, Read, Write}; use std::num::ParseIntError; use std::path::Path; @@ -175,17 +177,17 @@ pub fn uu_app<'a>() -> App<'a> { /// The strategy for breaking up the input file into chunks. enum Strategy { /// Each chunk has the specified number of lines. - Lines(usize), + Lines(u64), /// Each chunk has the specified number of bytes. - Bytes(usize), + Bytes(u64), /// Each chunk has as many lines as possible without exceeding the /// specified number of bytes. - LineBytes(usize), + LineBytes(u64), /// Split the file into this many chunks. - Number(usize), + Number(u64), } /// An error when parsing a chunking strategy from command-line arguments. @@ -246,7 +248,7 @@ impl Strategy { } (0, 0, 0, 1) => { let s = matches.value_of(OPT_NUMBER).unwrap(); - let n = s.parse::().map_err(StrategyError::NumberOfChunks)?; + let n = s.parse::().map_err(StrategyError::NumberOfChunks)?; Ok(Self::Number(n)) } _ => Err(StrategyError::MultipleWays), @@ -399,17 +401,17 @@ struct ByteChunkWriter<'a> { settings: &'a Settings, /// The maximum number of bytes allowed for a single chunk of output. - chunk_size: usize, + chunk_size: u64, /// Running total of number of chunks that have been completed. 
- num_chunks_written: usize, + num_chunks_written: u64, /// Remaining capacity in number of bytes in the current chunk. /// /// This number starts at `chunk_size` and decreases as bytes are /// written. Once it reaches zero, a writer for a new chunk is /// initialized and this number gets reset to `chunk_size`. - num_bytes_remaining_in_current_chunk: usize, + num_bytes_remaining_in_current_chunk: u64, /// The underlying writer for the current chunk. /// @@ -423,7 +425,7 @@ struct ByteChunkWriter<'a> { } impl<'a> ByteChunkWriter<'a> { - fn new(chunk_size: usize, settings: &'a Settings) -> Option> { + fn new(chunk_size: u64, settings: &'a Settings) -> Option> { let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, @@ -453,7 +455,7 @@ impl<'a> Write for ByteChunkWriter<'a> { // different underlying writers. In that case, each iteration of // this loop writes to the underlying writer that corresponds to // the current chunk number. - let mut carryover_bytes_written = 0; + let mut carryover_bytes_written: usize = 0; loop { if buf.is_empty() { return Ok(carryover_bytes_written); @@ -464,19 +466,23 @@ impl<'a> Write for ByteChunkWriter<'a> { // write enough bytes to fill the current chunk, then increment // the chunk number and repeat. let n = buf.len(); - if n < self.num_bytes_remaining_in_current_chunk { + if (n as u64) < self.num_bytes_remaining_in_current_chunk { let num_bytes_written = self.inner.write(buf)?; - self.num_bytes_remaining_in_current_chunk -= num_bytes_written; + self.num_bytes_remaining_in_current_chunk -= num_bytes_written as u64; return Ok(carryover_bytes_written + num_bytes_written); } else { // Write enough bytes to fill the current chunk. - let i = self.num_bytes_remaining_in_current_chunk; + // + // Conversion to usize is safe because we checked that + // self.num_bytes_remaining_in_current_chunk is lower than + // n, which is already usize. + let i = self.num_bytes_remaining_in_current_chunk as usize; let num_bytes_written = self.inner.write(&buf[..i])?; // It's possible that the underlying writer did not // write all the bytes. if num_bytes_written < i { - self.num_bytes_remaining_in_current_chunk -= num_bytes_written; + self.num_bytes_remaining_in_current_chunk -= num_bytes_written as u64; return Ok(carryover_bytes_written + num_bytes_written); } else { // Move the window to look at only the remaining bytes. @@ -527,17 +533,17 @@ struct LineChunkWriter<'a> { settings: &'a Settings, /// The maximum number of lines allowed for a single chunk of output. - chunk_size: usize, + chunk_size: u64, /// Running total of number of chunks that have been completed. - num_chunks_written: usize, + num_chunks_written: u64, /// Remaining capacity in number of lines in the current chunk. /// /// This number starts at `chunk_size` and decreases as lines are /// written. Once it reaches zero, a writer for a new chunk is /// initialized and this number gets reset to `chunk_size`. - num_lines_remaining_in_current_chunk: usize, + num_lines_remaining_in_current_chunk: u64, /// The underlying writer for the current chunk. 
/// @@ -551,7 +557,7 @@ struct LineChunkWriter<'a> { } impl<'a> LineChunkWriter<'a> { - fn new(chunk_size: usize, settings: &'a Settings) -> Option> { + fn new(chunk_size: u64, settings: &'a Settings) -> Option> { let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, @@ -632,7 +638,7 @@ impl<'a> Write for LineChunkWriter<'a> { fn split_into_n_chunks_by_byte( settings: &Settings, reader: &mut R, - num_chunks: usize, + num_chunks: u64, ) -> UResult<()> where R: Read, @@ -648,16 +654,20 @@ where // files. let metadata = metadata(&settings.input).unwrap(); let num_bytes = metadata.len(); - let will_have_empty_files = settings.elide_empty_files && num_chunks as u64 > num_bytes; + let will_have_empty_files = settings.elide_empty_files && num_chunks > num_bytes; let (num_chunks, chunk_size) = if will_have_empty_files { - let num_chunks = num_bytes as usize; + let num_chunks = num_bytes; let chunk_size = 1; (num_chunks, chunk_size) } else { - let chunk_size = ((num_bytes / (num_chunks as u64)) as usize).max(1); + let chunk_size = (num_bytes / (num_chunks)).max(1); (num_chunks, chunk_size) }; + let num_chunks: usize = num_chunks + .try_into() + .map_err(|_| USimpleError::new(1, "Number of chunks too big"))?; + // This object is responsible for creating the filename for each chunk. let mut filename_iterator = FilenameIterator::new( &settings.prefix, @@ -682,29 +692,17 @@ where // Write `chunk_size` bytes from the reader into each writer // except the last. // - // Re-use the buffer to avoid re-allocating a `Vec` on each - // iteration. The contents will be completely overwritten each - // time we call `read_exact()`. - // // The last writer gets all remaining bytes so that if the number // of bytes in the input file was not evenly divisible by // `num_chunks`, we don't leave any bytes behind. - let mut buf = vec![0u8; chunk_size]; for writer in writers.iter_mut().take(num_chunks - 1) { - reader.read_exact(&mut buf)?; - writer.write_all(&buf)?; + io::copy(&mut reader.by_ref().take(chunk_size), writer)?; } // Write all the remaining bytes to the last chunk. - // - // To do this, we resize our buffer to have the necessary number - // of bytes. 
let i = num_chunks - 1; - let last_chunk_size = num_bytes as usize - (chunk_size * (num_chunks - 1)); - buf.resize(last_chunk_size, 0); - - reader.read_exact(&mut buf)?; - writers[i].write_all(&buf)?; + let last_chunk_size = num_bytes - (chunk_size * (num_chunks as u64 - 1)); + io::copy(&mut reader.by_ref().take(last_chunk_size), &mut writers[i])?; Ok(()) } From 6856c5dba528e15eb0ceaed41ff688a1b549d677 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Sat, 5 Feb 2022 14:10:05 +0200 Subject: [PATCH 08/12] Fix type-error when calling `parse_size` from truncate --- src/uu/truncate/src/truncate.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 416afe54a..b88040fb8 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -7,7 +7,6 @@ // spell-checker:ignore (ToDO) RFILE refsize rfilename fsize tsize use clap::{crate_version, App, AppSettings, Arg}; -use std::convert::TryFrom; use std::fs::{metadata, OpenOptions}; use std::io::ErrorKind; #[cfg(unix)] @@ -20,13 +19,13 @@ use uucore::parse_size::{parse_size, ParseSizeError}; #[derive(Debug, Eq, PartialEq)] enum TruncateMode { - Absolute(usize), - Extend(usize), - Reduce(usize), - AtMost(usize), - AtLeast(usize), - RoundDown(usize), - RoundUp(usize), + Absolute(u64), + Extend(u64), + Reduce(u64), + AtMost(u64), + AtLeast(u64), + RoundDown(u64), + RoundUp(u64), } impl TruncateMode { @@ -55,7 +54,7 @@ impl TruncateMode { /// let fsize = 3; /// assert_eq!(mode.to_size(fsize), 0); /// ``` - fn to_size(&self, fsize: usize) -> usize { + fn to_size(&self, fsize: u64) -> u64 { match self { TruncateMode::Absolute(size) => *size, TruncateMode::Extend(size) => fsize + size, @@ -192,10 +191,10 @@ pub fn uu_app<'a>() -> App<'a> { /// /// If the file could not be opened, or there was a problem setting the /// size of the file. -fn file_truncate(filename: &str, create: bool, size: usize) -> std::io::Result<()> { +fn file_truncate(filename: &str, create: bool, size: u64) -> std::io::Result<()> { let path = Path::new(filename); let f = OpenOptions::new().write(true).create(create).open(path)?; - f.set_len(u64::try_from(size).unwrap()) + f.set_len(size) } /// Truncate files to a size relative to a given file. 
@@ -244,7 +243,7 @@ fn truncate_reference_and_size( ), _ => e.map_err_context(String::new), })?; - let fsize = metadata.len() as usize; + let fsize = metadata.len(); let tsize = mode.to_size(fsize); for filename in filenames { #[cfg(unix)] @@ -292,7 +291,7 @@ fn truncate_reference_file_only( ), _ => e.map_err_context(String::new), })?; - let tsize = metadata.len() as usize; + let tsize = metadata.len(); for filename in filenames { #[cfg(unix)] if std::fs::metadata(filename)?.file_type().is_fifo() { @@ -350,7 +349,7 @@ fn truncate_size_only(size_string: &str, filenames: &[String], create: bool) -> } Err(_) => 0, }; - let tsize = mode.to_size(fsize as usize); + let tsize = mode.to_size(fsize); match file_truncate(filename, create, tsize) { Ok(_) => continue, Err(e) if e.kind() == ErrorKind::NotFound && !create => continue, From 468ff8f0b9ce37694cacb926b5c3ec87721b6803 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 21:59:59 +0100 Subject: [PATCH 09/12] Fix type-error when calling `parse_size` from od --- src/uu/od/src/inputoffset.rs | 8 ++++---- src/uu/od/src/od.rs | 18 ++++++++++-------- src/uu/od/src/parse_inputs.rs | 8 ++++---- src/uu/od/src/parse_nrofbytes.rs | 4 ++-- src/uu/od/src/partialreader.rs | 22 +++++++++++++--------- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/uu/od/src/inputoffset.rs b/src/uu/od/src/inputoffset.rs index bc12098f8..25b439291 100644 --- a/src/uu/od/src/inputoffset.rs +++ b/src/uu/od/src/inputoffset.rs @@ -11,15 +11,15 @@ pub struct InputOffset { /// The radix to print the byte offset. NoPrefix will not print a byte offset. radix: Radix, /// The current position. Initialize at `new`, increase using `increase_position`. - byte_pos: usize, + byte_pos: u64, /// An optional label printed in parentheses, typically different from `byte_pos`, /// but will increase with the same value if `byte_pos` in increased. - label: Option, + label: Option, } impl InputOffset { /// creates a new `InputOffset` using the provided values. - pub fn new(radix: Radix, byte_pos: usize, label: Option) -> Self { + pub fn new(radix: Radix, byte_pos: u64, label: Option) -> Self { Self { radix, byte_pos, @@ -28,7 +28,7 @@ impl InputOffset { } /// Increase `byte_pos` and `label` if a label is used. 
- pub fn increase_position(&mut self, n: usize) { + pub fn increase_position(&mut self, n: u64) { self.byte_pos += n; if let Some(l) = self.label { self.label = Some(l + n); diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index 3786e8e68..3bbe3ab5d 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -29,6 +29,7 @@ mod prn_float; mod prn_int; use std::cmp; +use std::convert::TryFrom; use crate::byteorder_io::*; use crate::formatteriteminfo::*; @@ -111,9 +112,9 @@ pub(crate) mod options { struct OdOptions { byte_order: ByteOrder, - skip_bytes: usize, - read_bytes: Option, - label: Option, + skip_bytes: u64, + read_bytes: Option, + label: Option, input_strings: Vec, formats: Vec, line_bytes: usize, @@ -148,7 +149,7 @@ impl OdOptions { }, }; - let mut label: Option = None; + let mut label: Option = None; let parsed_input = parse_inputs(matches) .map_err(|e| USimpleError::new(1, format!("Invalid inputs: {}", e)))?; @@ -170,7 +171,8 @@ impl OdOptions { 16 } else { match parse_number_of_bytes(s) { - Ok(n) => n, + Ok(n) => usize::try_from(n) + .map_err(|_| USimpleError::new(1, format!("‘{}‘ is too large", s)))?, Err(e) => { return Err(USimpleError::new( 1, @@ -569,7 +571,7 @@ where ); } - input_offset.increase_position(length); + input_offset.increase_position(length as u64); } Err(e) => { show_error!("{}", e); @@ -648,8 +650,8 @@ fn print_bytes(prefix: &str, input_decoder: &MemoryDecoder, output_info: &Output /// `read_bytes` is an optional limit to the number of bytes to read fn open_input_peek_reader( input_strings: &[String], - skip_bytes: usize, - read_bytes: Option, + skip_bytes: u64, + read_bytes: Option, ) -> PeekReader> { // should return "impl PeekRead + Read + HasError" when supported in (stable) rust let inputs = input_strings diff --git a/src/uu/od/src/parse_inputs.rs b/src/uu/od/src/parse_inputs.rs index 9d64fc732..45e664ce3 100644 --- a/src/uu/od/src/parse_inputs.rs +++ b/src/uu/od/src/parse_inputs.rs @@ -32,7 +32,7 @@ impl<'a> CommandLineOpts for ArgMatches { #[derive(PartialEq, Debug)] pub enum CommandLineInputs { FileNames(Vec), - FileAndOffset((String, usize, Option)), + FileAndOffset((String, u64, Option)), } /// Interprets the command line inputs of od. 
@@ -141,7 +141,7 @@ pub fn parse_inputs_traditional(input_strings: &[&str]) -> Result Result { +pub fn parse_offset_operand(s: &str) -> Result { let mut start = 0; let mut len = s.len(); let mut radix = 8; @@ -164,7 +164,7 @@ pub fn parse_offset_operand(s: &str) -> Result { radix = 10; } } - match usize::from_str_radix(&s[start..len], radix) { + match u64::from_str_radix(&s[start..len], radix) { Ok(i) => Ok(i * multiply), Err(_) => Err("parse failed"), } @@ -332,7 +332,7 @@ mod tests { .unwrap_err(); } - fn parse_offset_operand_str(s: &str) -> Result { + fn parse_offset_operand_str(s: &str) -> Result { parse_offset_operand(&String::from(s)) } diff --git a/src/uu/od/src/parse_nrofbytes.rs b/src/uu/od/src/parse_nrofbytes.rs index d6329c60a..ad00452aa 100644 --- a/src/uu/od/src/parse_nrofbytes.rs +++ b/src/uu/od/src/parse_nrofbytes.rs @@ -1,6 +1,6 @@ use uucore::parse_size::{parse_size, ParseSizeError}; -pub fn parse_number_of_bytes(s: &str) -> Result { +pub fn parse_number_of_bytes(s: &str) -> Result { let mut start = 0; let mut len = s.len(); let mut radix = 16; @@ -65,7 +65,7 @@ pub fn parse_number_of_bytes(s: &str) -> Result { _ => {} } - let factor = match usize::from_str_radix(&s[start..len], radix) { + let factor = match u64::from_str_radix(&s[start..len], radix) { Ok(f) => f, Err(e) => return Err(ParseSizeError::ParseFailure(e.to_string())), }; diff --git a/src/uu/od/src/partialreader.rs b/src/uu/od/src/partialreader.rs index 68e3f30a1..8b51d8dee 100644 --- a/src/uu/od/src/partialreader.rs +++ b/src/uu/od/src/partialreader.rs @@ -15,15 +15,15 @@ const MAX_SKIP_BUFFER: usize = 16 * 1024; /// number of bytes. pub struct PartialReader { inner: R, - skip: usize, - limit: Option, + skip: u64, + limit: Option, } impl PartialReader { /// Create a new `PartialReader` wrapping `inner`, which will skip /// `skip` bytes, and limits the output to `limit` bytes. Set `limit` /// to `None` if there should be no limit. - pub fn new(inner: R, skip: usize, limit: Option) -> Self { + pub fn new(inner: R, skip: u64, limit: Option) -> Self { Self { inner, skip, limit } } } @@ -34,7 +34,7 @@ impl Read for PartialReader { let mut bytes = [0; MAX_SKIP_BUFFER]; while self.skip > 0 { - let skip_count = cmp::min(self.skip, MAX_SKIP_BUFFER); + let skip_count: usize = cmp::min(self.skip as usize, MAX_SKIP_BUFFER); match self.inner.read(&mut bytes[..skip_count])? { 0 => { @@ -44,7 +44,7 @@ impl Read for PartialReader { "tried to skip past end of input", )); } - n => self.skip -= n, + n => self.skip -= n as u64, } } } @@ -53,15 +53,15 @@ impl Read for PartialReader { None => self.inner.read(out), Some(0) => Ok(0), Some(ref mut limit) => { - let slice = if *limit > out.len() { + let slice = if *limit > (out.len() as u64) { out } else { - &mut out[0..*limit] + &mut out[0..(*limit as usize)] }; match self.inner.read(slice) { Err(e) => Err(e), Ok(r) => { - *limit -= r; + *limit -= r as u64; Ok(r) } } @@ -145,7 +145,11 @@ mod tests { fn test_read_skipping_huge_number() { let mut v = [0; 10]; // test if it does not eat all memory.... 
- let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), usize::max_value(), None); + let mut sut = PartialReader::new( + Cursor::new(&b"abcdefgh"[..]), + usize::max_value() as u64, + None, + ); sut.read(v.as_mut()).unwrap_err(); } From e9adf979d9bbc62bda882634531cc7350b7c117c Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Sun, 6 Feb 2022 22:15:27 +0100 Subject: [PATCH 10/12] Fix type-error when calling `parse_size` from stdbuf --- src/uu/stdbuf/src/stdbuf.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index a568ab277..d6e136dcb 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -11,7 +11,7 @@ extern crate uucore; use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; -use std::convert::TryFrom; +use std::convert::{TryFrom, TryInto}; use std::fs::File; use std::io::{self, Write}; use std::os::unix::process::ExitStatusExt; @@ -117,7 +117,14 @@ fn check_option(matches: &ArgMatches, name: &str) -> Result parse_size(x).map_or_else( |e| crash!(125, "invalid mode {}", e), - |m| Ok(BufferType::Size(m)), + |m| { + Ok(BufferType::Size(m.try_into().map_err(|_| { + ProgramOptionsError(format!( + "invalid mode {}: Value too large for defined data type", + x + )) + })?)) + }, ), }, None => Ok(BufferType::Default), From fa608983542632b7b32b143f7736fad5804c6af8 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Tue, 22 Feb 2022 11:09:22 +0100 Subject: [PATCH 11/12] Adjust 32-bit tests for tail,split,truncate,head --- tests/by-util/test_head.rs | 9 +-------- tests/by-util/test_split.rs | 9 +-------- tests/by-util/test_tail.rs | 5 +---- tests/by-util/test_truncate.rs | 9 +-------- 4 files changed, 4 insertions(+), 28 deletions(-) diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 1c4a01557..46ef59d99 100644 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -302,14 +302,7 @@ fn test_head_invalid_num() { { let sizes = ["1000G", "10T"]; for size in &sizes { - new_ucmd!() - .args(&["-c", size]) - .fails() - .code_is(1) - .stderr_only(format!( - "head: invalid number of bytes: '{}': Value too large for defined data type", - size - )); + new_ucmd!().args(&["-c", size]).succeeds(); } } new_ucmd!() diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 9454687ac..d888c859f 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -337,14 +337,7 @@ fn test_split_invalid_bytes_size() { { let sizes = ["1000G", "10T"]; for size in &sizes { - new_ucmd!() - .args(&["-b", size]) - .fails() - .code_is(1) - .stderr_only(format!( - "split: invalid number of bytes: '{}': Value too large for defined data type", - size - )); + new_ucmd!().args(&["-b", size]).succeeds(); } } } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index ebcd29cf5..bc757c3d1 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -485,10 +485,7 @@ fn test_tail_invalid_num() { .args(&["-c", size]) .fails() .code_is(1) - .stderr_only(format!( - "tail: invalid number of bytes: '{}': Value too large for defined data type", - size - )); + .stderr_only("tail: Insufficient addressable memory"); } } new_ucmd!() diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 214eb3eda..1a5716574 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -323,14 +323,7 @@ fn test_truncate_bytes_size() { { let sizes = ["1000G", "10T"]; for size in &sizes { - 
new_ucmd!() - .args(&["--size", size, "file"]) - .fails() - .code_is(1) - .stderr_only(format!( - "truncate: Invalid number: '{}': Value too large for defined data type", - size - )); + new_ucmd!().args(&["--size", size, "file"]).succeeds(); } } } From f3895124b9fdc05a23c6fc86f51471e28bbd3c79 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Tue, 22 Feb 2022 14:23:46 +0100 Subject: [PATCH 12/12] Remove impractical test creating a file too large --- tests/by-util/test_truncate.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 1a5716574..0d3739646 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -319,13 +319,6 @@ fn test_truncate_bytes_size() { .fails() .code_is(1) .stderr_only("truncate: Invalid number: '1Y': Value too large for defined data type"); - #[cfg(target_pointer_width = "32")] - { - let sizes = ["1000G", "10T"]; - for size in &sizes { - new_ucmd!().args(&["--size", size, "file"]).succeeds(); - } - } } /// Test that truncating a non-existent file creates that file.
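
Note (illustrative sketch, not part of any patch in the series): the pattern that patches 02-10 apply at call sites which still need an in-memory buffer amounts to parsing as u64 and converting only at the point where memory is actually allocated. The helper name below is hypothetical; parse_size and ParseSizeError are the uucore items changed in PATCH 01, and SizeTooBig is one of its existing variants.

use std::convert::TryFrom;

use uucore::parse_size::{parse_size, ParseSizeError};

/// Parse a size string that must back an in-memory buffer.
fn parse_buffer_size(s: &str) -> Result<usize, ParseSizeError> {
    // parse_size now returns u64, so 32-bit builds can describe sizes above
    // 4 GiB for streaming paths (skip, seek, truncate, read/byte counts).
    let n: u64 = parse_size(s)?;
    // Anything that has to be held in memory must still fit in usize; report
    // the overflow instead of panicking (sort maps it to SizeTooBig, while
    // head and tail surface a USimpleError in this series).
    usize::try_from(n).map_err(|_| ParseSizeError::SizeTooBig(s.to_string()))
}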