1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Use u128 instead of usize for large numbers and for consistency across architectures

This commit is contained in:
Samuel Ainsworth 2021-05-04 15:21:35 -07:00 committed by Sylvestre Ledru
parent b8a3a8995f
commit bacad8ed93

View file

@ -221,7 +221,7 @@ trait Splitter {
&mut self, &mut self,
reader: &mut BufReader<Box<dyn Read>>, reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>, writer: &mut BufWriter<Box<dyn Write>>,
) -> usize; ) -> u128;
} }
struct LineSplitter { struct LineSplitter {
@ -244,8 +244,8 @@ impl Splitter for LineSplitter {
&mut self, &mut self,
reader: &mut BufReader<Box<dyn Read>>, reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>, writer: &mut BufWriter<Box<dyn Write>>,
) -> usize { ) -> u128 {
let mut bytes_consumed = 0usize; let mut bytes_consumed = 0u128;
let mut buffer = String::with_capacity(1024); let mut buffer = String::with_capacity(1024);
for _ in 0..self.lines_per_split { for _ in 0..self.lines_per_split {
let bytes_read = reader let bytes_read = reader
@ -263,7 +263,7 @@ impl Splitter for LineSplitter {
// replaces. // replaces.
buffer.clear(); buffer.clear();
bytes_consumed += bytes_read; bytes_consumed += bytes_read as u128;
} }
bytes_consumed bytes_consumed
@ -271,32 +271,29 @@ impl Splitter for LineSplitter {
} }
struct ByteSplitter { struct ByteSplitter {
bytes_per_split: usize, bytes_per_split: u128,
} }
impl ByteSplitter { impl ByteSplitter {
fn new(settings: &Settings) -> ByteSplitter { fn new(settings: &Settings) -> ByteSplitter {
// These multipliers are the same as supported by GNU coreutils with the // These multipliers are the same as supported by GNU coreutils.
// exception of zettabytes (2^70) and yottabytes (2^80) as they overflow let modifiers: Vec<(&str, u128)> = vec![
// standard machine usize (2^64), so we disable for now. Note however ("K", 1024u128),
// that they are supported by the GNU coreutils split. Ignored for now.
let modifiers: Vec<(&str, usize)> = vec![
("K", 1024usize),
("M", 1024 * 1024), ("M", 1024 * 1024),
("G", 1024 * 1024 * 1024), ("G", 1024 * 1024 * 1024),
("T", 1024 * 1024 * 1024 * 1024), ("T", 1024 * 1024 * 1024 * 1024),
("P", 1024 * 1024 * 1024 * 1024 * 1024), ("P", 1024 * 1024 * 1024 * 1024 * 1024),
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
// ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
// ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("KB", 1000), ("KB", 1000),
("MB", 1000 * 1000), ("MB", 1000 * 1000),
("GB", 1000 * 1000 * 1000), ("GB", 1000 * 1000 * 1000),
("TB", 1000 * 1000 * 1000 * 1000), ("TB", 1000 * 1000 * 1000 * 1000),
("PB", 1000 * 1000 * 1000 * 1000 * 1000), ("PB", 1000 * 1000 * 1000 * 1000 * 1000),
("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000), ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
// ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
// ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
]; ];
// This sequential find is acceptable since none of the modifiers are // This sequential find is acceptable since none of the modifiers are
@ -308,7 +305,7 @@ impl ByteSplitter {
// Try to parse the actual numeral. // Try to parse the actual numeral.
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
.parse::<usize>() .parse::<u128>()
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e)); .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
ByteSplitter { ByteSplitter {
@ -322,15 +319,23 @@ impl Splitter for ByteSplitter {
&mut self, &mut self,
reader: &mut BufReader<Box<dyn Read>>, reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>, writer: &mut BufWriter<Box<dyn Write>>,
) -> usize { ) -> u128 {
// We buffer reads and writes. We proceed until `bytes_consumed` is // We buffer reads and writes. We proceed until `bytes_consumed` is
// equal to `self.bytes_per_split` or we reach EOF. // equal to `self.bytes_per_split` or we reach EOF.
let mut bytes_consumed = 0usize; let mut bytes_consumed = 0u128;
const BUFFER_SIZE: usize = 1024; const BUFFER_SIZE: usize = 1024;
let mut buffer = [0u8; BUFFER_SIZE]; let mut buffer = [0u8; BUFFER_SIZE];
while bytes_consumed < self.bytes_per_split { while bytes_consumed < self.bytes_per_split {
// Don't overshoot `self.bytes_per_split`! // Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min
let bytes_desired = std::cmp::min(BUFFER_SIZE, self.bytes_per_split - bytes_consumed); // doesn't really work since we have to get types to match which
// can't be done in a way that keeps all conversions safe.
let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed {
BUFFER_SIZE
} else {
// This is a safe conversion since the difference must be less
// than BUFFER_SIZE in this branch.
(self.bytes_per_split - bytes_consumed) as usize
};
let bytes_read = reader let bytes_read = reader
.read(&mut buffer[0..bytes_desired]) .read(&mut buffer[0..bytes_desired])
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); .unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
@ -343,7 +348,7 @@ impl Splitter for ByteSplitter {
.write_all(&buffer[0..bytes_read]) .write_all(&buffer[0..bytes_read])
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); .unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
bytes_consumed += bytes_read; bytes_consumed += bytes_read as u128;
} }
bytes_consumed bytes_consumed