mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Use u128 instead of usize for large numbers and for consistency across architectures
This commit is contained in:
parent
b8a3a8995f
commit
bacad8ed93
1 changed file with 26 additions and 21 deletions
|
@ -221,7 +221,7 @@ trait Splitter {
|
||||||
&mut self,
|
&mut self,
|
||||||
reader: &mut BufReader<Box<dyn Read>>,
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
writer: &mut BufWriter<Box<dyn Write>>,
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
) -> usize;
|
) -> u128;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LineSplitter {
|
struct LineSplitter {
|
||||||
|
@ -244,8 +244,8 @@ impl Splitter for LineSplitter {
|
||||||
&mut self,
|
&mut self,
|
||||||
reader: &mut BufReader<Box<dyn Read>>,
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
writer: &mut BufWriter<Box<dyn Write>>,
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
) -> usize {
|
) -> u128 {
|
||||||
let mut bytes_consumed = 0usize;
|
let mut bytes_consumed = 0u128;
|
||||||
let mut buffer = String::with_capacity(1024);
|
let mut buffer = String::with_capacity(1024);
|
||||||
for _ in 0..self.lines_per_split {
|
for _ in 0..self.lines_per_split {
|
||||||
let bytes_read = reader
|
let bytes_read = reader
|
||||||
|
@ -263,7 +263,7 @@ impl Splitter for LineSplitter {
|
||||||
// replaces.
|
// replaces.
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
|
||||||
bytes_consumed += bytes_read;
|
bytes_consumed += bytes_read as u128;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytes_consumed
|
bytes_consumed
|
||||||
|
@ -271,32 +271,29 @@ impl Splitter for LineSplitter {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ByteSplitter {
|
struct ByteSplitter {
|
||||||
bytes_per_split: usize,
|
bytes_per_split: u128,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ByteSplitter {
|
impl ByteSplitter {
|
||||||
fn new(settings: &Settings) -> ByteSplitter {
|
fn new(settings: &Settings) -> ByteSplitter {
|
||||||
// These multipliers are the same as supported by GNU coreutils with the
|
// These multipliers are the same as supported by GNU coreutils.
|
||||||
// exception of zettabytes (2^70) and yottabytes (2^80) as they overflow
|
let modifiers: Vec<(&str, u128)> = vec![
|
||||||
// standard machine usize (2^64), so we disable for now. Note however
|
("K", 1024u128),
|
||||||
// that they are supported by the GNU coreutils split. Ignored for now.
|
|
||||||
let modifiers: Vec<(&str, usize)> = vec![
|
|
||||||
("K", 1024usize),
|
|
||||||
("M", 1024 * 1024),
|
("M", 1024 * 1024),
|
||||||
("G", 1024 * 1024 * 1024),
|
("G", 1024 * 1024 * 1024),
|
||||||
("T", 1024 * 1024 * 1024 * 1024),
|
("T", 1024 * 1024 * 1024 * 1024),
|
||||||
("P", 1024 * 1024 * 1024 * 1024 * 1024),
|
("P", 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
// ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
// ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
("KB", 1000),
|
("KB", 1000),
|
||||||
("MB", 1000 * 1000),
|
("MB", 1000 * 1000),
|
||||||
("GB", 1000 * 1000 * 1000),
|
("GB", 1000 * 1000 * 1000),
|
||||||
("TB", 1000 * 1000 * 1000 * 1000),
|
("TB", 1000 * 1000 * 1000 * 1000),
|
||||||
("PB", 1000 * 1000 * 1000 * 1000 * 1000),
|
("PB", 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
// ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
// ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
];
|
];
|
||||||
|
|
||||||
// This sequential find is acceptable since none of the modifiers are
|
// This sequential find is acceptable since none of the modifiers are
|
||||||
|
@ -308,7 +305,7 @@ impl ByteSplitter {
|
||||||
|
|
||||||
// Try to parse the actual numeral.
|
// Try to parse the actual numeral.
|
||||||
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
|
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
|
||||||
.parse::<usize>()
|
.parse::<u128>()
|
||||||
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
|
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
|
||||||
|
|
||||||
ByteSplitter {
|
ByteSplitter {
|
||||||
|
@ -322,15 +319,23 @@ impl Splitter for ByteSplitter {
|
||||||
&mut self,
|
&mut self,
|
||||||
reader: &mut BufReader<Box<dyn Read>>,
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
writer: &mut BufWriter<Box<dyn Write>>,
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
) -> usize {
|
) -> u128 {
|
||||||
// We buffer reads and writes. We proceed until `bytes_consumed` is
|
// We buffer reads and writes. We proceed until `bytes_consumed` is
|
||||||
// equal to `self.bytes_per_split` or we reach EOF.
|
// equal to `self.bytes_per_split` or we reach EOF.
|
||||||
let mut bytes_consumed = 0usize;
|
let mut bytes_consumed = 0u128;
|
||||||
const BUFFER_SIZE: usize = 1024;
|
const BUFFER_SIZE: usize = 1024;
|
||||||
let mut buffer = [0u8; BUFFER_SIZE];
|
let mut buffer = [0u8; BUFFER_SIZE];
|
||||||
while bytes_consumed < self.bytes_per_split {
|
while bytes_consumed < self.bytes_per_split {
|
||||||
// Don't overshoot `self.bytes_per_split`!
|
// Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min
|
||||||
let bytes_desired = std::cmp::min(BUFFER_SIZE, self.bytes_per_split - bytes_consumed);
|
// doesn't really work since we have to get types to match which
|
||||||
|
// can't be done in a way that keeps all conversions safe.
|
||||||
|
let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed {
|
||||||
|
BUFFER_SIZE
|
||||||
|
} else {
|
||||||
|
// This is a safe conversion since the difference must be less
|
||||||
|
// than BUFFER_SIZE in this branch.
|
||||||
|
(self.bytes_per_split - bytes_consumed) as usize
|
||||||
|
};
|
||||||
let bytes_read = reader
|
let bytes_read = reader
|
||||||
.read(&mut buffer[0..bytes_desired])
|
.read(&mut buffer[0..bytes_desired])
|
||||||
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
|
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
|
||||||
|
@ -343,7 +348,7 @@ impl Splitter for ByteSplitter {
|
||||||
.write_all(&buffer[0..bytes_read])
|
.write_all(&buffer[0..bytes_read])
|
||||||
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
|
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
|
||||||
|
|
||||||
bytes_consumed += bytes_read;
|
bytes_consumed += bytes_read as u128;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytes_consumed
|
bytes_consumed
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue