mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
Merge pull request #2866 from jfinkels/split-number-2
split: implement -n option
This commit is contained in:
commit
7c1abdb7d9
4 changed files with 144 additions and 27 deletions
|
@ -355,23 +355,23 @@ fn num_prefix(i: usize) -> String {
|
|||
/// assert_eq!(factory.make(650).unwrap(), "zaaa");
|
||||
/// assert_eq!(factory.make(6551).unwrap(), "zaab");
|
||||
/// ```
|
||||
pub struct FilenameFactory {
|
||||
additional_suffix: String,
|
||||
prefix: String,
|
||||
pub struct FilenameFactory<'a> {
|
||||
prefix: &'a str,
|
||||
additional_suffix: &'a str,
|
||||
suffix_length: usize,
|
||||
use_numeric_suffix: bool,
|
||||
}
|
||||
|
||||
impl FilenameFactory {
|
||||
impl<'a> FilenameFactory<'a> {
|
||||
/// Create a new instance of this struct.
|
||||
///
|
||||
/// For an explanation of the parameters, see the struct documentation.
|
||||
pub fn new(
|
||||
prefix: String,
|
||||
additional_suffix: String,
|
||||
prefix: &'a str,
|
||||
additional_suffix: &'a str,
|
||||
suffix_length: usize,
|
||||
use_numeric_suffix: bool,
|
||||
) -> FilenameFactory {
|
||||
) -> FilenameFactory<'a> {
|
||||
FilenameFactory {
|
||||
prefix,
|
||||
additional_suffix,
|
||||
|
@ -392,8 +392,8 @@ impl FilenameFactory {
|
|||
/// ```rust,ignore
|
||||
/// use crate::filenames::FilenameFactory;
|
||||
///
|
||||
/// let prefix = String::new();
|
||||
/// let suffix = String::new();
|
||||
/// let prefix = "";
|
||||
/// let suffix = "";
|
||||
/// let width = 1;
|
||||
/// let use_numeric_suffix = true;
|
||||
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
|
||||
|
@ -401,15 +401,16 @@ impl FilenameFactory {
|
|||
/// assert_eq!(factory.make(10), None);
|
||||
/// ```
|
||||
pub fn make(&self, i: usize) -> Option<String> {
|
||||
let prefix = self.prefix.clone();
|
||||
let suffix1 = match (self.use_numeric_suffix, self.suffix_length) {
|
||||
let suffix = match (self.use_numeric_suffix, self.suffix_length) {
|
||||
(true, 0) => Some(num_prefix(i)),
|
||||
(false, 0) => str_prefix(i),
|
||||
(true, width) => num_prefix_fixed_width(i, width),
|
||||
(false, width) => str_prefix_fixed_width(i, width),
|
||||
}?;
|
||||
let suffix2 = &self.additional_suffix;
|
||||
Some(prefix + &suffix1 + suffix2)
|
||||
Some(format!(
|
||||
"{}{}{}",
|
||||
self.prefix, suffix, self.additional_suffix
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -513,7 +514,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_alphabetic_suffix() {
|
||||
let factory = FilenameFactory::new("123".to_string(), "789".to_string(), 3, false);
|
||||
let factory = FilenameFactory::new("123", "789", 3, false);
|
||||
assert_eq!(factory.make(0).unwrap(), "123aaa789");
|
||||
assert_eq!(factory.make(1).unwrap(), "123aab789");
|
||||
assert_eq!(factory.make(28).unwrap(), "123abc789");
|
||||
|
@ -521,7 +522,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_numeric_suffix() {
|
||||
let factory = FilenameFactory::new("abc".to_string(), "xyz".to_string(), 3, true);
|
||||
let factory = FilenameFactory::new("abc", "xyz", 3, true);
|
||||
assert_eq!(factory.make(0).unwrap(), "abc000xyz");
|
||||
assert_eq!(factory.make(1).unwrap(), "abc001xyz");
|
||||
assert_eq!(factory.make(123).unwrap(), "abc123xyz");
|
||||
|
|
|
@ -14,8 +14,7 @@ use crate::filenames::FilenameFactory;
|
|||
use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
|
||||
use std::convert::TryFrom;
|
||||
use std::env;
|
||||
use std::fs::remove_file;
|
||||
use std::fs::File;
|
||||
use std::fs::{metadata, remove_file, File};
|
||||
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
|
||||
use std::path::Path;
|
||||
use uucore::display::Quotable;
|
||||
|
@ -27,6 +26,7 @@ static OPT_LINE_BYTES: &str = "line-bytes";
|
|||
static OPT_LINES: &str = "lines";
|
||||
static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix";
|
||||
static OPT_FILTER: &str = "filter";
|
||||
static OPT_NUMBER: &str = "number";
|
||||
static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes";
|
||||
static OPT_SUFFIX_LENGTH: &str = "suffix-length";
|
||||
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
|
||||
|
@ -132,6 +132,13 @@ pub fn uu_app<'a>() -> App<'a> {
|
|||
.default_value("1000")
|
||||
.help("put NUMBER lines/records per output file"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_NUMBER)
|
||||
.short('n')
|
||||
.long(OPT_NUMBER)
|
||||
.takes_value(true)
|
||||
.help("generate CHUNKS output files; see explanation below"),
|
||||
)
|
||||
// rest of the arguments
|
||||
.arg(
|
||||
Arg::new(OPT_ADDITIONAL_SUFFIX)
|
||||
|
@ -194,6 +201,9 @@ enum Strategy {
|
|||
/// Each chunk has as many lines as possible without exceeding the
|
||||
/// specified number of bytes.
|
||||
LineBytes(usize),
|
||||
|
||||
/// Split the file into this many chunks.
|
||||
Number(usize),
|
||||
}
|
||||
|
||||
impl Strategy {
|
||||
|
@ -208,26 +218,34 @@ impl Strategy {
|
|||
matches.occurrences_of(OPT_LINES),
|
||||
matches.occurrences_of(OPT_BYTES),
|
||||
matches.occurrences_of(OPT_LINE_BYTES),
|
||||
matches.occurrences_of(OPT_NUMBER),
|
||||
) {
|
||||
(0, 0, 0) => Ok(Strategy::Lines(1000)),
|
||||
(1, 0, 0) => {
|
||||
(0, 0, 0, 0) => Ok(Strategy::Lines(1000)),
|
||||
(1, 0, 0, 0) => {
|
||||
let s = matches.value_of(OPT_LINES).unwrap();
|
||||
let n = parse_size(s)
|
||||
.map_err(|e| USimpleError::new(1, format!("invalid number of lines: {}", e)))?;
|
||||
Ok(Strategy::Lines(n))
|
||||
}
|
||||
(0, 1, 0) => {
|
||||
(0, 1, 0, 0) => {
|
||||
let s = matches.value_of(OPT_BYTES).unwrap();
|
||||
let n = parse_size(s)
|
||||
.map_err(|e| USimpleError::new(1, format!("invalid number of bytes: {}", e)))?;
|
||||
Ok(Strategy::Bytes(n))
|
||||
}
|
||||
(0, 0, 1) => {
|
||||
(0, 0, 1, 0) => {
|
||||
let s = matches.value_of(OPT_LINE_BYTES).unwrap();
|
||||
let n = parse_size(s)
|
||||
.map_err(|e| USimpleError::new(1, format!("invalid number of bytes: {}", e)))?;
|
||||
Ok(Strategy::LineBytes(n))
|
||||
}
|
||||
(0, 0, 0, 1) => {
|
||||
let s = matches.value_of(OPT_NUMBER).unwrap();
|
||||
let n = s.parse::<usize>().map_err(|e| {
|
||||
USimpleError::new(1, format!("invalid number of chunks: {}", e))
|
||||
})?;
|
||||
Ok(Strategy::Number(n))
|
||||
}
|
||||
_ => Err(UUsageError::new(1, "cannot split in more than one way")),
|
||||
}
|
||||
}
|
||||
|
@ -344,6 +362,84 @@ impl Splitter for ByteSplitter {
|
|||
}
|
||||
}
|
||||
|
||||
/// Split a file into a specific number of chunks by byte.
|
||||
///
|
||||
/// This function always creates one output file for each chunk, even
|
||||
/// if there is an error reading or writing one of the chunks or if
|
||||
/// the input file is truncated. However, if the `filter` option is
|
||||
/// being used, then no files are created.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// This function returns an error if there is a problem reading from
|
||||
/// `reader` or writing to one of the output files.
|
||||
fn split_into_n_chunks_by_byte<R>(
|
||||
settings: &Settings,
|
||||
reader: &mut R,
|
||||
num_chunks: usize,
|
||||
) -> UResult<()>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
// Get the size of the input file in bytes and compute the number
|
||||
// of bytes per chunk.
|
||||
let metadata = metadata(&settings.input).unwrap();
|
||||
let num_bytes = metadata.len();
|
||||
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
|
||||
|
||||
// This object is responsible for creating the filename for each chunk.
|
||||
let filename_factory = FilenameFactory::new(
|
||||
&settings.prefix,
|
||||
&settings.additional_suffix,
|
||||
settings.suffix_length,
|
||||
settings.numeric_suffix,
|
||||
);
|
||||
|
||||
// Create one writer for each chunk. This will create each
|
||||
// of the underlying files (if not in `--filter` mode).
|
||||
let mut writers = vec![];
|
||||
for i in 0..num_chunks {
|
||||
let filename = filename_factory
|
||||
.make(i)
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||
writers.push(writer);
|
||||
}
|
||||
|
||||
// This block evaluates to an object of type `std::io::Result<()>`.
|
||||
{
|
||||
// Write `chunk_size` bytes from the reader into each writer
|
||||
// except the last.
|
||||
//
|
||||
// Re-use the buffer to avoid re-allocating a `Vec` on each
|
||||
// iteration. The contents will be completely overwritten each
|
||||
// time we call `read_exact()`.
|
||||
//
|
||||
// The last writer gets all remaining bytes so that if the number
|
||||
// of bytes in the input file was not evenly divisible by
|
||||
// `num_chunks`, we don't leave any bytes behind.
|
||||
let mut buf = vec![0u8; chunk_size];
|
||||
for writer in writers.iter_mut().take(num_chunks - 1) {
|
||||
reader.read_exact(&mut buf)?;
|
||||
writer.write_all(&buf)?;
|
||||
}
|
||||
|
||||
// Write all the remaining bytes to the last chunk.
|
||||
//
|
||||
// To do this, we resize our buffer to have the necessary number
|
||||
// of bytes.
|
||||
let i = num_chunks - 1;
|
||||
let last_chunk_size = num_bytes as usize - (chunk_size * (num_chunks - 1));
|
||||
buf.resize(last_chunk_size, 0);
|
||||
|
||||
reader.read_exact(&mut buf)?;
|
||||
writers[i].write_all(&buf)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
.map_err_context(|| "I/O error".to_string())
|
||||
}
|
||||
|
||||
fn split(settings: Settings) -> UResult<()> {
|
||||
let mut reader = BufReader::new(if settings.input == "-" {
|
||||
Box::new(stdin()) as Box<dyn Read>
|
||||
|
@ -357,17 +453,22 @@ fn split(settings: Settings) -> UResult<()> {
|
|||
Box::new(r) as Box<dyn Read>
|
||||
});
|
||||
|
||||
if let Strategy::Number(num_chunks) = settings.strategy {
|
||||
return split_into_n_chunks_by_byte(&settings, &mut reader, num_chunks);
|
||||
}
|
||||
|
||||
let mut splitter: Box<dyn Splitter> = match settings.strategy {
|
||||
Strategy::Lines(chunk_size) => Box::new(LineSplitter::new(chunk_size)),
|
||||
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
|
||||
Box::new(ByteSplitter::new(chunk_size))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// This object is responsible for creating the filename for each chunk.
|
||||
let filename_factory = FilenameFactory::new(
|
||||
settings.prefix,
|
||||
settings.additional_suffix,
|
||||
&settings.prefix,
|
||||
&settings.additional_suffix,
|
||||
settings.suffix_length,
|
||||
settings.numeric_suffix,
|
||||
);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// *
|
||||
// * For the full copyright and license information, please view the LICENSE
|
||||
// * file that was distributed with this source code.
|
||||
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase
|
||||
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz
|
||||
extern crate rand;
|
||||
extern crate regex;
|
||||
|
||||
|
@ -12,11 +12,10 @@ use crate::common::util::*;
|
|||
use rand::SeedableRng;
|
||||
#[cfg(not(windows))]
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::{
|
||||
fs::{read_dir, File},
|
||||
io::{BufWriter, Read},
|
||||
io::{BufWriter, Read, Write},
|
||||
};
|
||||
|
||||
fn random_chars(n: usize) -> String {
|
||||
|
@ -425,3 +424,19 @@ creating file 'xaf'
|
|||
",
|
||||
);
|
||||
}
|
||||
|
||||
/// `split -n 5` on the 26-byte lowercase alphabet must produce four
/// five-byte chunks followed by a final chunk holding the remaining six.
#[test]
fn test_number() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();

    // Output file name paired with the contents it is expected to hold.
    let expected_chunks = [
        ("xaa", "abcde"),
        ("xab", "fghij"),
        ("xac", "klmno"),
        ("xad", "pqrst"),
        ("xae", "uvwxyz"),
    ];
    for &(name, expected) in expected_chunks.iter() {
        let mut contents = String::new();
        at.open(name).read_to_string(&mut contents).unwrap();
        assert_eq!(contents, expected);
    }
}
|
||||
|
|
2
tests/fixtures/split/asciilowercase.txt
vendored
2
tests/fixtures/split/asciilowercase.txt
vendored
|
@ -1 +1 @@
|
|||
abcdefghijklmnopqrstuvwxyz
|
||||
abcdefghijklmnopqrstuvwxyz
|
Loading…
Add table
Add a link
Reference in a new issue