1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

Merge pull request #2866 from jfinkels/split-number-2

split: implement -n option
This commit is contained in:
Sylvestre Ledru 2022-01-30 09:58:04 +01:00 committed by GitHub
commit 7c1abdb7d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 144 additions and 27 deletions

View file

@ -355,23 +355,23 @@ fn num_prefix(i: usize) -> String {
/// assert_eq!(factory.make(650).unwrap(), "zaaa");
/// assert_eq!(factory.make(6551).unwrap(), "zaab");
/// ```
pub struct FilenameFactory {
additional_suffix: String,
prefix: String,
pub struct FilenameFactory<'a> {
prefix: &'a str,
additional_suffix: &'a str,
suffix_length: usize,
use_numeric_suffix: bool,
}
impl FilenameFactory {
impl<'a> FilenameFactory<'a> {
/// Create a new instance of this struct.
///
/// For an explanation of the parameters, see the struct documentation.
pub fn new(
prefix: String,
additional_suffix: String,
prefix: &'a str,
additional_suffix: &'a str,
suffix_length: usize,
use_numeric_suffix: bool,
) -> FilenameFactory {
) -> FilenameFactory<'a> {
FilenameFactory {
prefix,
additional_suffix,
@ -392,8 +392,8 @@ impl FilenameFactory {
/// ```rust,ignore
/// use crate::filenames::FilenameFactory;
///
/// let prefix = String::new();
/// let suffix = String::new();
/// let prefix = "";
/// let suffix = "";
/// let width = 1;
/// let use_numeric_suffix = true;
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
@ -401,15 +401,16 @@ impl FilenameFactory {
/// assert_eq!(factory.make(10), None);
/// ```
pub fn make(&self, i: usize) -> Option<String> {
let prefix = self.prefix.clone();
let suffix1 = match (self.use_numeric_suffix, self.suffix_length) {
let suffix = match (self.use_numeric_suffix, self.suffix_length) {
(true, 0) => Some(num_prefix(i)),
(false, 0) => str_prefix(i),
(true, width) => num_prefix_fixed_width(i, width),
(false, width) => str_prefix_fixed_width(i, width),
}?;
let suffix2 = &self.additional_suffix;
Some(prefix + &suffix1 + suffix2)
Some(format!(
"{}{}{}",
self.prefix, suffix, self.additional_suffix
))
}
}
@ -513,7 +514,7 @@ mod tests {
#[test]
fn test_alphabetic_suffix() {
let factory = FilenameFactory::new("123".to_string(), "789".to_string(), 3, false);
let factory = FilenameFactory::new("123", "789", 3, false);
assert_eq!(factory.make(0).unwrap(), "123aaa789");
assert_eq!(factory.make(1).unwrap(), "123aab789");
assert_eq!(factory.make(28).unwrap(), "123abc789");
@ -521,7 +522,7 @@ mod tests {
#[test]
fn test_numeric_suffix() {
let factory = FilenameFactory::new("abc".to_string(), "xyz".to_string(), 3, true);
let factory = FilenameFactory::new("abc", "xyz", 3, true);
assert_eq!(factory.make(0).unwrap(), "abc000xyz");
assert_eq!(factory.make(1).unwrap(), "abc001xyz");
assert_eq!(factory.make(123).unwrap(), "abc123xyz");

View file

@ -14,8 +14,7 @@ use crate::filenames::FilenameFactory;
use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::convert::TryFrom;
use std::env;
use std::fs::remove_file;
use std::fs::File;
use std::fs::{metadata, remove_file, File};
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;
use uucore::display::Quotable;
@ -27,6 +26,7 @@ static OPT_LINE_BYTES: &str = "line-bytes";
static OPT_LINES: &str = "lines";
static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix";
static OPT_FILTER: &str = "filter";
static OPT_NUMBER: &str = "number";
static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes";
static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
@ -132,6 +132,13 @@ pub fn uu_app<'a>() -> App<'a> {
.default_value("1000")
.help("put NUMBER lines/records per output file"),
)
.arg(
Arg::new(OPT_NUMBER)
.short('n')
.long(OPT_NUMBER)
.takes_value(true)
.help("generate CHUNKS output files; see explanation below"),
)
// rest of the arguments
.arg(
Arg::new(OPT_ADDITIONAL_SUFFIX)
@ -194,6 +201,9 @@ enum Strategy {
/// Each chunk has as many lines as possible without exceeding the
/// specified number of bytes.
LineBytes(usize),
/// Split the file into this many chunks.
Number(usize),
}
impl Strategy {
@ -208,26 +218,34 @@ impl Strategy {
matches.occurrences_of(OPT_LINES),
matches.occurrences_of(OPT_BYTES),
matches.occurrences_of(OPT_LINE_BYTES),
matches.occurrences_of(OPT_NUMBER),
) {
(0, 0, 0) => Ok(Strategy::Lines(1000)),
(1, 0, 0) => {
(0, 0, 0, 0) => Ok(Strategy::Lines(1000)),
(1, 0, 0, 0) => {
let s = matches.value_of(OPT_LINES).unwrap();
let n = parse_size(s)
.map_err(|e| USimpleError::new(1, format!("invalid number of lines: {}", e)))?;
Ok(Strategy::Lines(n))
}
(0, 1, 0) => {
(0, 1, 0, 0) => {
let s = matches.value_of(OPT_BYTES).unwrap();
let n = parse_size(s)
.map_err(|e| USimpleError::new(1, format!("invalid number of bytes: {}", e)))?;
Ok(Strategy::Bytes(n))
}
(0, 0, 1) => {
(0, 0, 1, 0) => {
let s = matches.value_of(OPT_LINE_BYTES).unwrap();
let n = parse_size(s)
.map_err(|e| USimpleError::new(1, format!("invalid number of bytes: {}", e)))?;
Ok(Strategy::LineBytes(n))
}
(0, 0, 0, 1) => {
let s = matches.value_of(OPT_NUMBER).unwrap();
let n = s.parse::<usize>().map_err(|e| {
USimpleError::new(1, format!("invalid number of chunks: {}", e))
})?;
Ok(Strategy::Number(n))
}
_ => Err(UUsageError::new(1, "cannot split in more than one way")),
}
}
@ -344,6 +362,84 @@ impl Splitter for ByteSplitter {
}
}
/// Split a file into a specific number of chunks by byte.
///
/// This function always creates one output file for each chunk, even
/// if there is an error reading or writing one of the chunks or if
/// the input file is truncated. However, if the `filter` option is
/// being used, then no files are created.
///
/// # Errors
///
/// This function returns an error if there is a problem reading from
/// `reader` or writing to one of the output files.
fn split_into_n_chunks_by_byte<R>(
settings: &Settings,
reader: &mut R,
num_chunks: usize,
) -> UResult<()>
where
R: Read,
{
// Get the size of the input file in bytes and compute the number
// of bytes per chunk.
let metadata = metadata(&settings.input).unwrap();
let num_bytes = metadata.len();
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
// This object is responsible for creating the filename for each chunk.
let filename_factory = FilenameFactory::new(
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
);
// Create one writer for each chunk. This will create each
// of the underlying files (if not in `--filter` mode).
let mut writers = vec![];
for i in 0..num_chunks {
let filename = filename_factory
.make(i)
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
writers.push(writer);
}
// This block evaluates to an object of type `std::io::Result<()>`.
{
// Write `chunk_size` bytes from the reader into each writer
// except the last.
//
// Re-use the buffer to avoid re-allocating a `Vec` on each
// iteration. The contents will be completely overwritten each
// time we call `read_exact()`.
//
// The last writer gets all remaining bytes so that if the number
// of bytes in the input file was not evenly divisible by
// `num_chunks`, we don't leave any bytes behind.
let mut buf = vec![0u8; chunk_size];
for writer in writers.iter_mut().take(num_chunks - 1) {
reader.read_exact(&mut buf)?;
writer.write_all(&buf)?;
}
// Write all the remaining bytes to the last chunk.
//
// To do this, we resize our buffer to have the necessary number
// of bytes.
let i = num_chunks - 1;
let last_chunk_size = num_bytes as usize - (chunk_size * (num_chunks - 1));
buf.resize(last_chunk_size, 0);
reader.read_exact(&mut buf)?;
writers[i].write_all(&buf)?;
Ok(())
}
.map_err_context(|| "I/O error".to_string())
}
fn split(settings: Settings) -> UResult<()> {
let mut reader = BufReader::new(if settings.input == "-" {
Box::new(stdin()) as Box<dyn Read>
@ -357,17 +453,22 @@ fn split(settings: Settings) -> UResult<()> {
Box::new(r) as Box<dyn Read>
});
if let Strategy::Number(num_chunks) = settings.strategy {
return split_into_n_chunks_by_byte(&settings, &mut reader, num_chunks);
}
let mut splitter: Box<dyn Splitter> = match settings.strategy {
Strategy::Lines(chunk_size) => Box::new(LineSplitter::new(chunk_size)),
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
Box::new(ByteSplitter::new(chunk_size))
}
_ => unreachable!(),
};
// This object is responsible for creating the filename for each chunk.
let filename_factory = FilenameFactory::new(
settings.prefix,
settings.additional_suffix,
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
);

View file

@ -2,7 +2,7 @@
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz
extern crate rand;
extern crate regex;
@ -12,11 +12,10 @@ use crate::common::util::*;
use rand::SeedableRng;
#[cfg(not(windows))]
use std::env;
use std::io::Write;
use std::path::Path;
use std::{
fs::{read_dir, File},
io::{BufWriter, Read},
io::{BufWriter, Read, Write},
};
fn random_chars(n: usize) -> String {
@ -425,3 +424,19 @@ creating file 'xaf'
",
);
}
/// `split -n 5` on the 26-byte lowercase alphabet fixture must yield
/// four 5-byte chunks followed by a final chunk holding the 6
/// remaining bytes.
#[test]
fn test_number() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();

    // Reads the whole content of one output file into a `String`.
    let read_chunk = |name| {
        let mut content = String::new();
        at.open(name).read_to_string(&mut content).unwrap();
        content
    };

    // Output file name paired with the bytes it is expected to hold.
    let expected_chunks = [
        ("xaa", "abcde"),
        ("xab", "fghij"),
        ("xac", "klmno"),
        ("xad", "pqrst"),
        ("xae", "uvwxyz"),
    ];
    for &(name, expected) in expected_chunks.iter() {
        assert_eq!(read_chunk(name), expected);
    }
}

View file

@ -1 +1 @@
abcdefghijklmnopqrstuvwxyz
abcdefghijklmnopqrstuvwxyz