mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
split: use ByteChunkWriter and LineChunkWriter
Replace `ByteSplitter` and `LineSplitter` with `ByteChunkWriter` and `LineChunkWriter` respectively. This results in a more maintainable design and an increase in the speed of splitting by lines.
This commit is contained in:
parent
ca7af808d5
commit
1d7e1b8732
3 changed files with 65 additions and 59 deletions
|
@ -16,13 +16,14 @@ use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::{metadata, remove_file, File};
|
use std::fs::{metadata, File};
|
||||||
use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
|
use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
|
||||||
use std::num::ParseIntError;
|
use std::num::ParseIntError;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
|
||||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||||
|
use uucore::uio_error;
|
||||||
|
|
||||||
static OPT_BYTES: &str = "bytes";
|
static OPT_BYTES: &str = "bytes";
|
||||||
static OPT_LINE_BYTES: &str = "line-bytes";
|
static OPT_LINE_BYTES: &str = "line-bytes";
|
||||||
|
@ -739,65 +740,47 @@ fn split(settings: &Settings) -> UResult<()> {
|
||||||
Box::new(r) as Box<dyn Read>
|
Box::new(r) as Box<dyn Read>
|
||||||
});
|
});
|
||||||
|
|
||||||
if let Strategy::Number(num_chunks) = settings.strategy {
|
match settings.strategy {
|
||||||
return split_into_n_chunks_by_byte(settings, &mut reader, num_chunks);
|
Strategy::Number(num_chunks) => {
|
||||||
}
|
split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
|
||||||
|
|
||||||
let mut splitter: Box<dyn Splitter> = match settings.strategy {
|
|
||||||
Strategy::Lines(chunk_size) => Box::new(LineSplitter::new(chunk_size)),
|
|
||||||
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
|
|
||||||
Box::new(ByteSplitter::new(chunk_size))
|
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
Strategy::Lines(chunk_size) => {
|
||||||
};
|
let mut writer = LineChunkWriter::new(chunk_size, settings)
|
||||||
|
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||||
// This object is responsible for creating the filename for each chunk.
|
match std::io::copy(&mut reader, &mut writer) {
|
||||||
let mut filename_iterator = FilenameIterator::new(
|
Ok(_) => Ok(()),
|
||||||
&settings.prefix,
|
Err(e) => match e.kind() {
|
||||||
&settings.additional_suffix,
|
// TODO Since the writer object controls the creation of
|
||||||
settings.suffix_length,
|
// new files, we need to rely on the `std::io::Result`
|
||||||
settings.numeric_suffix,
|
// returned by its `write()` method to communicate any
|
||||||
);
|
// errors to this calling scope. If a new file cannot be
|
||||||
loop {
|
// created because we have exceeded the number of
|
||||||
// Get a new part file set up, and construct `writer` for it.
|
// allowable filenames, we use `ErrorKind::Other` to
|
||||||
let filename = filename_iterator
|
// indicate that. A special error message needs to be
|
||||||
.next()
|
// printed in that case.
|
||||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")),
|
||||||
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
_ => Err(uio_error!(e, "input/output error")),
|
||||||
|
},
|
||||||
let bytes_consumed = splitter
|
|
||||||
.consume(&mut reader, &mut writer)
|
|
||||||
.map_err_context(|| "input/output error".to_string())?;
|
|
||||||
writer
|
|
||||||
.flush()
|
|
||||||
.map_err_context(|| "error flushing to output file".to_string())?;
|
|
||||||
|
|
||||||
// If we didn't write anything we should clean up the empty file, and
|
|
||||||
// break from the loop.
|
|
||||||
if bytes_consumed == 0 {
|
|
||||||
// The output file is only ever created if --filter isn't used.
|
|
||||||
// Complicated, I know...
|
|
||||||
if settings.filter.is_none() {
|
|
||||||
remove_file(filename)
|
|
||||||
.map_err_context(|| "error removing empty file".to_string())?;
|
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
|
||||||
// TODO It is silly to have the "creating file" message here
|
let mut writer = ByteChunkWriter::new(chunk_size, settings)
|
||||||
// after the file has been already created. However, because
|
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||||
// of the way the main loop has been written, an extra file
|
match std::io::copy(&mut reader, &mut writer) {
|
||||||
// gets created and then deleted in the last iteration of the
|
Ok(_) => Ok(()),
|
||||||
// loop. So we need to make sure we are not in that case when
|
Err(e) => match e.kind() {
|
||||||
// printing this message.
|
// TODO Since the writer object controls the creation of
|
||||||
//
|
// new files, we need to rely on the `std::io::Result`
|
||||||
// This is only here temporarily while we make some
|
// returned by its `write()` method to communicate any
|
||||||
// improvements to the architecture of the main loop in this
|
// errors to this calling scope. If a new file cannot be
|
||||||
// function. In the future, it will move to a more appropriate
|
// created because we have exceeded the number of
|
||||||
// place---at the point where the file is actually created.
|
// allowable filenames, we use `ErrorKind::Other` to
|
||||||
if settings.verbose {
|
// indicate that. A special error message needs to be
|
||||||
println!("creating file {}", filename.quote());
|
// printed in that case.
|
||||||
|
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")),
|
||||||
|
_ => Err(uio_error!(e, "input/output error")),
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
// *
|
// *
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
// * file that was distributed with this source code.
|
// * file that was distributed with this source code.
|
||||||
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz
|
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines
|
||||||
extern crate rand;
|
extern crate rand;
|
||||||
extern crate regex;
|
extern crate regex;
|
||||||
|
|
||||||
|
@ -449,3 +449,21 @@ fn test_invalid_suffix_length() {
|
||||||
.no_stdout()
|
.no_stdout()
|
||||||
.stderr_contains("invalid suffix length: 'xyz'");
|
.stderr_contains("invalid suffix length: 'xyz'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Splitting by line count must keep each line's trailing newline in the
/// chunk it is written to.
///
/// The fixture `fivelines.txt` contains the lines `1` through `5`; with
/// `-l 2` the expected output is two two-line chunks followed by a final
/// one-line chunk.
#[test]
fn test_include_newlines() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-l", "2", "fivelines.txt"]).succeeds();

    // Output file name paired with the exact bytes it must contain,
    // checked in the order the chunks are produced.
    let expected_chunks = [("xaa", "1\n2\n"), ("xab", "3\n4\n"), ("xac", "5\n")];

    for &(chunk_name, expected) in &expected_chunks {
        let mut contents = String::new();
        at.open(chunk_name).read_to_string(&mut contents).unwrap();
        assert_eq!(contents, expected);
    }
}
|
||||||
|
|
5
tests/fixtures/split/fivelines.txt
vendored
Normal file
5
tests/fixtures/split/fivelines.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
5
|
Loading…
Add table
Add a link
Reference in a new issue