mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
Rewrite cut_bytes(), more performant than GNU
Creates BufReader in buffer.rs. BufReader reads into a stack-allocated buffer and returns selected slices of that buffer. This does away with any dynamic allocations in the 'newline loop. 1.5 to 2.5 times more performant than the previous version. 1.5 to 2.0 times more performant than GNU.
This commit is contained in:
parent
b8def68668
commit
f06d9fe130
2 changed files with 224 additions and 59 deletions
134
cut/buffer.rs
Normal file
134
cut/buffer.rs
Normal file
|
@ -0,0 +1,134 @@
|
|||
use std;
|
||||
use std::io::{IoResult, IoError};
|
||||
|
||||
pub struct BufReader<R> {
|
||||
reader: R,
|
||||
buffer: [u8, ..4096],
|
||||
start: uint,
|
||||
end: uint, // exclusive
|
||||
}
|
||||
|
||||
pub mod Bytes {
|
||||
pub trait Select {
|
||||
fn select<'a>(&'a mut self, bytes: uint) -> Selected<'a>;
|
||||
}
|
||||
|
||||
pub enum Selected<'a> {
|
||||
NewlineFound(&'a [u8]),
|
||||
Complete(&'a [u8]),
|
||||
Partial(&'a [u8]),
|
||||
EndOfFile,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Reader> BufReader<R> {
|
||||
pub fn new(reader: R) -> BufReader<R> {
|
||||
let empty_buffer = unsafe {
|
||||
std::mem::uninitialized::<[u8, ..4096]>()
|
||||
};
|
||||
|
||||
BufReader {
|
||||
reader: reader,
|
||||
buffer: empty_buffer,
|
||||
start: 0,
|
||||
end: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn read(&mut self) -> IoResult<uint> {
|
||||
let buf_len = self.buffer.len();
|
||||
let buffer_fill = self.buffer.mut_slice(self.end, buf_len);
|
||||
|
||||
match self.reader.read(buffer_fill) {
|
||||
Ok(nread) => {
|
||||
self.end += nread;
|
||||
Ok(nread)
|
||||
}
|
||||
error => error
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn maybe_fill_buf(&mut self) -> IoResult<uint> {
|
||||
if self.end == self.start {
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
}
|
||||
|
||||
if self.end <= 2048 { self.read() } else { Ok(0) }
|
||||
}
|
||||
|
||||
pub fn consume_line(&mut self) -> uint {
|
||||
let mut bytes_consumed = 0;
|
||||
|
||||
loop {
|
||||
match self.maybe_fill_buf() {
|
||||
Err(IoError { kind: std::io::EndOfFile, .. }) => (),
|
||||
Err(err) => fail!("read error: {}", err.desc),
|
||||
_ => ()
|
||||
}
|
||||
|
||||
let buffer_used = self.end - self.start;
|
||||
|
||||
if buffer_used == 0 { return bytes_consumed; }
|
||||
|
||||
for idx in range(self.start, self.end) {
|
||||
if self.buffer[idx] == b'\n' {
|
||||
self.start = idx + 1;
|
||||
return bytes_consumed + idx + 1;
|
||||
}
|
||||
}
|
||||
|
||||
bytes_consumed += buffer_used;
|
||||
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Reader> Bytes::Select for BufReader<R> {
|
||||
fn select<'a>(&'a mut self, bytes: uint) -> Bytes::Selected<'a> {
|
||||
match self.maybe_fill_buf() {
|
||||
Err(IoError { kind: std::io::EndOfFile, .. }) => (),
|
||||
Err(err) => fail!("read error: {}", err.desc),
|
||||
_ => ()
|
||||
}
|
||||
|
||||
let buffer_used = self.end - self.start;
|
||||
|
||||
if buffer_used == 0 { return Bytes::EndOfFile; }
|
||||
|
||||
let (complete, max_segment_len) = {
|
||||
if bytes < buffer_used {
|
||||
(true, bytes + 1)
|
||||
} else {
|
||||
(false, buffer_used)
|
||||
}
|
||||
};
|
||||
|
||||
for idx in range(self.start, self.start + max_segment_len) {
|
||||
if self.buffer[idx] == b'\n' {
|
||||
let segment = self.buffer.slice(self.start, idx + 1);
|
||||
|
||||
self.start = idx + 1;
|
||||
|
||||
return Bytes::NewlineFound(segment);
|
||||
}
|
||||
}
|
||||
|
||||
if complete {
|
||||
let segment = self.buffer.slice(self.start,
|
||||
self.start + bytes);
|
||||
|
||||
self.start += bytes;
|
||||
Bytes::Complete(segment)
|
||||
} else {
|
||||
let segment = self.buffer.slice(self.start, self.end);
|
||||
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
Bytes::Partial(segment)
|
||||
}
|
||||
}
|
||||
}
|
149
cut/cut.rs
149
cut/cut.rs
|
@ -14,7 +14,7 @@
|
|||
extern crate getopts;
|
||||
extern crate libc;
|
||||
|
||||
use std::io::{File, BufferedWriter, BufferedReader, stdin, print};
|
||||
use std::io::{stdio, File, BufferedWriter, BufferedReader, print};
|
||||
use getopts::{optopt, optflag, getopts, usage};
|
||||
|
||||
use ranges::Range;
|
||||
|
@ -22,6 +22,7 @@ use ranges::Range;
|
|||
#[path = "../common/util.rs"]
|
||||
mod util;
|
||||
mod ranges;
|
||||
mod buffer;
|
||||
|
||||
static NAME: &'static str = "cut";
|
||||
static VERSION: &'static str = "1.0.0";
|
||||
|
@ -50,67 +51,94 @@ fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
|
|||
}
|
||||
}
|
||||
|
||||
fn cut_bytes<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_bytes<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &Options) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let (use_delim, out_delim) = match opts.out_delim.clone() {
|
||||
Some(delim) => (true, delim),
|
||||
None => (false, "".to_string())
|
||||
};
|
||||
use buffer::Bytes::{Select, NewlineFound, Complete, Partial, EndOfFile};
|
||||
|
||||
let mut buf_read = buffer::BufReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
};
|
||||
|
||||
let line_len = line.len();
|
||||
let mut cur_pos = 1;
|
||||
let mut print_delim = false;
|
||||
|
||||
for &Range { low: low, high: high } in ranges.iter() {
|
||||
if low > line_len { break; }
|
||||
// skip up to low
|
||||
let orig_pos = cur_pos;
|
||||
loop {
|
||||
match buf_read.select(low - cur_pos) {
|
||||
NewlineFound(_) => {
|
||||
out.write(&[b'\n']).unwrap();
|
||||
continue 'newline
|
||||
}
|
||||
Complete(bytes) => {
|
||||
cur_pos += bytes.len();
|
||||
break
|
||||
}
|
||||
Partial(bytes) => cur_pos += bytes.len(),
|
||||
EndOfFile => {
|
||||
if orig_pos != cur_pos {
|
||||
out.write(&[b'\n']).unwrap();
|
||||
}
|
||||
|
||||
if use_delim {
|
||||
if print_delim {
|
||||
out.write_str(out_delim.as_slice()).unwrap();
|
||||
break 'newline
|
||||
}
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
|
||||
if high >= line_len {
|
||||
let segment = line.slice(low - 1, line_len);
|
||||
|
||||
out.write(segment).unwrap();
|
||||
|
||||
if *line.get(line_len - 1) == b'\n' {
|
||||
continue 'newline
|
||||
match opts.out_delim {
|
||||
Some(ref delim) => {
|
||||
if print_delim {
|
||||
out.write(delim.as_bytes()).unwrap();
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
} else {
|
||||
let segment = line.slice(low - 1, high);
|
||||
None => ()
|
||||
}
|
||||
|
||||
out.write(segment).unwrap();
|
||||
// write out from low to high
|
||||
loop {
|
||||
match buf_read.select(high - cur_pos + 1) {
|
||||
NewlineFound(bytes) => {
|
||||
out.write(bytes).unwrap();
|
||||
continue 'newline
|
||||
}
|
||||
Complete(bytes) => {
|
||||
out.write(bytes).unwrap();
|
||||
cur_pos = high + 1;
|
||||
break
|
||||
}
|
||||
Partial(bytes) => {
|
||||
cur_pos += bytes.len();
|
||||
out.write(bytes).unwrap();
|
||||
}
|
||||
EndOfFile => {
|
||||
if cur_pos != low || low == high {
|
||||
out.write(&[b'\n']).unwrap();
|
||||
}
|
||||
|
||||
break 'newline
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out.write(&[b'\n']).unwrap();
|
||||
buf_read.consume_line();
|
||||
out.write([b'\n']).unwrap();
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
fn cut_characters<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_characters<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &Options) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let (use_delim, out_delim) = match opts.out_delim.clone() {
|
||||
Some(delim) => (true, delim),
|
||||
None => (false, "".to_string())
|
||||
};
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_line() {
|
||||
let line = match buf_in.read_line() {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
|
@ -126,11 +154,14 @@ fn cut_characters<T: Reader>(mut reader: BufferedReader<T>,
|
|||
None => break
|
||||
};
|
||||
|
||||
if use_delim {
|
||||
if print_delim {
|
||||
out.write_str(out_delim.as_slice()).unwrap();
|
||||
match opts.out_delim {
|
||||
Some(ref delim) => {
|
||||
if print_delim {
|
||||
out.write(delim.as_bytes()).unwrap();
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
print_delim = true;
|
||||
None => ()
|
||||
}
|
||||
|
||||
match char_indices.nth(high - low) {
|
||||
|
@ -204,15 +235,16 @@ impl<'a> Iterator<(uint, uint)> for Searcher<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn cut_fields_delimiter<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_fields_delimiter<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
delim: &String,
|
||||
only_delimited: bool,
|
||||
out_delim: &String) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_until(b'\n') {
|
||||
let line = match buf_in.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
|
@ -279,7 +311,7 @@ fn cut_fields_delimiter<T: Reader>(mut reader: BufferedReader<T>,
|
|||
0
|
||||
}
|
||||
|
||||
fn cut_fields<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_fields<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &FieldOptions) -> int {
|
||||
match opts.out_delimeter {
|
||||
|
@ -290,10 +322,11 @@ fn cut_fields<T: Reader>(mut reader: BufferedReader<T>,
|
|||
None => ()
|
||||
}
|
||||
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_until(b'\n') {
|
||||
let line = match buf_in.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
|
@ -367,17 +400,17 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
|
|||
|
||||
for filename in filenames.iter() {
|
||||
if filename.as_slice() == "-" {
|
||||
if stdin_read { continue; }
|
||||
if stdin_read { continue }
|
||||
|
||||
exit_code |= match mode {
|
||||
Bytes(ref ranges, ref opts) => {
|
||||
cut_bytes(stdin(), ranges, opts)
|
||||
cut_bytes(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Characters(ref ranges, ref opts) => {
|
||||
cut_characters(stdin(), ranges, opts)
|
||||
cut_characters(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Fields(ref ranges, ref opts) => {
|
||||
cut_fields(stdin(), ranges, opts)
|
||||
cut_fields(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -387,11 +420,11 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
|
|||
|
||||
if ! path.exists() {
|
||||
show_error!("{}: No such file or directory", filename);
|
||||
continue;
|
||||
continue
|
||||
}
|
||||
|
||||
let buf_file = match File::open(&path) {
|
||||
Ok(file) => BufferedReader::new(file),
|
||||
let file = match File::open(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
show_error!("{}: {}", filename, e.desc);
|
||||
continue
|
||||
|
@ -399,13 +432,11 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
|
|||
};
|
||||
|
||||
exit_code |= match mode {
|
||||
Bytes(ref ranges, ref opts) => cut_bytes(buf_file, ranges, opts),
|
||||
Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts),
|
||||
Characters(ref ranges, ref opts) => {
|
||||
cut_characters(buf_file, ranges, opts)
|
||||
}
|
||||
Fields(ref ranges, ref opts) => {
|
||||
cut_fields(buf_file, ranges, opts)
|
||||
cut_characters(file, ranges, opts)
|
||||
}
|
||||
Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue