mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 20:17:45 +00:00
Merge pull request #345 from polyphemus/cut-bytes-rewrite
Rewrite bytes cutting for cut
This commit is contained in:
commit
514cbd3c9d
2 changed files with 224 additions and 59 deletions
134
cut/buffer.rs
Normal file
134
cut/buffer.rs
Normal file
|
@ -0,0 +1,134 @@
|
|||
use std;
|
||||
use std::io::{IoResult, IoError};
|
||||
|
||||
pub struct BufReader<R> {
|
||||
reader: R,
|
||||
buffer: [u8, ..4096],
|
||||
start: uint,
|
||||
end: uint, // exclusive
|
||||
}
|
||||
|
||||
pub mod Bytes {
|
||||
pub trait Select {
|
||||
fn select<'a>(&'a mut self, bytes: uint) -> Selected<'a>;
|
||||
}
|
||||
|
||||
pub enum Selected<'a> {
|
||||
NewlineFound(&'a [u8]),
|
||||
Complete(&'a [u8]),
|
||||
Partial(&'a [u8]),
|
||||
EndOfFile,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Reader> BufReader<R> {
|
||||
pub fn new(reader: R) -> BufReader<R> {
|
||||
let empty_buffer = unsafe {
|
||||
std::mem::uninitialized::<[u8, ..4096]>()
|
||||
};
|
||||
|
||||
BufReader {
|
||||
reader: reader,
|
||||
buffer: empty_buffer,
|
||||
start: 0,
|
||||
end: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn read(&mut self) -> IoResult<uint> {
|
||||
let buf_len = self.buffer.len();
|
||||
let buffer_fill = self.buffer.mut_slice(self.end, buf_len);
|
||||
|
||||
match self.reader.read(buffer_fill) {
|
||||
Ok(nread) => {
|
||||
self.end += nread;
|
||||
Ok(nread)
|
||||
}
|
||||
error => error
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn maybe_fill_buf(&mut self) -> IoResult<uint> {
|
||||
if self.end == self.start {
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
}
|
||||
|
||||
if self.end <= 2048 { self.read() } else { Ok(0) }
|
||||
}
|
||||
|
||||
pub fn consume_line(&mut self) -> uint {
|
||||
let mut bytes_consumed = 0;
|
||||
|
||||
loop {
|
||||
match self.maybe_fill_buf() {
|
||||
Err(IoError { kind: std::io::EndOfFile, .. }) => (),
|
||||
Err(err) => fail!("read error: {}", err.desc),
|
||||
_ => ()
|
||||
}
|
||||
|
||||
let buffer_used = self.end - self.start;
|
||||
|
||||
if buffer_used == 0 { return bytes_consumed; }
|
||||
|
||||
for idx in range(self.start, self.end) {
|
||||
if self.buffer[idx] == b'\n' {
|
||||
self.start = idx + 1;
|
||||
return bytes_consumed + idx + 1;
|
||||
}
|
||||
}
|
||||
|
||||
bytes_consumed += buffer_used;
|
||||
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Reader> Bytes::Select for BufReader<R> {
|
||||
fn select<'a>(&'a mut self, bytes: uint) -> Bytes::Selected<'a> {
|
||||
match self.maybe_fill_buf() {
|
||||
Err(IoError { kind: std::io::EndOfFile, .. }) => (),
|
||||
Err(err) => fail!("read error: {}", err.desc),
|
||||
_ => ()
|
||||
}
|
||||
|
||||
let buffer_used = self.end - self.start;
|
||||
|
||||
if buffer_used == 0 { return Bytes::EndOfFile; }
|
||||
|
||||
let (complete, max_segment_len) = {
|
||||
if bytes < buffer_used {
|
||||
(true, bytes + 1)
|
||||
} else {
|
||||
(false, buffer_used)
|
||||
}
|
||||
};
|
||||
|
||||
for idx in range(self.start, self.start + max_segment_len) {
|
||||
if self.buffer[idx] == b'\n' {
|
||||
let segment = self.buffer.slice(self.start, idx + 1);
|
||||
|
||||
self.start = idx + 1;
|
||||
|
||||
return Bytes::NewlineFound(segment);
|
||||
}
|
||||
}
|
||||
|
||||
if complete {
|
||||
let segment = self.buffer.slice(self.start,
|
||||
self.start + bytes);
|
||||
|
||||
self.start += bytes;
|
||||
Bytes::Complete(segment)
|
||||
} else {
|
||||
let segment = self.buffer.slice(self.start, self.end);
|
||||
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
Bytes::Partial(segment)
|
||||
}
|
||||
}
|
||||
}
|
149
cut/cut.rs
149
cut/cut.rs
|
@ -14,7 +14,7 @@
|
|||
extern crate getopts;
|
||||
extern crate libc;
|
||||
|
||||
use std::io::{File, BufferedWriter, BufferedReader, stdin, print};
|
||||
use std::io::{stdio, File, BufferedWriter, BufferedReader, print};
|
||||
use getopts::{optopt, optflag, getopts, usage};
|
||||
|
||||
use ranges::Range;
|
||||
|
@ -22,6 +22,7 @@ use ranges::Range;
|
|||
#[path = "../common/util.rs"]
|
||||
mod util;
|
||||
mod ranges;
|
||||
mod buffer;
|
||||
|
||||
static NAME: &'static str = "cut";
|
||||
static VERSION: &'static str = "1.0.0";
|
||||
|
@ -50,67 +51,94 @@ fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
|
|||
}
|
||||
}
|
||||
|
||||
fn cut_bytes<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_bytes<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &Options) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let (use_delim, out_delim) = match opts.out_delim.clone() {
|
||||
Some(delim) => (true, delim),
|
||||
None => (false, "".to_string())
|
||||
};
|
||||
use buffer::Bytes::{Select, NewlineFound, Complete, Partial, EndOfFile};
|
||||
|
||||
let mut buf_read = buffer::BufReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
};
|
||||
|
||||
let line_len = line.len();
|
||||
let mut cur_pos = 1;
|
||||
let mut print_delim = false;
|
||||
|
||||
for &Range { low: low, high: high } in ranges.iter() {
|
||||
if low > line_len { break; }
|
||||
// skip upto low
|
||||
let orig_pos = cur_pos;
|
||||
loop {
|
||||
match buf_read.select(low - cur_pos) {
|
||||
NewlineFound(_) => {
|
||||
out.write(&[b'\n']).unwrap();
|
||||
continue 'newline
|
||||
}
|
||||
Complete(bytes) => {
|
||||
cur_pos += bytes.len();
|
||||
break
|
||||
}
|
||||
Partial(bytes) => cur_pos += bytes.len(),
|
||||
EndOfFile => {
|
||||
if orig_pos != cur_pos {
|
||||
out.write(&[b'\n']).unwrap();
|
||||
}
|
||||
|
||||
if use_delim {
|
||||
if print_delim {
|
||||
out.write_str(out_delim.as_slice()).unwrap();
|
||||
break 'newline
|
||||
}
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
|
||||
if high >= line_len {
|
||||
let segment = line.slice(low - 1, line_len);
|
||||
|
||||
out.write(segment).unwrap();
|
||||
|
||||
if *line.get(line_len - 1) == b'\n' {
|
||||
continue 'newline
|
||||
match opts.out_delim {
|
||||
Some(ref delim) => {
|
||||
if print_delim {
|
||||
out.write(delim.as_bytes()).unwrap();
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
} else {
|
||||
let segment = line.slice(low - 1, high);
|
||||
None => ()
|
||||
}
|
||||
|
||||
out.write(segment).unwrap();
|
||||
// write out from low to high
|
||||
loop {
|
||||
match buf_read.select(high - cur_pos + 1) {
|
||||
NewlineFound(bytes) => {
|
||||
out.write(bytes).unwrap();
|
||||
continue 'newline
|
||||
}
|
||||
Complete(bytes) => {
|
||||
out.write(bytes).unwrap();
|
||||
cur_pos = high + 1;
|
||||
break
|
||||
}
|
||||
Partial(bytes) => {
|
||||
cur_pos += bytes.len();
|
||||
out.write(bytes).unwrap();
|
||||
}
|
||||
EndOfFile => {
|
||||
if cur_pos != low || low == high {
|
||||
out.write(&[b'\n']).unwrap();
|
||||
}
|
||||
|
||||
break 'newline
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out.write(&[b'\n']).unwrap();
|
||||
buf_read.consume_line();
|
||||
out.write([b'\n']).unwrap();
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
fn cut_characters<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_characters<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &Options) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let (use_delim, out_delim) = match opts.out_delim.clone() {
|
||||
Some(delim) => (true, delim),
|
||||
None => (false, "".to_string())
|
||||
};
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_line() {
|
||||
let line = match buf_in.read_line() {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
|
@ -126,11 +154,14 @@ fn cut_characters<T: Reader>(mut reader: BufferedReader<T>,
|
|||
None => break
|
||||
};
|
||||
|
||||
if use_delim {
|
||||
if print_delim {
|
||||
out.write_str(out_delim.as_slice()).unwrap();
|
||||
match opts.out_delim {
|
||||
Some(ref delim) => {
|
||||
if print_delim {
|
||||
out.write(delim.as_bytes()).unwrap();
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
print_delim = true;
|
||||
None => ()
|
||||
}
|
||||
|
||||
match char_indices.nth(high - low) {
|
||||
|
@ -204,15 +235,16 @@ impl<'a> Iterator<(uint, uint)> for Searcher<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn cut_fields_delimiter<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_fields_delimiter<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
delim: &String,
|
||||
only_delimited: bool,
|
||||
out_delim: &String) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_until(b'\n') {
|
||||
let line = match buf_in.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
|
@ -279,7 +311,7 @@ fn cut_fields_delimiter<T: Reader>(mut reader: BufferedReader<T>,
|
|||
0
|
||||
}
|
||||
|
||||
fn cut_fields<T: Reader>(mut reader: BufferedReader<T>,
|
||||
fn cut_fields<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &FieldOptions) -> int {
|
||||
match opts.out_delimeter {
|
||||
|
@ -290,10 +322,11 @@ fn cut_fields<T: Reader>(mut reader: BufferedReader<T>,
|
|||
None => ()
|
||||
}
|
||||
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
|
||||
'newline: loop {
|
||||
let line = match reader.read_until(b'\n') {
|
||||
let line = match buf_in.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
|
||||
_ => fail!(),
|
||||
|
@ -367,17 +400,17 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
|
|||
|
||||
for filename in filenames.iter() {
|
||||
if filename.as_slice() == "-" {
|
||||
if stdin_read { continue; }
|
||||
if stdin_read { continue }
|
||||
|
||||
exit_code |= match mode {
|
||||
Bytes(ref ranges, ref opts) => {
|
||||
cut_bytes(stdin(), ranges, opts)
|
||||
cut_bytes(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Characters(ref ranges, ref opts) => {
|
||||
cut_characters(stdin(), ranges, opts)
|
||||
cut_characters(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Fields(ref ranges, ref opts) => {
|
||||
cut_fields(stdin(), ranges, opts)
|
||||
cut_fields(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -387,11 +420,11 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
|
|||
|
||||
if ! path.exists() {
|
||||
show_error!("{}: No such file or directory", filename);
|
||||
continue;
|
||||
continue
|
||||
}
|
||||
|
||||
let buf_file = match File::open(&path) {
|
||||
Ok(file) => BufferedReader::new(file),
|
||||
let file = match File::open(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
show_error!("{}: {}", filename, e.desc);
|
||||
continue
|
||||
|
@ -399,13 +432,11 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
|
|||
};
|
||||
|
||||
exit_code |= match mode {
|
||||
Bytes(ref ranges, ref opts) => cut_bytes(buf_file, ranges, opts),
|
||||
Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts),
|
||||
Characters(ref ranges, ref opts) => {
|
||||
cut_characters(buf_file, ranges, opts)
|
||||
}
|
||||
Fields(ref ranges, ref opts) => {
|
||||
cut_fields(buf_file, ranges, opts)
|
||||
cut_characters(file, ranges, opts)
|
||||
}
|
||||
Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue