mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 04:27:45 +00:00
commit
57050517f9
5 changed files with 356 additions and 285 deletions
|
@ -62,7 +62,23 @@ macro_rules! crash_if_err(
|
|||
($exitcode:expr, $exp:expr) => (
|
||||
match $exp {
|
||||
Ok(m) => m,
|
||||
Err(f) => crash!($exitcode, "{}", f.to_string())
|
||||
Err(f) => crash!($exitcode, "{}", f),
|
||||
}
|
||||
)
|
||||
);
|
||||
|
||||
/// Evaluates an I/O expression and aborts the program on failure, except
/// when the failure is a broken pipe.
///
/// * `$exitcode` - exit status handed to `crash!` when a fatal error occurs.
/// * `$exp` - an expression yielding a `Result<_, std::io::Error>`; the
///   success value is discarded.
///
/// A `BrokenPipe` error is silently ignored (the reader on the other end of
/// the pipe went away); every other error is reported through `crash!`.
/// The macro evaluates to `()`.
#[macro_export]
macro_rules! pipe_crash_if_err(
    ($exitcode:expr, $exp:expr) => (
        match $exp {
            // Only errors other than a closed pipe are fatal.
            Err(ref err) if err.kind() != ::std::io::ErrorKind::BrokenPipe => {
                crash!($exitcode, "{}", err)
            },
            // Success, or a broken pipe: nothing to do.
            _ => (),
        }
    )
);
|
||||
|
|
|
@ -1,142 +1,151 @@
|
|||
use std;
|
||||
use std::old_io::{IoResult, IoError};
|
||||
/*
|
||||
* This file is part of the uutils coreutils package.
|
||||
*
|
||||
* (c) Rolf Morel <rolfmorel@gmail.com>
|
||||
* (c) kwantam <kwantam@gmail.com>
|
||||
* substantially rewritten to use the stdlib BufReader trait
|
||||
* rather than re-implementing it here.
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
pub struct BufReader<R> {
|
||||
reader: R,
|
||||
buffer: [u8; 4096],
|
||||
start: usize,
|
||||
end: usize, // exclusive
|
||||
}
|
||||
use std::io::{BufRead, BufReader, Read, Write};
|
||||
use std::io::Result as IoResult;
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub mod Bytes {
|
||||
use std::io::Write;
|
||||
|
||||
pub trait Select {
|
||||
fn select<'a>(&'a mut self, bytes: usize) -> Selected<'a>;
|
||||
fn select<W: Write>(&mut self, bytes: usize, out: Option<&mut W>) -> Selected;
|
||||
}
|
||||
|
||||
pub enum Selected<'a> {
|
||||
NewlineFound(&'a [u8]),
|
||||
Complete(&'a [u8]),
|
||||
Partial(&'a [u8]),
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum Selected {
|
||||
NewlineFound,
|
||||
Complete(usize),
|
||||
Partial(usize),
|
||||
EndOfFile,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Reader> BufReader<R> {
|
||||
pub fn new(reader: R) -> BufReader<R> {
|
||||
let empty_buffer = unsafe {
|
||||
std::mem::uninitialized::<[u8; 4096]>()
|
||||
};
|
||||
#[derive(Debug)]
|
||||
pub struct ByteReader<R> where R: Read {
|
||||
inner: BufReader<R>,
|
||||
}
|
||||
|
||||
BufReader {
|
||||
reader: reader,
|
||||
buffer: empty_buffer,
|
||||
start: 0,
|
||||
end: 0,
|
||||
impl<R: Read> ByteReader<R> {
|
||||
pub fn new(read: R) -> ByteReader<R> {
|
||||
ByteReader {
|
||||
inner: BufReader::with_capacity(4096, read),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn read(&mut self) -> IoResult<usize> {
|
||||
let buffer_fill = &mut self.buffer[self.end..];
|
||||
impl<R: Read> Read for ByteReader<R> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
|
||||
self.inner.read(buf)
|
||||
}
|
||||
}
|
||||
|
||||
match self.reader.read(buffer_fill) {
|
||||
Ok(nread) => {
|
||||
self.end += nread;
|
||||
Ok(nread)
|
||||
}
|
||||
error => error
|
||||
}
|
||||
impl<R: Read> BufRead for ByteReader<R> {
|
||||
fn fill_buf(&mut self) -> IoResult<&[u8]> {
|
||||
self.inner.fill_buf()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn maybe_fill_buf(&mut self) -> IoResult<usize> {
|
||||
if self.end == self.start {
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
|
||||
self.read()
|
||||
} else {
|
||||
Ok(0)
|
||||
}
|
||||
fn consume(&mut self, amt: usize) {
|
||||
self.inner.consume(amt)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> ByteReader<R> {
|
||||
pub fn consume_line(&mut self) -> usize {
|
||||
let mut bytes_consumed = 0;
|
||||
let mut consume_val;
|
||||
|
||||
loop {
|
||||
match self.maybe_fill_buf() {
|
||||
Ok(0) | Err(IoError { kind: std::old_io::EndOfFile, .. })
|
||||
if self.start == self.end => return bytes_consumed,
|
||||
Err(err) => panic!("read error: {}", err.desc),
|
||||
_ => ()
|
||||
}
|
||||
{ // need filled_buf to go out of scope
|
||||
let filled_buf = match self.fill_buf() {
|
||||
Ok(b) => {
|
||||
if b.len() == 0 {
|
||||
return bytes_consumed
|
||||
} else {
|
||||
b
|
||||
}
|
||||
},
|
||||
Err(e) => crash!(1, "read error: {}", e),
|
||||
};
|
||||
|
||||
let filled_buf = &self.buffer[self.start..self.end];
|
||||
|
||||
match filled_buf.position_elem(&b'\n') {
|
||||
Some(idx) => {
|
||||
self.start += idx + 1;
|
||||
return bytes_consumed + idx + 1;
|
||||
match filled_buf.position_elem(&b'\n') {
|
||||
Some(idx) => {
|
||||
consume_val = idx + 1;
|
||||
bytes_consumed += consume_val;
|
||||
break;
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
_ => ()
|
||||
|
||||
consume_val = filled_buf.len();
|
||||
}
|
||||
|
||||
bytes_consumed += filled_buf.len();
|
||||
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
bytes_consumed += consume_val;
|
||||
self.consume(consume_val);
|
||||
}
|
||||
|
||||
self.consume(consume_val);
|
||||
return bytes_consumed;
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Reader> Bytes::Select for BufReader<R> {
|
||||
fn select<'a>(&'a mut self, bytes: usize) -> Bytes::Selected<'a> {
|
||||
match self.maybe_fill_buf() {
|
||||
Err(IoError { kind: std::old_io::EndOfFile, .. }) => (),
|
||||
Err(err) => panic!("read error: {}", err.desc),
|
||||
_ => ()
|
||||
}
|
||||
|
||||
let newline_idx = match self.end - self.start {
|
||||
0 => return Bytes::Selected::EndOfFile,
|
||||
buf_used if bytes < buf_used => {
|
||||
// because the output delimiter should only be placed between
|
||||
// segments check if the byte after bytes is a newline
|
||||
let buf_slice = &self.buffer[self.start..self.start + bytes + 1];
|
||||
|
||||
match buf_slice.position_elem(&b'\n') {
|
||||
Some(idx) => idx,
|
||||
None => {
|
||||
let segment = &self.buffer[self.start..self.start + bytes];
|
||||
|
||||
self.start += bytes;
|
||||
|
||||
return Bytes::Selected::Complete(segment);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let buf_filled = &self.buffer[self.start..self.end];
|
||||
|
||||
match buf_filled.position_elem(&b'\n') {
|
||||
Some(idx) => idx,
|
||||
None => {
|
||||
let segment = &self.buffer[self.start..self.end];
|
||||
|
||||
self.start = 0;
|
||||
self.end = 0;
|
||||
|
||||
return Bytes::Selected::Partial(segment);
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<R: Read> self::Bytes::Select for ByteReader<R> {
|
||||
fn select<W: Write>(&mut self, bytes: usize, out: Option<&mut W>) -> Bytes::Selected {
|
||||
enum SRes {
|
||||
Comp,
|
||||
Part,
|
||||
Newl,
|
||||
};
|
||||
|
||||
let new_start = self.start + newline_idx + 1;
|
||||
let segment = &self.buffer[self.start..new_start];
|
||||
use self::Bytes::Selected::*;
|
||||
|
||||
self.start = new_start;
|
||||
Bytes::Selected::NewlineFound(segment)
|
||||
let (res, consume_val) = {
|
||||
let buffer = match self.fill_buf() {
|
||||
Err(e) => crash!(1, "read error: {}", e),
|
||||
Ok(b) => b,
|
||||
};
|
||||
|
||||
let (res, consume_val) = match buffer.len() {
|
||||
0 => return EndOfFile,
|
||||
buf_used if bytes < buf_used => {
|
||||
// because the output delimiter should only be placed between
|
||||
// segments check if the byte after bytes is a newline
|
||||
let buf_slice = &buffer[0..bytes + 1];
|
||||
|
||||
match buf_slice.position_elem(&b'\n') {
|
||||
Some(idx) => (SRes::Newl, idx+1),
|
||||
None => (SRes::Comp, bytes),
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
match buffer.position_elem(&b'\n') {
|
||||
Some(idx) => (SRes::Newl, idx+1),
|
||||
None => (SRes::Part, buffer.len()),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
match out {
|
||||
Some(out) => pipe_crash_if_err!(1, out.write_all(&buffer[0..consume_val])),
|
||||
None => (),
|
||||
}
|
||||
(res, consume_val)
|
||||
};
|
||||
|
||||
self.consume(consume_val);
|
||||
match res {
|
||||
SRes::Comp => Complete(consume_val),
|
||||
SRes::Part => Partial(consume_val),
|
||||
SRes::Newl => NewlineFound,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
310
src/cut/cut.rs
310
src/cut/cut.rs
|
@ -1,5 +1,5 @@
|
|||
#![crate_name = "cut"]
|
||||
#![feature(collections, core, old_io, old_path, rustc_private)]
|
||||
#![feature(collections, path_ext, rustc_private)]
|
||||
|
||||
/*
|
||||
* This file is part of the uutils coreutils package.
|
||||
|
@ -13,17 +13,20 @@
|
|||
extern crate getopts;
|
||||
extern crate libc;
|
||||
|
||||
use std::old_io::{stdio, File, BufferedWriter, BufferedReader, print};
|
||||
use std::old_io::fs::PathExtensions;
|
||||
use std::fs::{File, PathExt};
|
||||
use std::io::{stdout, stdin, BufRead, BufReader, Read, Stdout, Write};
|
||||
use std::path::Path;
|
||||
use getopts::{optopt, optflag, getopts, usage};
|
||||
|
||||
use ranges::Range;
|
||||
use searcher::Searcher;
|
||||
|
||||
#[path = "../common/util.rs"]
|
||||
#[macro_use]
|
||||
mod util;
|
||||
mod ranges;
|
||||
mod buffer;
|
||||
mod ranges;
|
||||
mod searcher;
|
||||
|
||||
static NAME: &'static str = "cut";
|
||||
static VERSION: &'static str = "1.0.0";
|
||||
|
@ -52,14 +55,12 @@ fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
|
|||
}
|
||||
}
|
||||
|
||||
fn cut_bytes<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &Options) -> i32 {
|
||||
fn cut_bytes<R: Read>(reader: R, ranges: &Vec<Range>, opts: &Options) -> i32 {
|
||||
use buffer::Bytes::Select;
|
||||
use buffer::Bytes::Selected::{NewlineFound, Complete, Partial, EndOfFile};
|
||||
use buffer::Bytes::Selected::*;
|
||||
|
||||
let mut buf_read = buffer::BufReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
let mut buf_read = buffer::ByteReader::new(reader);
|
||||
let mut out = stdout();
|
||||
|
||||
'newline: loop {
|
||||
let mut cur_pos = 1;
|
||||
|
@ -69,19 +70,19 @@ fn cut_bytes<R: Reader>(reader: R,
|
|||
// skip upto low
|
||||
let orig_pos = cur_pos;
|
||||
loop {
|
||||
match buf_read.select(low - cur_pos) {
|
||||
NewlineFound(_) => {
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
match buf_read.select(low - cur_pos, None::<&mut Stdout>) {
|
||||
NewlineFound => {
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
continue 'newline
|
||||
}
|
||||
Complete(bytes) => {
|
||||
cur_pos += bytes.len();
|
||||
Complete(len) => {
|
||||
cur_pos += len;
|
||||
break
|
||||
}
|
||||
Partial(bytes) => cur_pos += bytes.len(),
|
||||
Partial(len) => cur_pos += len,
|
||||
EndOfFile => {
|
||||
if orig_pos != cur_pos {
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
|
||||
break 'newline
|
||||
|
@ -92,7 +93,7 @@ fn cut_bytes<R: Reader>(reader: R,
|
|||
match opts.out_delim {
|
||||
Some(ref delim) => {
|
||||
if print_delim {
|
||||
out.write_all(delim.as_bytes()).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(delim.as_bytes()));
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
|
@ -101,23 +102,16 @@ fn cut_bytes<R: Reader>(reader: R,
|
|||
|
||||
// write out from low to high
|
||||
loop {
|
||||
match buf_read.select(high - cur_pos + 1) {
|
||||
NewlineFound(bytes) => {
|
||||
out.write_all(bytes).unwrap();
|
||||
continue 'newline
|
||||
}
|
||||
Complete(bytes) => {
|
||||
out.write_all(bytes).unwrap();
|
||||
match buf_read.select(high - cur_pos + 1, Some(&mut out)) {
|
||||
NewlineFound => continue 'newline,
|
||||
Partial(len) => cur_pos += len,
|
||||
Complete(_) => {
|
||||
cur_pos = high + 1;
|
||||
break
|
||||
}
|
||||
Partial(bytes) => {
|
||||
cur_pos += bytes.len();
|
||||
out.write_all(bytes).unwrap();
|
||||
}
|
||||
EndOfFile => {
|
||||
if cur_pos != low || low == high {
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
|
||||
break 'newline
|
||||
|
@ -127,39 +121,49 @@ fn cut_bytes<R: Reader>(reader: R,
|
|||
}
|
||||
|
||||
buf_read.consume_line();
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
fn cut_characters<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &Options) -> i32 {
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
fn cut_characters<R: Read>(reader: R, ranges: &Vec<Range>, opts: &Options) -> i32 {
|
||||
let mut buf_in = BufReader::new(reader);
|
||||
let mut out = stdout();
|
||||
let mut buffer = String::new();
|
||||
|
||||
'newline: loop {
|
||||
let line = match buf_in.read_line() {
|
||||
Ok(line) => line,
|
||||
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break,
|
||||
_ => panic!(),
|
||||
buffer.clear();
|
||||
match buf_in.read_line(&mut buffer) {
|
||||
Ok(n) if n == 0 => break,
|
||||
Err(e) => {
|
||||
if buffer.len() == 0 {
|
||||
crash!(1, "read error: {}", e);
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
};
|
||||
|
||||
let line = &buffer[..];
|
||||
let mut char_pos = 0;
|
||||
let mut char_indices = line.as_slice().char_indices();
|
||||
let mut char_indices = line.char_indices();
|
||||
let mut print_delim = false;
|
||||
let mut low_idx = 0;
|
||||
|
||||
for &Range { low, high } in ranges.iter() {
|
||||
let low_idx = match char_indices.nth(low - char_pos - 1) {
|
||||
Some((low_idx, _)) => low_idx,
|
||||
None => break
|
||||
low_idx = if low - char_pos > 0 {
|
||||
match char_indices.nth(low - char_pos - 1) {
|
||||
Some((low_idx, _)) => low_idx,
|
||||
None => break,
|
||||
}
|
||||
} else {
|
||||
low_idx
|
||||
};
|
||||
|
||||
match opts.out_delim {
|
||||
Some(ref delim) => {
|
||||
if print_delim {
|
||||
out.write_all(delim.as_bytes()).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(delim.as_bytes()));
|
||||
}
|
||||
print_delim = true;
|
||||
}
|
||||
|
@ -169,14 +173,15 @@ fn cut_characters<R: Reader>(reader: R,
|
|||
match char_indices.nth(high - low) {
|
||||
Some((high_idx, _)) => {
|
||||
let segment = &line.as_bytes()[low_idx..high_idx];
|
||||
low_idx = high_idx;
|
||||
|
||||
out.write_all(segment).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(segment));
|
||||
}
|
||||
None => {
|
||||
let bytes = line.as_bytes();
|
||||
let segment = &bytes[low_idx..];
|
||||
|
||||
out.write_all(segment).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(segment));
|
||||
|
||||
if line.as_bytes()[bytes.len() - 1] == b'\n' {
|
||||
continue 'newline
|
||||
|
@ -186,84 +191,40 @@ fn cut_characters<R: Reader>(reader: R,
|
|||
|
||||
char_pos = high + 1;
|
||||
}
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Searcher<'a> {
|
||||
haystack: &'a [u8],
|
||||
needle: &'a [u8],
|
||||
position: usize
|
||||
}
|
||||
|
||||
impl<'a> Searcher<'a> {
|
||||
fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
|
||||
Searcher {
|
||||
haystack: haystack,
|
||||
needle: needle,
|
||||
position: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Searcher<'a> {
|
||||
type Item = (usize, usize);
|
||||
|
||||
fn next(&mut self) -> Option<(usize, usize)> {
|
||||
if self.needle.len() == 1 {
|
||||
for offset in range(self.position, self.haystack.len()) {
|
||||
if self.haystack[offset] == self.needle[0] {
|
||||
self.position = offset + 1;
|
||||
return Some((offset, offset + 1));
|
||||
}
|
||||
}
|
||||
|
||||
self.position = self.haystack.len();
|
||||
return None;
|
||||
}
|
||||
|
||||
while self.position + self.needle.len() <= self.haystack.len() {
|
||||
if &self.haystack[self.position..self.position + self.needle.len()] == self.needle {
|
||||
let match_pos = self.position;
|
||||
self.position += self.needle.len();
|
||||
return Some((match_pos, match_pos + self.needle.len()));
|
||||
} else {
|
||||
self.position += 1;
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn cut_fields_delimiter<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
delim: &String,
|
||||
only_delimited: bool,
|
||||
out_delim: &String) -> i32 {
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
fn cut_fields_delimiter<R: Read>(reader: R, ranges: &Vec<Range>, delim: &String, only_delimited: bool, out_delim: &String) -> i32 {
|
||||
let mut buf_in = BufReader::new(reader);
|
||||
let mut out = stdout();
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
'newline: loop {
|
||||
let line = match buf_in.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break,
|
||||
_ => panic!(),
|
||||
};
|
||||
buffer.clear();
|
||||
match buf_in.read_until(b'\n', &mut buffer) {
|
||||
Ok(n) if n == 0 => break,
|
||||
Err(e) => {
|
||||
if buffer.len() == 0 {
|
||||
crash!(1, "read error: {}", e);
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let line = &buffer[..];
|
||||
let mut fields_pos = 1;
|
||||
let mut low_idx = 0;
|
||||
let mut delim_search = Searcher::new(line.as_slice(),
|
||||
delim.as_bytes()).peekable();
|
||||
let mut delim_search = Searcher::new(line, delim.as_bytes()).peekable();
|
||||
let mut print_delim = false;
|
||||
|
||||
if delim_search.peek().is_none() {
|
||||
if ! only_delimited {
|
||||
out.write_all(line.as_slice()).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(line));
|
||||
if line[line.len() - 1] != b'\n' {
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -278,16 +239,16 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
|
|||
};
|
||||
}
|
||||
|
||||
for _ in range(0, high - low + 1) {
|
||||
for _ in 0..high - low + 1 {
|
||||
if print_delim {
|
||||
out.write_str(out_delim.as_slice()).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(out_delim.as_bytes()));
|
||||
}
|
||||
|
||||
match delim_search.next() {
|
||||
Some((high_idx, next_low_idx)) => {
|
||||
let segment = &line[low_idx..high_idx];
|
||||
|
||||
out.write_all(segment).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(segment));
|
||||
|
||||
print_delim = true;
|
||||
|
||||
|
@ -297,7 +258,7 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
|
|||
None => {
|
||||
let segment = &line[low_idx..];
|
||||
|
||||
out.write_all(segment).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(segment));
|
||||
|
||||
if line[line.len() - 1] == b'\n' {
|
||||
continue 'newline
|
||||
|
@ -308,44 +269,48 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
|
|||
}
|
||||
}
|
||||
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
fn cut_fields<R: Reader>(reader: R,
|
||||
ranges: &Vec<Range>,
|
||||
opts: &FieldOptions) -> i32 {
|
||||
fn cut_fields<R: Read>(reader: R, ranges: &Vec<Range>, opts: &FieldOptions) -> i32 {
|
||||
match opts.out_delimeter {
|
||||
Some(ref delim) => {
|
||||
Some(ref o_delim) => {
|
||||
return cut_fields_delimiter(reader, ranges, &opts.delimiter,
|
||||
opts.only_delimited, delim);
|
||||
opts.only_delimited, o_delim);
|
||||
}
|
||||
None => ()
|
||||
}
|
||||
|
||||
let mut buf_in = BufferedReader::new(reader);
|
||||
let mut out = BufferedWriter::new(stdio::stdout_raw());
|
||||
let mut buf_in = BufReader::new(reader);
|
||||
let mut out = stdout();
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
'newline: loop {
|
||||
let line = match buf_in.read_until(b'\n') {
|
||||
Ok(line) => line,
|
||||
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break,
|
||||
_ => panic!(),
|
||||
};
|
||||
buffer.clear();
|
||||
match buf_in.read_until(b'\n', &mut buffer) {
|
||||
Ok(n) if n == 0 => break,
|
||||
Err(e) => {
|
||||
if buffer.len() == 0 {
|
||||
crash!(1, "read error: {}", e);
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let line = &buffer[..];
|
||||
let mut fields_pos = 1;
|
||||
let mut low_idx = 0;
|
||||
let mut delim_search = Searcher::new(line.as_slice(),
|
||||
opts.delimiter.as_bytes()).peekable();
|
||||
let mut delim_search = Searcher::new(line, opts.delimiter.as_bytes()).peekable();
|
||||
let mut print_delim = false;
|
||||
|
||||
if delim_search.peek().is_none() {
|
||||
if ! opts.only_delimited {
|
||||
out.write_all(line.as_slice()).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(line));
|
||||
if line[line.len() - 1] != b'\n' {
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -370,7 +335,7 @@ fn cut_fields<R: Reader>(reader: R,
|
|||
Some((high_idx, next_low_idx)) => {
|
||||
let segment = &line[low_idx..high_idx];
|
||||
|
||||
out.write_all(segment).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(segment));
|
||||
|
||||
print_delim = true;
|
||||
low_idx = next_low_idx;
|
||||
|
@ -379,7 +344,7 @@ fn cut_fields<R: Reader>(reader: R,
|
|||
None => {
|
||||
let segment = &line[low_idx..line.len()];
|
||||
|
||||
out.write_all(segment).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(segment));
|
||||
|
||||
if line[line.len() - 1] == b'\n' {
|
||||
continue 'newline
|
||||
|
@ -389,7 +354,7 @@ fn cut_fields<R: Reader>(reader: R,
|
|||
}
|
||||
}
|
||||
|
||||
out.write_all(&[b'\n']).unwrap();
|
||||
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
|
||||
}
|
||||
|
||||
0
|
||||
|
@ -402,24 +367,18 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
|
|||
if filenames.len() == 0 { filenames.push("-".to_string()); }
|
||||
|
||||
for filename in filenames.iter() {
|
||||
if filename.as_slice() == "-" {
|
||||
if filename == "-" {
|
||||
if stdin_read { continue }
|
||||
|
||||
exit_code |= match mode {
|
||||
Mode::Bytes(ref ranges, ref opts) => {
|
||||
cut_bytes(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Mode::Characters(ref ranges, ref opts) => {
|
||||
cut_characters(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Mode::Fields(ref ranges, ref opts) => {
|
||||
cut_fields(stdio::stdin_raw(), ranges, opts)
|
||||
}
|
||||
Mode::Bytes(ref ranges, ref opts) => cut_bytes(stdin(), ranges, opts),
|
||||
Mode::Characters(ref ranges, ref opts) => cut_characters(stdin(), ranges, opts),
|
||||
Mode::Fields(ref ranges, ref opts) => cut_fields(stdin(), ranges, opts),
|
||||
};
|
||||
|
||||
stdin_read = true;
|
||||
} else {
|
||||
let path = Path::new(filename.as_slice());
|
||||
let path = Path::new(&filename[..]);
|
||||
|
||||
if ! path.exists() {
|
||||
show_error!("{}: No such file or directory", filename);
|
||||
|
@ -429,17 +388,15 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
|
|||
let file = match File::open(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
show_error!("{}: {}", filename, e.desc);
|
||||
show_error!("opening '{}': {}", &filename[..], e);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
exit_code |= match mode {
|
||||
Mode::Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts),
|
||||
Mode::Characters(ref ranges, ref opts) => {
|
||||
cut_characters(file, ranges, opts)
|
||||
}
|
||||
Mode::Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts)
|
||||
Mode::Characters(ref ranges, ref opts) => cut_characters(file, ranges, opts),
|
||||
Mode::Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -461,7 +418,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
optflag("", "version", "output version information and exit"),
|
||||
];
|
||||
|
||||
let matches = match getopts(args.tail(), &opts) {
|
||||
let matches = match getopts(&args[1..], &opts) {
|
||||
Ok(m) => m,
|
||||
Err(f) => {
|
||||
show_error!("Invalid options\n{}", f);
|
||||
|
@ -473,7 +430,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
println!("Usage:");
|
||||
println!(" {0} OPTION... [FILE]...", args[0]);
|
||||
println!("");
|
||||
print(usage("Print selected parts of lines from each FILE to standard output.", &opts).as_slice());
|
||||
println!("{}", &usage("Print selected parts of lines from each FILE to standard output.", &opts)[..]);
|
||||
println!("");
|
||||
println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one");
|
||||
println!("range, or many ranges separated by commas. Selected input is written");
|
||||
|
@ -500,28 +457,40 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
matches.opt_str("characters"),
|
||||
matches.opt_str("fields")) {
|
||||
(Some(byte_ranges), None, None) => {
|
||||
list_to_ranges(byte_ranges.as_slice(), complement).map(|ranges|
|
||||
Mode::Bytes(ranges,
|
||||
Options { out_delim: matches.opt_str("output-delimiter") })
|
||||
)
|
||||
list_to_ranges(&byte_ranges[..], complement)
|
||||
.map(|ranges| Mode::Bytes(ranges, Options { out_delim: matches.opt_str("output-delimiter") }))
|
||||
}
|
||||
(None, Some(char_ranges), None) => {
|
||||
list_to_ranges(char_ranges.as_slice(), complement).map(|ranges|
|
||||
Mode::Characters(ranges,
|
||||
Options { out_delim: matches.opt_str("output-delimiter") })
|
||||
)
|
||||
list_to_ranges(&char_ranges[..], complement)
|
||||
.map(|ranges| Mode::Characters(ranges, Options { out_delim: matches.opt_str("output-delimiter") }))
|
||||
}
|
||||
(None, None, Some(field_ranges)) => {
|
||||
list_to_ranges(field_ranges.as_slice(), complement).and_then(|ranges|
|
||||
list_to_ranges(&field_ranges[..], complement).and_then(|ranges|
|
||||
{
|
||||
let out_delim = matches.opt_str("output-delimiter");
|
||||
let out_delim = match matches.opt_str("output-delimiter") {
|
||||
Some(s) => {
|
||||
if s.len() == 0 {
|
||||
Some("\0".to_string())
|
||||
} else {
|
||||
Some(s)
|
||||
}
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
let only_delimited = matches.opt_present("only-delimited");
|
||||
|
||||
match matches.opt_str("delimiter") {
|
||||
Some(delim) => {
|
||||
if delim.as_slice().chars().count() != 1 {
|
||||
Err("the delimiter must be a single character".to_string())
|
||||
if delim.chars().count() > 1 {
|
||||
Err("the delimiter must be a single character, or the empty string for null".to_string())
|
||||
} else {
|
||||
let delim = if delim.len() == 0 {
|
||||
"\0".to_string()
|
||||
} else {
|
||||
delim
|
||||
};
|
||||
|
||||
Ok(Mode::Fields(ranges,
|
||||
FieldOptions {
|
||||
delimiter: delim,
|
||||
|
@ -546,6 +515,19 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
|||
_ => Err("you must specify a list of bytes, characters, or fields".to_string())
|
||||
};
|
||||
|
||||
let mode_parse = match mode_parse {
|
||||
Err(_) => mode_parse,
|
||||
Ok(mode) => {
|
||||
match mode {
|
||||
Mode::Bytes(_, _) | Mode::Characters(_, _) if matches.opt_present("delimiter") =>
|
||||
Err("an input delimiter may be specified only when operating on fields".to_string()),
|
||||
Mode::Bytes(_, _) | Mode::Characters(_, _) if matches.opt_present("only-delimited") =>
|
||||
Err("suppressing non-delimited lines makes sense only when operating on fields".to_string()),
|
||||
_ => Ok(mode),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match mode_parse {
|
||||
Ok(mode) => cut_files(matches.free, mode),
|
||||
Err(err_msg) => {
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
use std;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[derive(PartialEq,Eq,PartialOrd,Ord,Debug)]
|
||||
pub struct Range {
|
||||
|
@ -15,42 +15,52 @@ pub struct Range {
|
|||
pub high: usize,
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Range {
|
||||
impl FromStr for Range {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Range, &'static str> {
|
||||
use std::usize::MAX;
|
||||
|
||||
let mut parts = s.splitn(1, '-');
|
||||
let mut parts = s.splitn(2, '-');
|
||||
|
||||
let field = "fields and positions are numbered from 1";
|
||||
let order = "high end of range less than low end";
|
||||
let inval = "failed to parse range";
|
||||
|
||||
match (parts.next(), parts.next()) {
|
||||
(Some(nm), None) => {
|
||||
if let Ok(nm) = nm.parse::<usize>() {
|
||||
if nm > 0 { Ok(Range{ low: nm, high: nm}) } else { Err("invalid range") }
|
||||
if nm > 0 { Ok(Range{ low: nm, high: nm}) } else { Err(field) }
|
||||
} else {
|
||||
Err("invalid range")
|
||||
Err(inval)
|
||||
}
|
||||
}
|
||||
(Some(n), Some(m)) if m.len() == 0 => {
|
||||
if let Ok(low) = n.parse::<usize>() {
|
||||
if low > 0 { Ok(Range{ low: low, high: MAX}) } else { Err("invalid range") }
|
||||
if low > 0 { Ok(Range{ low: low, high: MAX}) } else { Err(field) }
|
||||
} else {
|
||||
Err("invalid range")
|
||||
Err(inval)
|
||||
}
|
||||
}
|
||||
(Some(n), Some(m)) if n.len() == 0 => {
|
||||
if let Ok(high) = m.parse::<usize>() {
|
||||
if high > 0 { Ok(Range{ low: 1, high: high}) } else { Err("invalid range") }
|
||||
if high > 0 { Ok(Range{ low: 1, high: high}) } else { Err(field) }
|
||||
} else {
|
||||
Err("invalid range")
|
||||
Err(inval)
|
||||
}
|
||||
}
|
||||
(Some(n), Some(m)) => {
|
||||
match (n.parse::<usize>(), m.parse::<usize>()) {
|
||||
(Ok(low), Ok(high)) if low > 0 && low <= high => {
|
||||
Ok(Range { low: low, high: high })
|
||||
}
|
||||
_ => Err("invalid range")
|
||||
(Ok(low), Ok(high)) => {
|
||||
if low > 0 && low <= high {
|
||||
Ok(Range { low: low, high: high })
|
||||
} else if low == 0 {
|
||||
Err(field)
|
||||
} else {
|
||||
Err(order)
|
||||
}
|
||||
},
|
||||
_ => Err(inval),
|
||||
}
|
||||
}
|
||||
_ => unreachable!()
|
||||
|
@ -65,7 +75,7 @@ impl Range {
|
|||
let mut ranges : Vec<Range> = vec!();
|
||||
|
||||
for item in list.split(',') {
|
||||
match std::str::FromStr::from_str(item) {
|
||||
match FromStr::from_str(item) {
|
||||
Ok(range_item) => ranges.push(range_item),
|
||||
Err(e)=> return Err(format!("range '{}' was invalid: {}", item, e))
|
||||
}
|
||||
|
@ -74,7 +84,7 @@ impl Range {
|
|||
ranges.sort();
|
||||
|
||||
// merge overlapping ranges
|
||||
for i in range(0, ranges.len()) {
|
||||
for i in 0..ranges.len() {
|
||||
let j = i + 1;
|
||||
|
||||
while j < ranges.len() && ranges[j].low <= ranges[i].high {
|
||||
|
|
54
src/cut/searcher.rs
Normal file
54
src/cut/searcher.rs
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* This file is part of the uutils coreutils package.
|
||||
*
|
||||
* (c) Rolf Morel <rolfmorel@gmail.com>
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
/// Iterator over the non-overlapping occurrences of a byte sequence
/// (`needle`) inside a byte slice (`haystack`).
///
/// Each item is a `(start, end)` pair of byte offsets into the haystack,
/// where `start` is the first byte of the match and `end` is one past its
/// last byte, so `&haystack[start..end] == needle`.
#[derive(Clone, Debug)]
pub struct Searcher<'a> {
    haystack: &'a [u8],
    needle: &'a [u8],
    // Offset in `haystack` at which the next search starts; never exceeds
    // `haystack.len()`.
    position: usize
}

impl<'a> Searcher<'a> {
    /// Creates a searcher that scans `haystack` from its beginning for
    /// occurrences of `needle`.
    ///
    /// An empty `needle` never matches: the resulting iterator is empty.
    pub fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
        Searcher {
            haystack: haystack,
            needle: needle,
            position: 0
        }
    }
}

impl<'a> Iterator for Searcher<'a> {
    type Item = (usize, usize);

    /// Returns the offsets of the next match, or `None` once the haystack
    /// is exhausted. Searching resumes right after the previous match, so
    /// matches never overlap.
    fn next(&mut self) -> Option<(usize, usize)> {
        let haystack = self.haystack;
        let needle = self.needle;

        // An empty needle would "match" at the current position forever
        // without advancing; treat it as matching nothing instead.
        if needle.is_empty() {
            self.position = haystack.len();
            return None;
        }

        let remaining = &haystack[self.position..];

        let found = if needle.len() == 1 {
            // Fast path: a single byte needs no window comparison.
            let byte = needle[0];
            remaining.iter().position(|&b| b == byte)
        } else {
            remaining.windows(needle.len()).position(|window| window == needle)
        };

        match found {
            Some(offset) => {
                let start = self.position + offset;
                let end = start + needle.len();

                self.position = end;
                Some((start, end))
            }
            None => {
                // Nothing left to find; make later calls return immediately.
                self.position = haystack.len();
                None
            }
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue