1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-31 04:57:45 +00:00

Merge pull request #593 from kwantam/master

fix `cut`
This commit is contained in:
Heather 2015-05-12 07:10:02 +03:00
commit 57050517f9
5 changed files with 356 additions and 285 deletions

View file

@ -62,7 +62,23 @@ macro_rules! crash_if_err(
($exitcode:expr, $exp:expr) => ( ($exitcode:expr, $exp:expr) => (
match $exp { match $exp {
Ok(m) => m, Ok(m) => m,
Err(f) => crash!($exitcode, "{}", f.to_string()) Err(f) => crash!($exitcode, "{}", f),
}
)
);
#[macro_export]
macro_rules! pipe_crash_if_err(
($exitcode:expr, $exp:expr) => (
match $exp {
Ok(_) => (),
Err(f) => {
if f.kind() == ::std::io::ErrorKind::BrokenPipe {
()
} else {
crash!($exitcode, "{}", f)
}
},
} }
) )
); );

View file

@ -1,142 +1,151 @@
use std; /*
use std::old_io::{IoResult, IoError}; * This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
* (c) kwantam <kwantam@gmail.com>
* substantially rewritten to use the stdlib BufReader trait
* rather than re-implementing it here.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
pub struct BufReader<R> { use std::io::{BufRead, BufReader, Read, Write};
reader: R, use std::io::Result as IoResult;
buffer: [u8; 4096],
start: usize,
end: usize, // exclusive
}
#[allow(non_snake_case)] #[allow(non_snake_case)]
pub mod Bytes { pub mod Bytes {
use std::io::Write;
pub trait Select { pub trait Select {
fn select<'a>(&'a mut self, bytes: usize) -> Selected<'a>; fn select<W: Write>(&mut self, bytes: usize, out: Option<&mut W>) -> Selected;
} }
pub enum Selected<'a> { #[derive(PartialEq, Eq, Debug)]
NewlineFound(&'a [u8]), pub enum Selected {
Complete(&'a [u8]), NewlineFound,
Partial(&'a [u8]), Complete(usize),
Partial(usize),
EndOfFile, EndOfFile,
} }
} }
impl<R: Reader> BufReader<R> { #[derive(Debug)]
pub fn new(reader: R) -> BufReader<R> { pub struct ByteReader<R> where R: Read {
let empty_buffer = unsafe { inner: BufReader<R>,
std::mem::uninitialized::<[u8; 4096]>() }
};
BufReader { impl<R: Read> ByteReader<R> {
reader: reader, pub fn new(read: R) -> ByteReader<R> {
buffer: empty_buffer, ByteReader {
start: 0, inner: BufReader::with_capacity(4096, read),
end: 0,
} }
} }
}
#[inline] impl<R: Read> Read for ByteReader<R> {
fn read(&mut self) -> IoResult<usize> { fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
let buffer_fill = &mut self.buffer[self.end..]; self.inner.read(buf)
}
}
match self.reader.read(buffer_fill) { impl<R: Read> BufRead for ByteReader<R> {
Ok(nread) => { fn fill_buf(&mut self) -> IoResult<&[u8]> {
self.end += nread; self.inner.fill_buf()
Ok(nread)
}
error => error
}
} }
#[inline] fn consume(&mut self, amt: usize) {
fn maybe_fill_buf(&mut self) -> IoResult<usize> { self.inner.consume(amt)
if self.end == self.start {
self.start = 0;
self.end = 0;
self.read()
} else {
Ok(0)
}
} }
}
impl<R: Read> ByteReader<R> {
pub fn consume_line(&mut self) -> usize { pub fn consume_line(&mut self) -> usize {
let mut bytes_consumed = 0; let mut bytes_consumed = 0;
let mut consume_val;
loop { loop {
match self.maybe_fill_buf() { { // need filled_buf to go out of scope
Ok(0) | Err(IoError { kind: std::old_io::EndOfFile, .. }) let filled_buf = match self.fill_buf() {
if self.start == self.end => return bytes_consumed, Ok(b) => {
Err(err) => panic!("read error: {}", err.desc), if b.len() == 0 {
_ => () return bytes_consumed
} } else {
b
}
},
Err(e) => crash!(1, "read error: {}", e),
};
let filled_buf = &self.buffer[self.start..self.end]; match filled_buf.position_elem(&b'\n') {
Some(idx) => {
match filled_buf.position_elem(&b'\n') { consume_val = idx + 1;
Some(idx) => { bytes_consumed += consume_val;
self.start += idx + 1; break;
return bytes_consumed + idx + 1; }
_ => ()
} }
_ => ()
consume_val = filled_buf.len();
} }
bytes_consumed += filled_buf.len(); bytes_consumed += consume_val;
self.consume(consume_val);
self.start = 0;
self.end = 0;
} }
self.consume(consume_val);
return bytes_consumed;
} }
} }
impl<R: Reader> Bytes::Select for BufReader<R> { impl<R: Read> self::Bytes::Select for ByteReader<R> {
fn select<'a>(&'a mut self, bytes: usize) -> Bytes::Selected<'a> { fn select<W: Write>(&mut self, bytes: usize, out: Option<&mut W>) -> Bytes::Selected {
match self.maybe_fill_buf() { enum SRes {
Err(IoError { kind: std::old_io::EndOfFile, .. }) => (), Comp,
Err(err) => panic!("read error: {}", err.desc), Part,
_ => () Newl,
}
let newline_idx = match self.end - self.start {
0 => return Bytes::Selected::EndOfFile,
buf_used if bytes < buf_used => {
// because the output delimiter should only be placed between
// segments check if the byte after bytes is a newline
let buf_slice = &self.buffer[self.start..self.start + bytes + 1];
match buf_slice.position_elem(&b'\n') {
Some(idx) => idx,
None => {
let segment = &self.buffer[self.start..self.start + bytes];
self.start += bytes;
return Bytes::Selected::Complete(segment);
}
}
}
_ => {
let buf_filled = &self.buffer[self.start..self.end];
match buf_filled.position_elem(&b'\n') {
Some(idx) => idx,
None => {
let segment = &self.buffer[self.start..self.end];
self.start = 0;
self.end = 0;
return Bytes::Selected::Partial(segment);
}
}
}
}; };
let new_start = self.start + newline_idx + 1; use self::Bytes::Selected::*;
let segment = &self.buffer[self.start..new_start];
self.start = new_start; let (res, consume_val) = {
Bytes::Selected::NewlineFound(segment) let buffer = match self.fill_buf() {
Err(e) => crash!(1, "read error: {}", e),
Ok(b) => b,
};
let (res, consume_val) = match buffer.len() {
0 => return EndOfFile,
buf_used if bytes < buf_used => {
// because the output delimiter should only be placed between
// segments check if the byte after bytes is a newline
let buf_slice = &buffer[0..bytes + 1];
match buf_slice.position_elem(&b'\n') {
Some(idx) => (SRes::Newl, idx+1),
None => (SRes::Comp, bytes),
}
},
_ => {
match buffer.position_elem(&b'\n') {
Some(idx) => (SRes::Newl, idx+1),
None => (SRes::Part, buffer.len()),
}
},
};
match out {
Some(out) => pipe_crash_if_err!(1, out.write_all(&buffer[0..consume_val])),
None => (),
}
(res, consume_val)
};
self.consume(consume_val);
match res {
SRes::Comp => Complete(consume_val),
SRes::Part => Partial(consume_val),
SRes::Newl => NewlineFound,
}
} }
} }

View file

@ -1,5 +1,5 @@
#![crate_name = "cut"] #![crate_name = "cut"]
#![feature(collections, core, old_io, old_path, rustc_private)] #![feature(collections, path_ext, rustc_private)]
/* /*
* This file is part of the uutils coreutils package. * This file is part of the uutils coreutils package.
@ -13,17 +13,20 @@
extern crate getopts; extern crate getopts;
extern crate libc; extern crate libc;
use std::old_io::{stdio, File, BufferedWriter, BufferedReader, print}; use std::fs::{File, PathExt};
use std::old_io::fs::PathExtensions; use std::io::{stdout, stdin, BufRead, BufReader, Read, Stdout, Write};
use std::path::Path;
use getopts::{optopt, optflag, getopts, usage}; use getopts::{optopt, optflag, getopts, usage};
use ranges::Range; use ranges::Range;
use searcher::Searcher;
#[path = "../common/util.rs"] #[path = "../common/util.rs"]
#[macro_use] #[macro_use]
mod util; mod util;
mod ranges;
mod buffer; mod buffer;
mod ranges;
mod searcher;
static NAME: &'static str = "cut"; static NAME: &'static str = "cut";
static VERSION: &'static str = "1.0.0"; static VERSION: &'static str = "1.0.0";
@ -52,14 +55,12 @@ fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
} }
} }
fn cut_bytes<R: Reader>(reader: R, fn cut_bytes<R: Read>(reader: R, ranges: &Vec<Range>, opts: &Options) -> i32 {
ranges: &Vec<Range>,
opts: &Options) -> i32 {
use buffer::Bytes::Select; use buffer::Bytes::Select;
use buffer::Bytes::Selected::{NewlineFound, Complete, Partial, EndOfFile}; use buffer::Bytes::Selected::*;
let mut buf_read = buffer::BufReader::new(reader); let mut buf_read = buffer::ByteReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw()); let mut out = stdout();
'newline: loop { 'newline: loop {
let mut cur_pos = 1; let mut cur_pos = 1;
@ -69,19 +70,19 @@ fn cut_bytes<R: Reader>(reader: R,
// skip upto low // skip upto low
let orig_pos = cur_pos; let orig_pos = cur_pos;
loop { loop {
match buf_read.select(low - cur_pos) { match buf_read.select(low - cur_pos, None::<&mut Stdout>) {
NewlineFound(_) => { NewlineFound => {
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
continue 'newline continue 'newline
} }
Complete(bytes) => { Complete(len) => {
cur_pos += bytes.len(); cur_pos += len;
break break
} }
Partial(bytes) => cur_pos += bytes.len(), Partial(len) => cur_pos += len,
EndOfFile => { EndOfFile => {
if orig_pos != cur_pos { if orig_pos != cur_pos {
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
break 'newline break 'newline
@ -92,7 +93,7 @@ fn cut_bytes<R: Reader>(reader: R,
match opts.out_delim { match opts.out_delim {
Some(ref delim) => { Some(ref delim) => {
if print_delim { if print_delim {
out.write_all(delim.as_bytes()).unwrap(); pipe_crash_if_err!(1, out.write_all(delim.as_bytes()));
} }
print_delim = true; print_delim = true;
} }
@ -101,23 +102,16 @@ fn cut_bytes<R: Reader>(reader: R,
// write out from low to high // write out from low to high
loop { loop {
match buf_read.select(high - cur_pos + 1) { match buf_read.select(high - cur_pos + 1, Some(&mut out)) {
NewlineFound(bytes) => { NewlineFound => continue 'newline,
out.write_all(bytes).unwrap(); Partial(len) => cur_pos += len,
continue 'newline Complete(_) => {
}
Complete(bytes) => {
out.write_all(bytes).unwrap();
cur_pos = high + 1; cur_pos = high + 1;
break break
} }
Partial(bytes) => {
cur_pos += bytes.len();
out.write_all(bytes).unwrap();
}
EndOfFile => { EndOfFile => {
if cur_pos != low || low == high { if cur_pos != low || low == high {
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
break 'newline break 'newline
@ -127,39 +121,49 @@ fn cut_bytes<R: Reader>(reader: R,
} }
buf_read.consume_line(); buf_read.consume_line();
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
0 0
} }
fn cut_characters<R: Reader>(reader: R, fn cut_characters<R: Read>(reader: R, ranges: &Vec<Range>, opts: &Options) -> i32 {
ranges: &Vec<Range>, let mut buf_in = BufReader::new(reader);
opts: &Options) -> i32 { let mut out = stdout();
let mut buf_in = BufferedReader::new(reader); let mut buffer = String::new();
let mut out = BufferedWriter::new(stdio::stdout_raw());
'newline: loop { 'newline: loop {
let line = match buf_in.read_line() { buffer.clear();
Ok(line) => line, match buf_in.read_line(&mut buffer) {
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break, Ok(n) if n == 0 => break,
_ => panic!(), Err(e) => {
if buffer.len() == 0 {
crash!(1, "read error: {}", e);
}
},
_ => (),
}; };
let line = &buffer[..];
let mut char_pos = 0; let mut char_pos = 0;
let mut char_indices = line.as_slice().char_indices(); let mut char_indices = line.char_indices();
let mut print_delim = false; let mut print_delim = false;
let mut low_idx = 0;
for &Range { low, high } in ranges.iter() { for &Range { low, high } in ranges.iter() {
let low_idx = match char_indices.nth(low - char_pos - 1) { low_idx = if low - char_pos > 0 {
Some((low_idx, _)) => low_idx, match char_indices.nth(low - char_pos - 1) {
None => break Some((low_idx, _)) => low_idx,
None => break,
}
} else {
low_idx
}; };
match opts.out_delim { match opts.out_delim {
Some(ref delim) => { Some(ref delim) => {
if print_delim { if print_delim {
out.write_all(delim.as_bytes()).unwrap(); pipe_crash_if_err!(1, out.write_all(delim.as_bytes()));
} }
print_delim = true; print_delim = true;
} }
@ -169,14 +173,15 @@ fn cut_characters<R: Reader>(reader: R,
match char_indices.nth(high - low) { match char_indices.nth(high - low) {
Some((high_idx, _)) => { Some((high_idx, _)) => {
let segment = &line.as_bytes()[low_idx..high_idx]; let segment = &line.as_bytes()[low_idx..high_idx];
low_idx = high_idx;
out.write_all(segment).unwrap(); pipe_crash_if_err!(1, out.write_all(segment));
} }
None => { None => {
let bytes = line.as_bytes(); let bytes = line.as_bytes();
let segment = &bytes[low_idx..]; let segment = &bytes[low_idx..];
out.write_all(segment).unwrap(); pipe_crash_if_err!(1, out.write_all(segment));
if line.as_bytes()[bytes.len() - 1] == b'\n' { if line.as_bytes()[bytes.len() - 1] == b'\n' {
continue 'newline continue 'newline
@ -186,84 +191,40 @@ fn cut_characters<R: Reader>(reader: R,
char_pos = high + 1; char_pos = high + 1;
} }
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
0 0
} }
#[derive(Clone)] fn cut_fields_delimiter<R: Read>(reader: R, ranges: &Vec<Range>, delim: &String, only_delimited: bool, out_delim: &String) -> i32 {
struct Searcher<'a> { let mut buf_in = BufReader::new(reader);
haystack: &'a [u8], let mut out = stdout();
needle: &'a [u8], let mut buffer = Vec::new();
position: usize
}
impl<'a> Searcher<'a> {
fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
Searcher {
haystack: haystack,
needle: needle,
position: 0
}
}
}
impl<'a> Iterator for Searcher<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<(usize, usize)> {
if self.needle.len() == 1 {
for offset in range(self.position, self.haystack.len()) {
if self.haystack[offset] == self.needle[0] {
self.position = offset + 1;
return Some((offset, offset + 1));
}
}
self.position = self.haystack.len();
return None;
}
while self.position + self.needle.len() <= self.haystack.len() {
if &self.haystack[self.position..self.position + self.needle.len()] == self.needle {
let match_pos = self.position;
self.position += self.needle.len();
return Some((match_pos, match_pos + self.needle.len()));
} else {
self.position += 1;
}
}
None
}
}
fn cut_fields_delimiter<R: Reader>(reader: R,
ranges: &Vec<Range>,
delim: &String,
only_delimited: bool,
out_delim: &String) -> i32 {
let mut buf_in = BufferedReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw());
'newline: loop { 'newline: loop {
let line = match buf_in.read_until(b'\n') { buffer.clear();
Ok(line) => line, match buf_in.read_until(b'\n', &mut buffer) {
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break, Ok(n) if n == 0 => break,
_ => panic!(), Err(e) => {
}; if buffer.len() == 0 {
crash!(1, "read error: {}", e);
}
},
_ => (),
}
let line = &buffer[..];
let mut fields_pos = 1; let mut fields_pos = 1;
let mut low_idx = 0; let mut low_idx = 0;
let mut delim_search = Searcher::new(line.as_slice(), let mut delim_search = Searcher::new(line, delim.as_bytes()).peekable();
delim.as_bytes()).peekable();
let mut print_delim = false; let mut print_delim = false;
if delim_search.peek().is_none() { if delim_search.peek().is_none() {
if ! only_delimited { if ! only_delimited {
out.write_all(line.as_slice()).unwrap(); pipe_crash_if_err!(1, out.write_all(line));
if line[line.len() - 1] != b'\n' { if line[line.len() - 1] != b'\n' {
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
} }
@ -278,16 +239,16 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
}; };
} }
for _ in range(0, high - low + 1) { for _ in 0..high - low + 1 {
if print_delim { if print_delim {
out.write_str(out_delim.as_slice()).unwrap(); pipe_crash_if_err!(1, out.write_all(out_delim.as_bytes()));
} }
match delim_search.next() { match delim_search.next() {
Some((high_idx, next_low_idx)) => { Some((high_idx, next_low_idx)) => {
let segment = &line[low_idx..high_idx]; let segment = &line[low_idx..high_idx];
out.write_all(segment).unwrap(); pipe_crash_if_err!(1, out.write_all(segment));
print_delim = true; print_delim = true;
@ -297,7 +258,7 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
None => { None => {
let segment = &line[low_idx..]; let segment = &line[low_idx..];
out.write_all(segment).unwrap(); pipe_crash_if_err!(1, out.write_all(segment));
if line[line.len() - 1] == b'\n' { if line[line.len() - 1] == b'\n' {
continue 'newline continue 'newline
@ -308,44 +269,48 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
} }
} }
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
0 0
} }
fn cut_fields<R: Reader>(reader: R, fn cut_fields<R: Read>(reader: R, ranges: &Vec<Range>, opts: &FieldOptions) -> i32 {
ranges: &Vec<Range>,
opts: &FieldOptions) -> i32 {
match opts.out_delimeter { match opts.out_delimeter {
Some(ref delim) => { Some(ref o_delim) => {
return cut_fields_delimiter(reader, ranges, &opts.delimiter, return cut_fields_delimiter(reader, ranges, &opts.delimiter,
opts.only_delimited, delim); opts.only_delimited, o_delim);
} }
None => () None => ()
} }
let mut buf_in = BufferedReader::new(reader); let mut buf_in = BufReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw()); let mut out = stdout();
let mut buffer = Vec::new();
'newline: loop { 'newline: loop {
let line = match buf_in.read_until(b'\n') { buffer.clear();
Ok(line) => line, match buf_in.read_until(b'\n', &mut buffer) {
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break, Ok(n) if n == 0 => break,
_ => panic!(), Err(e) => {
}; if buffer.len() == 0 {
crash!(1, "read error: {}", e);
}
},
_ => (),
}
let line = &buffer[..];
let mut fields_pos = 1; let mut fields_pos = 1;
let mut low_idx = 0; let mut low_idx = 0;
let mut delim_search = Searcher::new(line.as_slice(), let mut delim_search = Searcher::new(line, opts.delimiter.as_bytes()).peekable();
opts.delimiter.as_bytes()).peekable();
let mut print_delim = false; let mut print_delim = false;
if delim_search.peek().is_none() { if delim_search.peek().is_none() {
if ! opts.only_delimited { if ! opts.only_delimited {
out.write_all(line.as_slice()).unwrap(); pipe_crash_if_err!(1, out.write_all(line));
if line[line.len() - 1] != b'\n' { if line[line.len() - 1] != b'\n' {
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
} }
@ -370,7 +335,7 @@ fn cut_fields<R: Reader>(reader: R,
Some((high_idx, next_low_idx)) => { Some((high_idx, next_low_idx)) => {
let segment = &line[low_idx..high_idx]; let segment = &line[low_idx..high_idx];
out.write_all(segment).unwrap(); pipe_crash_if_err!(1, out.write_all(segment));
print_delim = true; print_delim = true;
low_idx = next_low_idx; low_idx = next_low_idx;
@ -379,7 +344,7 @@ fn cut_fields<R: Reader>(reader: R,
None => { None => {
let segment = &line[low_idx..line.len()]; let segment = &line[low_idx..line.len()];
out.write_all(segment).unwrap(); pipe_crash_if_err!(1, out.write_all(segment));
if line[line.len() - 1] == b'\n' { if line[line.len() - 1] == b'\n' {
continue 'newline continue 'newline
@ -389,7 +354,7 @@ fn cut_fields<R: Reader>(reader: R,
} }
} }
out.write_all(&[b'\n']).unwrap(); pipe_crash_if_err!(1, out.write_all(&[b'\n']));
} }
0 0
@ -402,24 +367,18 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
if filenames.len() == 0 { filenames.push("-".to_string()); } if filenames.len() == 0 { filenames.push("-".to_string()); }
for filename in filenames.iter() { for filename in filenames.iter() {
if filename.as_slice() == "-" { if filename == "-" {
if stdin_read { continue } if stdin_read { continue }
exit_code |= match mode { exit_code |= match mode {
Mode::Bytes(ref ranges, ref opts) => { Mode::Bytes(ref ranges, ref opts) => cut_bytes(stdin(), ranges, opts),
cut_bytes(stdio::stdin_raw(), ranges, opts) Mode::Characters(ref ranges, ref opts) => cut_characters(stdin(), ranges, opts),
} Mode::Fields(ref ranges, ref opts) => cut_fields(stdin(), ranges, opts),
Mode::Characters(ref ranges, ref opts) => {
cut_characters(stdio::stdin_raw(), ranges, opts)
}
Mode::Fields(ref ranges, ref opts) => {
cut_fields(stdio::stdin_raw(), ranges, opts)
}
}; };
stdin_read = true; stdin_read = true;
} else { } else {
let path = Path::new(filename.as_slice()); let path = Path::new(&filename[..]);
if ! path.exists() { if ! path.exists() {
show_error!("{}: No such file or directory", filename); show_error!("{}: No such file or directory", filename);
@ -429,17 +388,15 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
let file = match File::open(&path) { let file = match File::open(&path) {
Ok(f) => f, Ok(f) => f,
Err(e) => { Err(e) => {
show_error!("{}: {}", filename, e.desc); show_error!("opening '{}': {}", &filename[..], e);
continue continue
} }
}; };
exit_code |= match mode { exit_code |= match mode {
Mode::Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts), Mode::Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts),
Mode::Characters(ref ranges, ref opts) => { Mode::Characters(ref ranges, ref opts) => cut_characters(file, ranges, opts),
cut_characters(file, ranges, opts) Mode::Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts),
}
Mode::Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts)
}; };
} }
} }
@ -461,7 +418,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
optflag("", "version", "output version information and exit"), optflag("", "version", "output version information and exit"),
]; ];
let matches = match getopts(args.tail(), &opts) { let matches = match getopts(&args[1..], &opts) {
Ok(m) => m, Ok(m) => m,
Err(f) => { Err(f) => {
show_error!("Invalid options\n{}", f); show_error!("Invalid options\n{}", f);
@ -473,7 +430,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
println!("Usage:"); println!("Usage:");
println!(" {0} OPTION... [FILE]...", args[0]); println!(" {0} OPTION... [FILE]...", args[0]);
println!(""); println!("");
print(usage("Print selected parts of lines from each FILE to standard output.", &opts).as_slice()); println!("{}", &usage("Print selected parts of lines from each FILE to standard output.", &opts)[..]);
println!(""); println!("");
println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one"); println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one");
println!("range, or many ranges separated by commas. Selected input is written"); println!("range, or many ranges separated by commas. Selected input is written");
@ -500,28 +457,40 @@ pub fn uumain(args: Vec<String>) -> i32 {
matches.opt_str("characters"), matches.opt_str("characters"),
matches.opt_str("fields")) { matches.opt_str("fields")) {
(Some(byte_ranges), None, None) => { (Some(byte_ranges), None, None) => {
list_to_ranges(byte_ranges.as_slice(), complement).map(|ranges| list_to_ranges(&byte_ranges[..], complement)
Mode::Bytes(ranges, .map(|ranges| Mode::Bytes(ranges, Options { out_delim: matches.opt_str("output-delimiter") }))
Options { out_delim: matches.opt_str("output-delimiter") })
)
} }
(None, Some(char_ranges), None) => { (None, Some(char_ranges), None) => {
list_to_ranges(char_ranges.as_slice(), complement).map(|ranges| list_to_ranges(&char_ranges[..], complement)
Mode::Characters(ranges, .map(|ranges| Mode::Characters(ranges, Options { out_delim: matches.opt_str("output-delimiter") }))
Options { out_delim: matches.opt_str("output-delimiter") })
)
} }
(None, None, Some(field_ranges)) => { (None, None, Some(field_ranges)) => {
list_to_ranges(field_ranges.as_slice(), complement).and_then(|ranges| list_to_ranges(&field_ranges[..], complement).and_then(|ranges|
{ {
let out_delim = matches.opt_str("output-delimiter"); let out_delim = match matches.opt_str("output-delimiter") {
Some(s) => {
if s.len() == 0 {
Some("\0".to_string())
} else {
Some(s)
}
},
None => None,
};
let only_delimited = matches.opt_present("only-delimited"); let only_delimited = matches.opt_present("only-delimited");
match matches.opt_str("delimiter") { match matches.opt_str("delimiter") {
Some(delim) => { Some(delim) => {
if delim.as_slice().chars().count() != 1 { if delim.chars().count() > 1 {
Err("the delimiter must be a single character".to_string()) Err("the delimiter must be a single character, or the empty string for null".to_string())
} else { } else {
let delim = if delim.len() == 0 {
"\0".to_string()
} else {
delim
};
Ok(Mode::Fields(ranges, Ok(Mode::Fields(ranges,
FieldOptions { FieldOptions {
delimiter: delim, delimiter: delim,
@ -546,6 +515,19 @@ pub fn uumain(args: Vec<String>) -> i32 {
_ => Err("you must specify a list of bytes, characters, or fields".to_string()) _ => Err("you must specify a list of bytes, characters, or fields".to_string())
}; };
let mode_parse = match mode_parse {
Err(_) => mode_parse,
Ok(mode) => {
match mode {
Mode::Bytes(_, _) | Mode::Characters(_, _) if matches.opt_present("delimiter") =>
Err("an input delimiter may be specified only when operating on fields".to_string()),
Mode::Bytes(_, _) | Mode::Characters(_, _) if matches.opt_present("only-delimited") =>
Err("suppressing non-delimited lines makes sense only when operating on fields".to_string()),
_ => Ok(mode),
}
}
};
match mode_parse { match mode_parse {
Ok(mode) => cut_files(matches.free, mode), Ok(mode) => cut_files(matches.free, mode),
Err(err_msg) => { Err(err_msg) => {

View file

@ -7,7 +7,7 @@
* file that was distributed with this source code. * file that was distributed with this source code.
*/ */
use std; use std::str::FromStr;
#[derive(PartialEq,Eq,PartialOrd,Ord,Debug)] #[derive(PartialEq,Eq,PartialOrd,Ord,Debug)]
pub struct Range { pub struct Range {
@ -15,42 +15,52 @@ pub struct Range {
pub high: usize, pub high: usize,
} }
impl std::str::FromStr for Range { impl FromStr for Range {
type Err = &'static str; type Err = &'static str;
fn from_str(s: &str) -> Result<Range, &'static str> { fn from_str(s: &str) -> Result<Range, &'static str> {
use std::usize::MAX; use std::usize::MAX;
let mut parts = s.splitn(1, '-'); let mut parts = s.splitn(2, '-');
let field = "fields and positions are numbered from 1";
let order = "high end of range less than low end";
let inval = "failed to parse range";
match (parts.next(), parts.next()) { match (parts.next(), parts.next()) {
(Some(nm), None) => { (Some(nm), None) => {
if let Ok(nm) = nm.parse::<usize>() { if let Ok(nm) = nm.parse::<usize>() {
if nm > 0 { Ok(Range{ low: nm, high: nm}) } else { Err("invalid range") } if nm > 0 { Ok(Range{ low: nm, high: nm}) } else { Err(field) }
} else { } else {
Err("invalid range") Err(inval)
} }
} }
(Some(n), Some(m)) if m.len() == 0 => { (Some(n), Some(m)) if m.len() == 0 => {
if let Ok(low) = n.parse::<usize>() { if let Ok(low) = n.parse::<usize>() {
if low > 0 { Ok(Range{ low: low, high: MAX}) } else { Err("invalid range") } if low > 0 { Ok(Range{ low: low, high: MAX}) } else { Err(field) }
} else { } else {
Err("invalid range") Err(inval)
} }
} }
(Some(n), Some(m)) if n.len() == 0 => { (Some(n), Some(m)) if n.len() == 0 => {
if let Ok(high) = m.parse::<usize>() { if let Ok(high) = m.parse::<usize>() {
if high > 0 { Ok(Range{ low: 1, high: high}) } else { Err("invalid range") } if high > 0 { Ok(Range{ low: 1, high: high}) } else { Err(field) }
} else { } else {
Err("invalid range") Err(inval)
} }
} }
(Some(n), Some(m)) => { (Some(n), Some(m)) => {
match (n.parse::<usize>(), m.parse::<usize>()) { match (n.parse::<usize>(), m.parse::<usize>()) {
(Ok(low), Ok(high)) if low > 0 && low <= high => { (Ok(low), Ok(high)) => {
Ok(Range { low: low, high: high }) if low > 0 && low <= high {
} Ok(Range { low: low, high: high })
_ => Err("invalid range") } else if low == 0 {
Err(field)
} else {
Err(order)
}
},
_ => Err(inval),
} }
} }
_ => unreachable!() _ => unreachable!()
@ -65,7 +75,7 @@ impl Range {
let mut ranges : Vec<Range> = vec!(); let mut ranges : Vec<Range> = vec!();
for item in list.split(',') { for item in list.split(',') {
match std::str::FromStr::from_str(item) { match FromStr::from_str(item) {
Ok(range_item) => ranges.push(range_item), Ok(range_item) => ranges.push(range_item),
Err(e)=> return Err(format!("range '{}' was invalid: {}", item, e)) Err(e)=> return Err(format!("range '{}' was invalid: {}", item, e))
} }
@ -74,7 +84,7 @@ impl Range {
ranges.sort(); ranges.sort();
// merge overlapping ranges // merge overlapping ranges
for i in range(0, ranges.len()) { for i in 0..ranges.len() {
let j = i + 1; let j = i + 1;
while j < ranges.len() && ranges[j].low <= ranges[i].high { while j < ranges.len() && ranges[j].low <= ranges[i].high {

54
src/cut/searcher.rs Normal file
View file

@ -0,0 +1,54 @@
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/// Iterator over the non-overlapping occurrences of `needle` inside `haystack`.
///
/// Every item is a `(start, end)` pair of byte offsets into `haystack`, where
/// `start` is the first byte of the match and `end` is one past its last byte
/// (so `&haystack[start..end] == needle`). Matches are reported left to right;
/// the search for the next match resumes at the `end` of the previous one.
///
/// An empty `needle` never matches: the iterator is immediately exhausted
/// instead of yielding the zero-width match `(0, 0)` forever.
#[derive(Clone)]
pub struct Searcher<'a> {
    haystack: &'a [u8],
    needle: &'a [u8],
    // Offset at which the next search starts; always <= haystack.len().
    position: usize
}

impl<'a> Searcher<'a> {
    /// Creates a searcher that looks for `needle` in `haystack`, starting at
    /// the beginning of `haystack`.
    pub fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
        Searcher {
            haystack: haystack,
            needle: needle,
            position: 0
        }
    }
}

impl<'a> Iterator for Searcher<'a> {
    type Item = (usize, usize);

    /// Returns the offsets of the next match, or `None` once no further match
    /// exists (or when the needle is empty).
    fn next(&mut self) -> Option<(usize, usize)> {
        let needle = self.needle;

        // A zero-length needle would match at `position` without ever
        // advancing it, producing an endless stream of identical items.
        if needle.is_empty() {
            return None;
        }

        // `position` never exceeds `haystack.len()`, so this slice is in bounds.
        let remaining = &self.haystack[self.position..];

        let found = if needle.len() == 1 {
            // Fast path: a single-byte needle is a plain byte scan.
            let wanted = needle[0];
            remaining.iter().position(|&byte| byte == wanted)
        } else {
            // `windows` yields nothing when `remaining` is shorter than the needle.
            remaining.windows(needle.len()).position(|window| window == needle)
        };

        match found {
            Some(offset) => {
                let start = self.position + offset;
                let end = start + needle.len();
                // Resume after this match so that matches never overlap.
                self.position = end;
                Some((start, end))
            }
            None => {
                // Remember exhaustion so later calls do not rescan the tail.
                self.position = self.haystack.len();
                None
            }
        }
    }
}