1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-30 04:27:45 +00:00

Merge pull request #593 from kwantam/master

fix `cut`
This commit is contained in:
Heather 2015-05-12 07:10:02 +03:00
commit 57050517f9
5 changed files with 356 additions and 285 deletions

View file

@ -62,7 +62,23 @@ macro_rules! crash_if_err(
($exitcode:expr, $exp:expr) => (
match $exp {
Ok(m) => m,
Err(f) => crash!($exitcode, "{}", f.to_string())
Err(f) => crash!($exitcode, "{}", f),
}
)
);
/// Evaluates an I/O `Result`, discarding the success value.
///
/// * `$exitcode` - exit code forwarded to `crash!` on a reported failure.
/// * `$exp` - expression yielding a `Result` whose error exposes `kind()`
///   returning a `std::io::ErrorKind`.
///
/// An error of kind `BrokenPipe` is dropped and the macro evaluates to `()`;
/// any other error is forwarded to `crash!` (defined elsewhere in this file)
/// together with `$exitcode`.
#[macro_export]
macro_rules! pipe_crash_if_err(
    ($exitcode:expr, $exp:expr) => (
        match $exp {
            // The reader on the other end went away: not reported as a failure.
            Err(ref f) if f.kind() == ::std::io::ErrorKind::BrokenPipe => (),
            Err(f) => crash!($exitcode, "{}", f),
            Ok(_) => (),
        }
    )
);

View file

@ -1,142 +1,151 @@
use std;
use std::old_io::{IoResult, IoError};
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
* (c) kwantam <kwantam@gmail.com>
* substantially rewritten to use the stdlib BufReader trait
* rather than re-implementing it here.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
pub struct BufReader<R> {
reader: R,
buffer: [u8; 4096],
start: usize,
end: usize, // exclusive
}
use std::io::{BufRead, BufReader, Read, Write};
use std::io::Result as IoResult;
#[allow(non_snake_case)]
pub mod Bytes {
use std::io::Write;
pub trait Select {
fn select<'a>(&'a mut self, bytes: usize) -> Selected<'a>;
fn select<W: Write>(&mut self, bytes: usize, out: Option<&mut W>) -> Selected;
}
pub enum Selected<'a> {
NewlineFound(&'a [u8]),
Complete(&'a [u8]),
Partial(&'a [u8]),
#[derive(PartialEq, Eq, Debug)]
pub enum Selected {
NewlineFound,
Complete(usize),
Partial(usize),
EndOfFile,
}
}
impl<R: Reader> BufReader<R> {
pub fn new(reader: R) -> BufReader<R> {
let empty_buffer = unsafe {
std::mem::uninitialized::<[u8; 4096]>()
};
#[derive(Debug)]
pub struct ByteReader<R> where R: Read {
inner: BufReader<R>,
}
BufReader {
reader: reader,
buffer: empty_buffer,
start: 0,
end: 0,
impl<R: Read> ByteReader<R> {
pub fn new(read: R) -> ByteReader<R> {
ByteReader {
inner: BufReader::with_capacity(4096, read),
}
}
}
#[inline]
fn read(&mut self) -> IoResult<usize> {
let buffer_fill = &mut self.buffer[self.end..];
impl<R: Read> Read for ByteReader<R> {
fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
self.inner.read(buf)
}
}
match self.reader.read(buffer_fill) {
Ok(nread) => {
self.end += nread;
Ok(nread)
}
error => error
}
impl<R: Read> BufRead for ByteReader<R> {
fn fill_buf(&mut self) -> IoResult<&[u8]> {
self.inner.fill_buf()
}
#[inline]
fn maybe_fill_buf(&mut self) -> IoResult<usize> {
if self.end == self.start {
self.start = 0;
self.end = 0;
self.read()
} else {
Ok(0)
}
fn consume(&mut self, amt: usize) {
self.inner.consume(amt)
}
}
impl<R: Read> ByteReader<R> {
pub fn consume_line(&mut self) -> usize {
let mut bytes_consumed = 0;
let mut consume_val;
loop {
match self.maybe_fill_buf() {
Ok(0) | Err(IoError { kind: std::old_io::EndOfFile, .. })
if self.start == self.end => return bytes_consumed,
Err(err) => panic!("read error: {}", err.desc),
_ => ()
}
{ // need filled_buf to go out of scope
let filled_buf = match self.fill_buf() {
Ok(b) => {
if b.len() == 0 {
return bytes_consumed
} else {
b
}
},
Err(e) => crash!(1, "read error: {}", e),
};
let filled_buf = &self.buffer[self.start..self.end];
match filled_buf.position_elem(&b'\n') {
Some(idx) => {
self.start += idx + 1;
return bytes_consumed + idx + 1;
match filled_buf.position_elem(&b'\n') {
Some(idx) => {
consume_val = idx + 1;
bytes_consumed += consume_val;
break;
}
_ => ()
}
_ => ()
consume_val = filled_buf.len();
}
bytes_consumed += filled_buf.len();
self.start = 0;
self.end = 0;
bytes_consumed += consume_val;
self.consume(consume_val);
}
self.consume(consume_val);
return bytes_consumed;
}
}
impl<R: Reader> Bytes::Select for BufReader<R> {
fn select<'a>(&'a mut self, bytes: usize) -> Bytes::Selected<'a> {
match self.maybe_fill_buf() {
Err(IoError { kind: std::old_io::EndOfFile, .. }) => (),
Err(err) => panic!("read error: {}", err.desc),
_ => ()
}
let newline_idx = match self.end - self.start {
0 => return Bytes::Selected::EndOfFile,
buf_used if bytes < buf_used => {
// because the output delimiter should only be placed between
// segments check if the byte after bytes is a newline
let buf_slice = &self.buffer[self.start..self.start + bytes + 1];
match buf_slice.position_elem(&b'\n') {
Some(idx) => idx,
None => {
let segment = &self.buffer[self.start..self.start + bytes];
self.start += bytes;
return Bytes::Selected::Complete(segment);
}
}
}
_ => {
let buf_filled = &self.buffer[self.start..self.end];
match buf_filled.position_elem(&b'\n') {
Some(idx) => idx,
None => {
let segment = &self.buffer[self.start..self.end];
self.start = 0;
self.end = 0;
return Bytes::Selected::Partial(segment);
}
}
}
impl<R: Read> self::Bytes::Select for ByteReader<R> {
fn select<W: Write>(&mut self, bytes: usize, out: Option<&mut W>) -> Bytes::Selected {
enum SRes {
Comp,
Part,
Newl,
};
let new_start = self.start + newline_idx + 1;
let segment = &self.buffer[self.start..new_start];
use self::Bytes::Selected::*;
self.start = new_start;
Bytes::Selected::NewlineFound(segment)
let (res, consume_val) = {
let buffer = match self.fill_buf() {
Err(e) => crash!(1, "read error: {}", e),
Ok(b) => b,
};
let (res, consume_val) = match buffer.len() {
0 => return EndOfFile,
buf_used if bytes < buf_used => {
// because the output delimiter should only be placed between
// segments check if the byte after bytes is a newline
let buf_slice = &buffer[0..bytes + 1];
match buf_slice.position_elem(&b'\n') {
Some(idx) => (SRes::Newl, idx+1),
None => (SRes::Comp, bytes),
}
},
_ => {
match buffer.position_elem(&b'\n') {
Some(idx) => (SRes::Newl, idx+1),
None => (SRes::Part, buffer.len()),
}
},
};
match out {
Some(out) => pipe_crash_if_err!(1, out.write_all(&buffer[0..consume_val])),
None => (),
}
(res, consume_val)
};
self.consume(consume_val);
match res {
SRes::Comp => Complete(consume_val),
SRes::Part => Partial(consume_val),
SRes::Newl => NewlineFound,
}
}
}

View file

@ -1,5 +1,5 @@
#![crate_name = "cut"]
#![feature(collections, core, old_io, old_path, rustc_private)]
#![feature(collections, path_ext, rustc_private)]
/*
* This file is part of the uutils coreutils package.
@ -13,17 +13,20 @@
extern crate getopts;
extern crate libc;
use std::old_io::{stdio, File, BufferedWriter, BufferedReader, print};
use std::old_io::fs::PathExtensions;
use std::fs::{File, PathExt};
use std::io::{stdout, stdin, BufRead, BufReader, Read, Stdout, Write};
use std::path::Path;
use getopts::{optopt, optflag, getopts, usage};
use ranges::Range;
use searcher::Searcher;
#[path = "../common/util.rs"]
#[macro_use]
mod util;
mod ranges;
mod buffer;
mod ranges;
mod searcher;
static NAME: &'static str = "cut";
static VERSION: &'static str = "1.0.0";
@ -52,14 +55,12 @@ fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
}
}
fn cut_bytes<R: Reader>(reader: R,
ranges: &Vec<Range>,
opts: &Options) -> i32 {
fn cut_bytes<R: Read>(reader: R, ranges: &Vec<Range>, opts: &Options) -> i32 {
use buffer::Bytes::Select;
use buffer::Bytes::Selected::{NewlineFound, Complete, Partial, EndOfFile};
use buffer::Bytes::Selected::*;
let mut buf_read = buffer::BufReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw());
let mut buf_read = buffer::ByteReader::new(reader);
let mut out = stdout();
'newline: loop {
let mut cur_pos = 1;
@ -69,19 +70,19 @@ fn cut_bytes<R: Reader>(reader: R,
// skip up to low
let orig_pos = cur_pos;
loop {
match buf_read.select(low - cur_pos) {
NewlineFound(_) => {
out.write_all(&[b'\n']).unwrap();
match buf_read.select(low - cur_pos, None::<&mut Stdout>) {
NewlineFound => {
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
continue 'newline
}
Complete(bytes) => {
cur_pos += bytes.len();
Complete(len) => {
cur_pos += len;
break
}
Partial(bytes) => cur_pos += bytes.len(),
Partial(len) => cur_pos += len,
EndOfFile => {
if orig_pos != cur_pos {
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
break 'newline
@ -92,7 +93,7 @@ fn cut_bytes<R: Reader>(reader: R,
match opts.out_delim {
Some(ref delim) => {
if print_delim {
out.write_all(delim.as_bytes()).unwrap();
pipe_crash_if_err!(1, out.write_all(delim.as_bytes()));
}
print_delim = true;
}
@ -101,23 +102,16 @@ fn cut_bytes<R: Reader>(reader: R,
// write out from low to high
loop {
match buf_read.select(high - cur_pos + 1) {
NewlineFound(bytes) => {
out.write_all(bytes).unwrap();
continue 'newline
}
Complete(bytes) => {
out.write_all(bytes).unwrap();
match buf_read.select(high - cur_pos + 1, Some(&mut out)) {
NewlineFound => continue 'newline,
Partial(len) => cur_pos += len,
Complete(_) => {
cur_pos = high + 1;
break
}
Partial(bytes) => {
cur_pos += bytes.len();
out.write_all(bytes).unwrap();
}
EndOfFile => {
if cur_pos != low || low == high {
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
break 'newline
@ -127,39 +121,49 @@ fn cut_bytes<R: Reader>(reader: R,
}
buf_read.consume_line();
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
0
}
fn cut_characters<R: Reader>(reader: R,
ranges: &Vec<Range>,
opts: &Options) -> i32 {
let mut buf_in = BufferedReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw());
fn cut_characters<R: Read>(reader: R, ranges: &Vec<Range>, opts: &Options) -> i32 {
let mut buf_in = BufReader::new(reader);
let mut out = stdout();
let mut buffer = String::new();
'newline: loop {
let line = match buf_in.read_line() {
Ok(line) => line,
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break,
_ => panic!(),
buffer.clear();
match buf_in.read_line(&mut buffer) {
Ok(n) if n == 0 => break,
Err(e) => {
if buffer.len() == 0 {
crash!(1, "read error: {}", e);
}
},
_ => (),
};
let line = &buffer[..];
let mut char_pos = 0;
let mut char_indices = line.as_slice().char_indices();
let mut char_indices = line.char_indices();
let mut print_delim = false;
let mut low_idx = 0;
for &Range { low, high } in ranges.iter() {
let low_idx = match char_indices.nth(low - char_pos - 1) {
Some((low_idx, _)) => low_idx,
None => break
low_idx = if low - char_pos > 0 {
match char_indices.nth(low - char_pos - 1) {
Some((low_idx, _)) => low_idx,
None => break,
}
} else {
low_idx
};
match opts.out_delim {
Some(ref delim) => {
if print_delim {
out.write_all(delim.as_bytes()).unwrap();
pipe_crash_if_err!(1, out.write_all(delim.as_bytes()));
}
print_delim = true;
}
@ -169,14 +173,15 @@ fn cut_characters<R: Reader>(reader: R,
match char_indices.nth(high - low) {
Some((high_idx, _)) => {
let segment = &line.as_bytes()[low_idx..high_idx];
low_idx = high_idx;
out.write_all(segment).unwrap();
pipe_crash_if_err!(1, out.write_all(segment));
}
None => {
let bytes = line.as_bytes();
let segment = &bytes[low_idx..];
out.write_all(segment).unwrap();
pipe_crash_if_err!(1, out.write_all(segment));
if line.as_bytes()[bytes.len() - 1] == b'\n' {
continue 'newline
@ -186,84 +191,40 @@ fn cut_characters<R: Reader>(reader: R,
char_pos = high + 1;
}
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
0
}
#[derive(Clone)]
struct Searcher<'a> {
haystack: &'a [u8],
needle: &'a [u8],
position: usize
}
impl<'a> Searcher<'a> {
fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
Searcher {
haystack: haystack,
needle: needle,
position: 0
}
}
}
impl<'a> Iterator for Searcher<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<(usize, usize)> {
if self.needle.len() == 1 {
for offset in range(self.position, self.haystack.len()) {
if self.haystack[offset] == self.needle[0] {
self.position = offset + 1;
return Some((offset, offset + 1));
}
}
self.position = self.haystack.len();
return None;
}
while self.position + self.needle.len() <= self.haystack.len() {
if &self.haystack[self.position..self.position + self.needle.len()] == self.needle {
let match_pos = self.position;
self.position += self.needle.len();
return Some((match_pos, match_pos + self.needle.len()));
} else {
self.position += 1;
}
}
None
}
}
fn cut_fields_delimiter<R: Reader>(reader: R,
ranges: &Vec<Range>,
delim: &String,
only_delimited: bool,
out_delim: &String) -> i32 {
let mut buf_in = BufferedReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw());
fn cut_fields_delimiter<R: Read>(reader: R, ranges: &Vec<Range>, delim: &String, only_delimited: bool, out_delim: &String) -> i32 {
let mut buf_in = BufReader::new(reader);
let mut out = stdout();
let mut buffer = Vec::new();
'newline: loop {
let line = match buf_in.read_until(b'\n') {
Ok(line) => line,
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break,
_ => panic!(),
};
buffer.clear();
match buf_in.read_until(b'\n', &mut buffer) {
Ok(n) if n == 0 => break,
Err(e) => {
if buffer.len() == 0 {
crash!(1, "read error: {}", e);
}
},
_ => (),
}
let line = &buffer[..];
let mut fields_pos = 1;
let mut low_idx = 0;
let mut delim_search = Searcher::new(line.as_slice(),
delim.as_bytes()).peekable();
let mut delim_search = Searcher::new(line, delim.as_bytes()).peekable();
let mut print_delim = false;
if delim_search.peek().is_none() {
if ! only_delimited {
out.write_all(line.as_slice()).unwrap();
pipe_crash_if_err!(1, out.write_all(line));
if line[line.len() - 1] != b'\n' {
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
}
@ -278,16 +239,16 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
};
}
for _ in range(0, high - low + 1) {
for _ in 0..high - low + 1 {
if print_delim {
out.write_str(out_delim.as_slice()).unwrap();
pipe_crash_if_err!(1, out.write_all(out_delim.as_bytes()));
}
match delim_search.next() {
Some((high_idx, next_low_idx)) => {
let segment = &line[low_idx..high_idx];
out.write_all(segment).unwrap();
pipe_crash_if_err!(1, out.write_all(segment));
print_delim = true;
@ -297,7 +258,7 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
None => {
let segment = &line[low_idx..];
out.write_all(segment).unwrap();
pipe_crash_if_err!(1, out.write_all(segment));
if line[line.len() - 1] == b'\n' {
continue 'newline
@ -308,44 +269,48 @@ fn cut_fields_delimiter<R: Reader>(reader: R,
}
}
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
0
}
fn cut_fields<R: Reader>(reader: R,
ranges: &Vec<Range>,
opts: &FieldOptions) -> i32 {
fn cut_fields<R: Read>(reader: R, ranges: &Vec<Range>, opts: &FieldOptions) -> i32 {
match opts.out_delimeter {
Some(ref delim) => {
Some(ref o_delim) => {
return cut_fields_delimiter(reader, ranges, &opts.delimiter,
opts.only_delimited, delim);
opts.only_delimited, o_delim);
}
None => ()
}
let mut buf_in = BufferedReader::new(reader);
let mut out = BufferedWriter::new(stdio::stdout_raw());
let mut buf_in = BufReader::new(reader);
let mut out = stdout();
let mut buffer = Vec::new();
'newline: loop {
let line = match buf_in.read_until(b'\n') {
Ok(line) => line,
Err(std::old_io::IoError { kind: std::old_io::EndOfFile, .. }) => break,
_ => panic!(),
};
buffer.clear();
match buf_in.read_until(b'\n', &mut buffer) {
Ok(n) if n == 0 => break,
Err(e) => {
if buffer.len() == 0 {
crash!(1, "read error: {}", e);
}
},
_ => (),
}
let line = &buffer[..];
let mut fields_pos = 1;
let mut low_idx = 0;
let mut delim_search = Searcher::new(line.as_slice(),
opts.delimiter.as_bytes()).peekable();
let mut delim_search = Searcher::new(line, opts.delimiter.as_bytes()).peekable();
let mut print_delim = false;
if delim_search.peek().is_none() {
if ! opts.only_delimited {
out.write_all(line.as_slice()).unwrap();
pipe_crash_if_err!(1, out.write_all(line));
if line[line.len() - 1] != b'\n' {
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
}
@ -370,7 +335,7 @@ fn cut_fields<R: Reader>(reader: R,
Some((high_idx, next_low_idx)) => {
let segment = &line[low_idx..high_idx];
out.write_all(segment).unwrap();
pipe_crash_if_err!(1, out.write_all(segment));
print_delim = true;
low_idx = next_low_idx;
@ -379,7 +344,7 @@ fn cut_fields<R: Reader>(reader: R,
None => {
let segment = &line[low_idx..line.len()];
out.write_all(segment).unwrap();
pipe_crash_if_err!(1, out.write_all(segment));
if line[line.len() - 1] == b'\n' {
continue 'newline
@ -389,7 +354,7 @@ fn cut_fields<R: Reader>(reader: R,
}
}
out.write_all(&[b'\n']).unwrap();
pipe_crash_if_err!(1, out.write_all(&[b'\n']));
}
0
@ -402,24 +367,18 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
if filenames.len() == 0 { filenames.push("-".to_string()); }
for filename in filenames.iter() {
if filename.as_slice() == "-" {
if filename == "-" {
if stdin_read { continue }
exit_code |= match mode {
Mode::Bytes(ref ranges, ref opts) => {
cut_bytes(stdio::stdin_raw(), ranges, opts)
}
Mode::Characters(ref ranges, ref opts) => {
cut_characters(stdio::stdin_raw(), ranges, opts)
}
Mode::Fields(ref ranges, ref opts) => {
cut_fields(stdio::stdin_raw(), ranges, opts)
}
Mode::Bytes(ref ranges, ref opts) => cut_bytes(stdin(), ranges, opts),
Mode::Characters(ref ranges, ref opts) => cut_characters(stdin(), ranges, opts),
Mode::Fields(ref ranges, ref opts) => cut_fields(stdin(), ranges, opts),
};
stdin_read = true;
} else {
let path = Path::new(filename.as_slice());
let path = Path::new(&filename[..]);
if ! path.exists() {
show_error!("{}: No such file or directory", filename);
@ -429,17 +388,15 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
let file = match File::open(&path) {
Ok(f) => f,
Err(e) => {
show_error!("{}: {}", filename, e.desc);
show_error!("opening '{}': {}", &filename[..], e);
continue
}
};
exit_code |= match mode {
Mode::Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts),
Mode::Characters(ref ranges, ref opts) => {
cut_characters(file, ranges, opts)
}
Mode::Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts)
Mode::Characters(ref ranges, ref opts) => cut_characters(file, ranges, opts),
Mode::Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts),
};
}
}
@ -461,7 +418,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
optflag("", "version", "output version information and exit"),
];
let matches = match getopts(args.tail(), &opts) {
let matches = match getopts(&args[1..], &opts) {
Ok(m) => m,
Err(f) => {
show_error!("Invalid options\n{}", f);
@ -473,7 +430,7 @@ pub fn uumain(args: Vec<String>) -> i32 {
println!("Usage:");
println!(" {0} OPTION... [FILE]...", args[0]);
println!("");
print(usage("Print selected parts of lines from each FILE to standard output.", &opts).as_slice());
println!("{}", &usage("Print selected parts of lines from each FILE to standard output.", &opts)[..]);
println!("");
println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one");
println!("range, or many ranges separated by commas. Selected input is written");
@ -500,28 +457,40 @@ pub fn uumain(args: Vec<String>) -> i32 {
matches.opt_str("characters"),
matches.opt_str("fields")) {
(Some(byte_ranges), None, None) => {
list_to_ranges(byte_ranges.as_slice(), complement).map(|ranges|
Mode::Bytes(ranges,
Options { out_delim: matches.opt_str("output-delimiter") })
)
list_to_ranges(&byte_ranges[..], complement)
.map(|ranges| Mode::Bytes(ranges, Options { out_delim: matches.opt_str("output-delimiter") }))
}
(None, Some(char_ranges), None) => {
list_to_ranges(char_ranges.as_slice(), complement).map(|ranges|
Mode::Characters(ranges,
Options { out_delim: matches.opt_str("output-delimiter") })
)
list_to_ranges(&char_ranges[..], complement)
.map(|ranges| Mode::Characters(ranges, Options { out_delim: matches.opt_str("output-delimiter") }))
}
(None, None, Some(field_ranges)) => {
list_to_ranges(field_ranges.as_slice(), complement).and_then(|ranges|
list_to_ranges(&field_ranges[..], complement).and_then(|ranges|
{
let out_delim = matches.opt_str("output-delimiter");
let out_delim = match matches.opt_str("output-delimiter") {
Some(s) => {
if s.len() == 0 {
Some("\0".to_string())
} else {
Some(s)
}
},
None => None,
};
let only_delimited = matches.opt_present("only-delimited");
match matches.opt_str("delimiter") {
Some(delim) => {
if delim.as_slice().chars().count() != 1 {
Err("the delimiter must be a single character".to_string())
if delim.chars().count() > 1 {
Err("the delimiter must be a single character, or the empty string for null".to_string())
} else {
let delim = if delim.len() == 0 {
"\0".to_string()
} else {
delim
};
Ok(Mode::Fields(ranges,
FieldOptions {
delimiter: delim,
@ -546,6 +515,19 @@ pub fn uumain(args: Vec<String>) -> i32 {
_ => Err("you must specify a list of bytes, characters, or fields".to_string())
};
let mode_parse = match mode_parse {
Err(_) => mode_parse,
Ok(mode) => {
match mode {
Mode::Bytes(_, _) | Mode::Characters(_, _) if matches.opt_present("delimiter") =>
Err("an input delimiter may be specified only when operating on fields".to_string()),
Mode::Bytes(_, _) | Mode::Characters(_, _) if matches.opt_present("only-delimited") =>
Err("suppressing non-delimited lines makes sense only when operating on fields".to_string()),
_ => Ok(mode),
}
}
};
match mode_parse {
Ok(mode) => cut_files(matches.free, mode),
Err(err_msg) => {

View file

@ -7,7 +7,7 @@
* file that was distributed with this source code.
*/
use std;
use std::str::FromStr;
#[derive(PartialEq,Eq,PartialOrd,Ord,Debug)]
pub struct Range {
@ -15,42 +15,52 @@ pub struct Range {
pub high: usize,
}
impl std::str::FromStr for Range {
impl FromStr for Range {
type Err = &'static str;
fn from_str(s: &str) -> Result<Range, &'static str> {
use std::usize::MAX;
let mut parts = s.splitn(1, '-');
let mut parts = s.splitn(2, '-');
let field = "fields and positions are numbered from 1";
let order = "high end of range less than low end";
let inval = "failed to parse range";
match (parts.next(), parts.next()) {
(Some(nm), None) => {
if let Ok(nm) = nm.parse::<usize>() {
if nm > 0 { Ok(Range{ low: nm, high: nm}) } else { Err("invalid range") }
if nm > 0 { Ok(Range{ low: nm, high: nm}) } else { Err(field) }
} else {
Err("invalid range")
Err(inval)
}
}
(Some(n), Some(m)) if m.len() == 0 => {
if let Ok(low) = n.parse::<usize>() {
if low > 0 { Ok(Range{ low: low, high: MAX}) } else { Err("invalid range") }
if low > 0 { Ok(Range{ low: low, high: MAX}) } else { Err(field) }
} else {
Err("invalid range")
Err(inval)
}
}
(Some(n), Some(m)) if n.len() == 0 => {
if let Ok(high) = m.parse::<usize>() {
if high > 0 { Ok(Range{ low: 1, high: high}) } else { Err("invalid range") }
if high > 0 { Ok(Range{ low: 1, high: high}) } else { Err(field) }
} else {
Err("invalid range")
Err(inval)
}
}
(Some(n), Some(m)) => {
match (n.parse::<usize>(), m.parse::<usize>()) {
(Ok(low), Ok(high)) if low > 0 && low <= high => {
Ok(Range { low: low, high: high })
}
_ => Err("invalid range")
(Ok(low), Ok(high)) => {
if low > 0 && low <= high {
Ok(Range { low: low, high: high })
} else if low == 0 {
Err(field)
} else {
Err(order)
}
},
_ => Err(inval),
}
}
_ => unreachable!()
@ -65,7 +75,7 @@ impl Range {
let mut ranges : Vec<Range> = vec!();
for item in list.split(',') {
match std::str::FromStr::from_str(item) {
match FromStr::from_str(item) {
Ok(range_item) => ranges.push(range_item),
Err(e)=> return Err(format!("range '{}' was invalid: {}", item, e))
}
@ -74,7 +84,7 @@ impl Range {
ranges.sort();
// merge overlapping ranges
for i in range(0, ranges.len()) {
for i in 0..ranges.len() {
let j = i + 1;
while j < ranges.len() && ranges[j].low <= ranges[i].high {

54
src/cut/searcher.rs Normal file
View file

@ -0,0 +1,54 @@
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/// Iterator over the non-overlapping occurrences of `needle` in `haystack`.
///
/// Every item is a `(start, end)` pair of byte offsets into the haystack,
/// with `end` exclusive, so `&haystack[start..end] == needle`. Matches are
/// reported left to right; after a match the search resumes at `end`.
#[derive(Clone)]
pub struct Searcher<'a> {
    haystack: &'a [u8],
    needle: &'a [u8],
    // Offset in `haystack` at which the next search starts.
    position: usize,
}

impl<'a> Searcher<'a> {
    /// Creates a searcher positioned at the start of `haystack`.
    ///
    /// An empty `needle` never matches, so the resulting iterator is empty.
    pub fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
        Searcher {
            haystack: haystack,
            needle: needle,
            position: 0,
        }
    }
}

impl<'a> Iterator for Searcher<'a> {
    type Item = (usize, usize);

    /// Returns the offsets of the next occurrence of the needle, or `None`
    /// once the remainder of the haystack contains no further match.
    fn next(&mut self) -> Option<(usize, usize)> {
        let haystack = self.haystack;
        let needle = self.needle;
        let width = needle.len();

        // An empty needle would match everywhere without ever advancing
        // `position`, turning the iterator into an endless stream of empty
        // matches; treat it as "no match" instead.
        if width == 0 {
            return None;
        }

        // `position` never exceeds `haystack.len()`, so this slice is valid.
        let rest = &haystack[self.position..];
        let found = if width == 1 {
            // Single-byte delimiters are the common case: plain byte scan.
            let byte = needle[0];
            rest.iter().position(|&b| b == byte)
        } else {
            rest.windows(width).position(|window| window == needle)
        };

        match found {
            Some(offset) => {
                let start = self.position + offset;
                let end = start + width;
                self.position = end;
                Some((start, end))
            }
            None => {
                // Remember that the haystack is exhausted so later calls
                // return immediately.
                self.position = haystack.len();
                None
            }
        }
    }
}