1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 12:07:46 +00:00

Add initial cut support, only bytes cutting

This commit is contained in:
polyphemus 2014-06-08 23:51:22 +02:00
parent 8568d41a09
commit 2ab586459b
2 changed files with 393 additions and 0 deletions

285
cut/cut.rs Normal file
View file

@ -0,0 +1,285 @@
#![crate_id(name="cut", vers="1.0.0", author="Rolf Morel")]
#![feature(macro_rules)]
extern crate getopts;
extern crate libc;
use std::os;
use std::io::{print,stdin,stdout,File,BufferedWriter,BufferedReader};
use getopts::{optopt, optflag, getopts, usage};
use ranges::Range;
#[path = "../common/util.rs"]
mod util;
mod ranges;
// Program name and version; both are printed by the `--version` option.
static NAME: &'static str = "cut";
static VERSION: &'static str = "1.0.0";
/// Settings shared by the byte and character cutting modes.
struct Options {
    /// Value of `--output-delimiter`, if one was given. When present,
    /// `cut_bytes` writes it between consecutive selected ranges.
    out_delim: Option<String>,
}
/// Settings for the field cutting mode.
struct FieldOptions {
    /// Input field separator (`-d`); `uumain` falls back to TAB.
    delimiter: char,
    /// Output separator (`--output-delimiter`); `uumain` falls back to the
    /// input delimiter when the option is absent.
    out_delimeter: String,
    /// Set from the `-s` / `--only-delimited` flag.
    only_delimited: bool,
}
/// The selection mode chosen on the command line, together with the parsed
/// ranges and the options that apply to that mode.
enum Mode {
    /// Selected with `-b` / `--bytes`.
    Bytes(Vec<Range>, Options),
    /// Selected with `-c` / `--characters`.
    Characters(Vec<Range>, Options),
    /// Selected with `-f` / `--fields`.
    Fields(Vec<Range>, FieldOptions),
}
fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
use std::uint;
let mut range_vec = {
try!(
if complement {
Range::from_list(list).map(|r| ranges::complement(&r))
} else {
Range::from_list(list)
}
)
};
// add sentinel value for increased performance during cutting
range_vec.push(Range{ low: uint::MAX, high: uint::MAX });
Ok(range_vec)
}
fn cut_bytes(files: Vec<String>, ranges: Vec<Range>, opts: Options) -> int {
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
let (use_delim, out_delim) = match opts.out_delim {
Some(delim) => (true, delim),
None => (false, "".to_string())
};
for filename in files.move_iter() {
let mut file = match open(&filename) {
Some(file) => file,
None => continue
};
let mut byte_pos = 0;
let mut print_delim = false;
let mut range_pos = 0;
loop {
let byte = match file.read_u8() {
Ok(byte) => byte,
Err(std::io::IoError{ kind: std::io::EndOfFile, ..}) => {
if byte_pos > 0 {
out.write_u8('\n' as u8);
}
break
}
_ => fail!(),
};
if byte == ('\n' as u8) {
out.write_u8('\n' as u8);
byte_pos = 0;
print_delim = false;
range_pos = 0;
} else {
byte_pos += 1;
if byte_pos > ranges.get(range_pos).high {
range_pos += 1;
}
let cur_range = *ranges.get(range_pos);
if byte_pos >= cur_range.low {
if use_delim {
if print_delim && byte_pos == cur_range.low {
out.write_str(out_delim.as_slice());
}
print_delim = true;
}
out.write_u8(byte);
}
}
}
}
return 0;
}
/// Character cutting mode. Currently a stub: it ignores all of its arguments,
/// produces no output and reports success.
fn cut_charachters(files: Vec<String>, ranges: Vec<Range>,
                   opts: Options) -> int {
    0
}
/// Field cutting mode. Currently a placeholder: it prints every range
/// (including the trailing sentinel) as `low-high`, one per line, and
/// reports success without reading any input.
fn cut_fields(files: Vec<String>, ranges: Vec<Range>,
              opts: FieldOptions) -> int {
    for selected in ranges.iter() {
        let (first, last) = (selected.low, selected.high);
        println!("{}-{}", first, last);
    }

    0
}
// Stand-alone entry point: run the utility on the process arguments and use
// its return value as the exit status.
#[allow(dead_code)]
fn main() {
    let status = uumain(os::args());
    os::set_exit_status(status);
}
/// Entry point of the `cut` utility.
///
/// Parses `args` (element 0 is the program name), validates the option
/// combination, checks that every named input exists and then dispatches to
/// the cutting routine for the selected mode.
///
/// Returns the exit status: 0 on success (including `--help` and
/// `--version`), 1 when the options are invalid, a LIST cannot be parsed,
/// the field delimiter is not exactly one character, or an input file does
/// not exist. Otherwise the status of the cutting routine is returned.
pub fn uumain(args: Vec<String>) -> int {
    let program = args.get(0).clone();
    let opts = [
        optopt("b", "bytes", "select only these bytes", "LIST"),
        optopt("c", "characters", "select only these characters", "LIST"),
        optopt("d", "delimiter", "use DELIM instead of TAB for field delimiter", "DELIM"),
        optopt("f", "fields", "select only these fields; also print any line that contains no delimiter character, unless the -s option is specified", "LIST"),
        optflag("n", "", "(ignored)"),
        optflag("", "complement", "complement the set of selected bytes, characters or fields"),
        optflag("s", "only-delimited", "do not print lines not containing delimiters"),
        optopt("", "output-delimiter", "use STRING as the output delimiter the default is to use the input delimiter", "STRING"),
        optflag("", "help", "display this help and exit"),
        optflag("", "version", "output version information and exit"),
    ];

    let mut matches = match getopts(args.tail(), opts) {
        Ok(m) => m,
        Err(f) => {
            show_error!(1, "Invalid options\n{}", f.to_err_msg());
            return 1;
        }
    };

    if matches.opt_present("help") {
        println!("Usage:");
        println!(" {0:s} OPTION... [FILE]...", program);
        println!("");
        print(usage("Print selected parts of lines from each FILE to standard output.", opts).as_slice());
        println!("");
        println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one");
        println!("range, or many ranges separated by commas. Selected input is written");
        println!("in the same order that it is read, and is written exactly once.");
        println!("Each range is one of:");
        println!("");
        println!(" N N'th byte, character or field, counted from 1");
        println!(" N- from N'th byte, character or field, to end of line");
        println!(" N-M from N'th to M'th (included) byte, character or field");
        println!(" -M from first to M'th (included) byte, character or field");
        println!("");
        println!("With no FILE, or when FILE is -, read standard input.");
        return 0;
    }

    if matches.opt_present("version") {
        println!("{} {}", NAME, VERSION);
        return 0;
    }

    let complement = matches.opt_present("complement");
    let mut out_delim = matches.opt_str("output-delimiter");

    // Exactly one of -b, -c and -f must be present; the tuple match below
    // both enforces that and builds the mode-specific settings.
    let mode = match (matches.opt_str("bytes"), matches.opt_str("characters"),
                      matches.opt_str("fields")) {
        (Some(byte_ranges), None, None) => {
            match list_to_ranges(byte_ranges.as_slice(), complement) {
                Ok(ranges) => Bytes(ranges, Options{ out_delim: out_delim }),
                Err(msg) => {
                    show_error!(1, "{}", msg);
                    return 1;
                }
            }
        }
        (None ,Some(char_ranges), None) => {
            match list_to_ranges(char_ranges.as_slice(), complement) {
                Ok(ranges) => Characters(ranges,
                                         Options{ out_delim: out_delim }),
                Err(msg) => {
                    show_error!(1, "{}", msg);
                    return 1;
                }
            }
        }
        (None, None ,Some(field_ranges)) => {
            match list_to_ranges(field_ranges.as_slice(), complement) {
                Ok(ranges) => {
                    use std::str::from_char;

                    let only_delimited = matches.opt_present("only-delimited");

                    // A delimiter that is not exactly one character is an
                    // error; silently falling back to TAB would cut on a
                    // separator the user never asked for. Characters are
                    // counted rather than bytes so that a single multi-byte
                    // character is accepted.
                    let delim = match matches.opt_str("delimiter") {
                        Some(s) => {
                            if s.as_slice().char_len() != 1 {
                                show_error!(1, "the delimiter must be a single character");
                                return 1;
                            }
                            s.as_slice().char_at(0)
                        }
                        None => '\t'
                    };

                    // Without --output-delimiter the input delimiter is
                    // reused on output.
                    if out_delim.is_none() {
                        out_delim = Some(from_char(delim));
                    }

                    Fields(ranges,
                           FieldOptions{ delimiter: delim,
                                         out_delimeter: out_delim.unwrap(),
                                         only_delimited: only_delimited })
                }
                Err(msg) => {
                    show_error!(1, "{}", msg);
                    return 1;
                }
            }
        }
        (ref b, ref c, ref f) if b.is_some() || c.is_some() || f.is_some() => {
            crash!(1, "only one type of list may be specified");
        }
        _ => crash!(1, "you must specify a list of bytes, characters, or fields")
    };

    // -d and -s only make sense together with -f.
    match mode {
        Bytes(..) | Characters(..) => {
            if matches.opt_present("delimiter") {
                show_error!(1, "an input delimiter may be specified only when operating on fields");
                return 1;
            }
            if matches.opt_present("only-delimited") {
                show_error!(1, "suppressing non-delimited lines makes sense only when operating on fields");
                return 1;
            }
        }
        _ => ()
    }

    // Refuse to start if any named input is missing ("-" is standard input).
    for filename in matches.free.iter() {
        if ! (filename.as_slice() == "-" ||
              Path::new(filename.as_slice()).exists()) {
            show_error!(1, "{}: No such file or directory", filename);
            return 1;
        }
    }

    // No operands: read standard input.
    if matches.free.len() == 0 { matches.free.push("-".to_string()); }

    match mode {
        Bytes(ranges, opts) => return cut_bytes(matches.free, ranges, opts),
        Characters(ranges, opts) => return cut_charachters(matches.free,
                                                           ranges, opts),
        Fields(ranges, opts) => return cut_fields(matches.free, ranges, opts),
    }
}
/// Opens `path` for buffered reading; the name "-" selects standard input.
///
/// Returns `None` after printing an error message when the file cannot be
/// opened.
fn open(path: &String) -> Option<BufferedReader<Box<Reader>>> {
    let source = if path.as_slice() == "-" {
        box stdin() as Box<Reader>
    } else {
        match File::open(&std::path::Path::new(path.as_slice())) {
            Ok(fd) => box fd as Box<Reader>,
            Err(e) => {
                show_error!(1, "{0:s}: {1:s}", *path, e.desc.to_str());
                return None;
            }
        }
    };

    Some(BufferedReader::new(source))
}

108
cut/ranges.rs Normal file
View file

@ -0,0 +1,108 @@
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use std;
/// An inclusive range of positions `low..high`.
///
/// Ranges produced by the parser in this file always have `low >= 1` and
/// `low <= high`; an open-ended range ("N-") has `high == uint::MAX`.
/// The derived ordering compares `low` first and then `high`.
#[deriving(PartialEq,Eq,PartialOrd,Ord,Show)]
pub struct Range {
    /// First selected position.
    pub low: uint,
    /// Last selected position (inclusive).
    pub high: uint,
}
/// Parses a single range item: `N`, `N-`, `-M` or `N-M`.
///
/// Returns `None` when a bound is not a number, when the lower bound is 0,
/// or when the lower bound exceeds the upper bound.
impl std::from_str::FromStr for Range {
    fn from_str(s: &str) -> Option<Range> {
        use std::uint::MAX;

        // Split at the first '-' only; a second '-' stays in the tail and
        // makes the number parse fail.
        let mut pieces = s.splitn('-', 1);
        let head = match pieces.next() {
            Some(head) => head,
            None => unreachable!()
        };

        // Translate each accepted form into a (low, high) pair ...
        let bounds = match pieces.next() {
            None => from_str::<uint>(head).map(|n| (n, n)),
            Some(tail) if tail.len() == 0 => {
                from_str::<uint>(head).map(|n| (n, MAX))
            }
            Some(tail) if head.len() == 0 => {
                from_str::<uint>(tail).map(|m| (1, m))
            }
            Some(tail) => {
                match (from_str::<uint>(head), from_str::<uint>(tail)) {
                    (Some(n), Some(m)) => Some((n, m)),
                    _ => None
                }
            }
        };

        // ... and validate the pair once for all forms.
        bounds.filtered(|&(low, high)| low > 0 && low <= high)
              .map(|(low, high)| Range{ low: low, high: high })
    }
}
impl Range {
pub fn from_list(list: &str) -> Result<Vec<Range>, String> {
use std::cmp::max;
let mut ranges = vec!();
for item in list.split(',') {
match from_str::<Range>(item) {
Some(range_item) => ranges.push(range_item),
None => return Err(format!("range '{}' was invalid", item))
}
}
ranges.sort();
// merge overlapping ranges
for i in range(0, ranges.len()) {
let j = i + 1;
while j < ranges.len() && ranges.get(j).low <= ranges.get(i).high {
let j_high = ranges.remove(j).unwrap().high;
ranges.get_mut(i).high = max(ranges.get(i).high, j_high);
}
}
Ok(ranges)
}
}
/// Returns the ranges of positions (counted from 1 up to `uint::MAX`) that
/// are not covered by `ranges`.
///
/// `ranges` is expected to be sorted and non-overlapping, as produced by
/// `Range::from_list`. An empty input yields an empty result.
pub fn complement(ranges: &Vec<Range>) -> Vec<Range> {
    use std::uint;

    let mut complements = Vec::with_capacity(ranges.len() + 1);
    // Upper bound of the previously visited range, if any.
    let mut prev_high: Option<uint> = None;

    for current in ranges.iter() {
        match prev_high {
            // Gap in front of the very first range.
            None => {
                if current.low > 1 {
                    complements.push(Range{ low: 1, high: current.low - 1 });
                }
            }
            // Gap between two neighbouring ranges, unless they touch.
            Some(high) => {
                if high + 1 != current.low {
                    complements.push(Range{ low: high + 1,
                                            high: current.low - 1 });
                }
            }
        }
        prev_high = Some(current.high);
    }

    // Everything after the last range, unless it is already open-ended.
    match prev_high {
        Some(high) if high < uint::MAX => {
            complements.push(Range{ low: high + 1, high: uint::MAX });
        }
        _ => ()
    }

    complements
}