mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
Add initial cut support, only bytes cutting
This commit is contained in:
parent
8568d41a09
commit
2ab586459b
2 changed files with 393 additions and 0 deletions
285
cut/cut.rs
Normal file
285
cut/cut.rs
Normal file
|
@ -0,0 +1,285 @@
|
|||
#![crate_id(name="cut", vers="1.0.0", author="Rolf Morel")]
|
||||
#![feature(macro_rules)]
|
||||
|
||||
extern crate getopts;
|
||||
extern crate libc;
|
||||
|
||||
use std::os;
|
||||
use std::io::{print,stdin,stdout,File,BufferedWriter,BufferedReader};
|
||||
use getopts::{optopt, optflag, getopts, usage};
|
||||
|
||||
use ranges::Range;
|
||||
|
||||
#[path = "../common/util.rs"]
|
||||
mod util;
|
||||
mod ranges;
|
||||
|
||||
static NAME: &'static str = "cut";
|
||||
static VERSION: &'static str = "1.0.0";
|
||||
|
||||
/// Settings shared by the byte (`-b`) and character (`-c`) cutting modes.
struct Options {
    /// String written between separately selected ranges on a line.
    /// `None` means that no separator is emitted at all.
    out_delim: Option<String>,
}
|
||||
|
||||
/// Settings that only apply to the field (`-f`) cutting mode.
struct FieldOptions {
    /// Character that separates fields on an input line (`-d`, TAB when
    /// the option is absent).
    delimiter: char,
    /// String written between selected fields (`--output-delimiter`, or the
    /// input delimiter when that option is absent).
    /// NOTE(review): the field name is misspelled ("delimeter"); it is kept
    /// as-is because `uumain` constructs the struct with this name.
    out_delimeter: String,
    /// Whether `-s` / `--only-delimited` was given on the command line.
    only_delimited: bool,
}
|
||||
|
||||
enum Mode {
|
||||
Bytes(Vec<Range>, Options),
|
||||
Characters(Vec<Range>, Options),
|
||||
Fields(Vec<Range>, FieldOptions),
|
||||
}
|
||||
|
||||
fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
|
||||
use std::uint;
|
||||
|
||||
let mut range_vec = {
|
||||
try!(
|
||||
if complement {
|
||||
Range::from_list(list).map(|r| ranges::complement(&r))
|
||||
} else {
|
||||
Range::from_list(list)
|
||||
}
|
||||
)
|
||||
};
|
||||
|
||||
// add sentinel value for increased performance during cutting
|
||||
range_vec.push(Range{ low: uint::MAX, high: uint::MAX });
|
||||
|
||||
Ok(range_vec)
|
||||
}
|
||||
|
||||
fn cut_bytes(files: Vec<String>, ranges: Vec<Range>, opts: Options) -> int {
|
||||
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
|
||||
let (use_delim, out_delim) = match opts.out_delim {
|
||||
Some(delim) => (true, delim),
|
||||
None => (false, "".to_string())
|
||||
};
|
||||
|
||||
for filename in files.move_iter() {
|
||||
let mut file = match open(&filename) {
|
||||
Some(file) => file,
|
||||
None => continue
|
||||
};
|
||||
|
||||
let mut byte_pos = 0;
|
||||
let mut print_delim = false;
|
||||
let mut range_pos = 0;
|
||||
|
||||
loop {
|
||||
let byte = match file.read_u8() {
|
||||
Ok(byte) => byte,
|
||||
Err(std::io::IoError{ kind: std::io::EndOfFile, ..}) => {
|
||||
if byte_pos > 0 {
|
||||
out.write_u8('\n' as u8);
|
||||
}
|
||||
break
|
||||
}
|
||||
_ => fail!(),
|
||||
};
|
||||
|
||||
if byte == ('\n' as u8) {
|
||||
out.write_u8('\n' as u8);
|
||||
byte_pos = 0;
|
||||
print_delim = false;
|
||||
range_pos = 0;
|
||||
} else {
|
||||
byte_pos += 1;
|
||||
|
||||
if byte_pos > ranges.get(range_pos).high {
|
||||
range_pos += 1;
|
||||
}
|
||||
|
||||
let cur_range = *ranges.get(range_pos);
|
||||
|
||||
if byte_pos >= cur_range.low {
|
||||
if use_delim {
|
||||
if print_delim && byte_pos == cur_range.low {
|
||||
out.write_str(out_delim.as_slice());
|
||||
}
|
||||
|
||||
print_delim = true;
|
||||
}
|
||||
|
||||
out.write_u8(byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
fn cut_charachters(files: Vec<String>, ranges: Vec<Range>,
|
||||
opts: Options) -> int {
|
||||
return 0;
|
||||
}
|
||||
|
||||
fn cut_fields(files: Vec<String>, ranges: Vec<Range>,
|
||||
opts: FieldOptions) -> int {
|
||||
for range in ranges.iter() {
|
||||
println!("{}-{}", range.low, range.high);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn main() { os::set_exit_status(uumain(os::args())); }
|
||||
|
||||
pub fn uumain(args: Vec<String>) -> int {
|
||||
let program = args.get(0).clone();
|
||||
let opts = [
|
||||
optopt("b", "bytes", "select only these bytes", "LIST"),
|
||||
optopt("c", "characters", "select only these characters", "LIST"),
|
||||
optopt("d", "delimiter", "use DELIM instead of TAB for field delimiter", "DELIM"),
|
||||
optopt("f", "fields", "select only these fields; also print any line that contains no delimiter character, unless the -s option is specified", "LIST"),
|
||||
optflag("n", "", "(ignored)"),
|
||||
optflag("", "complement", "complement the set of selected bytes, characters or fields"),
|
||||
optflag("s", "only-delimited", "do not print lines not containing delimiters"),
|
||||
optopt("", "output-delimiter", "use STRING as the output delimiter the default is to use the input delimiter", "STRING"),
|
||||
optflag("", "help", "display this help and exit"),
|
||||
optflag("", "version", "output version information and exit"),
|
||||
];
|
||||
|
||||
let mut matches = match getopts(args.tail(), opts) {
|
||||
Ok(m) => m,
|
||||
Err(f) => {
|
||||
show_error!(1, "Invalid options\n{}", f.to_err_msg())
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
if matches.opt_present("help") {
|
||||
println!("Usage:");
|
||||
println!(" {0:s} OPTION... [FILE]...", program);
|
||||
println!("");
|
||||
print(usage("Print selected parts of lines from each FILE to standard output.", opts).as_slice());
|
||||
println!("");
|
||||
println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one");
|
||||
println!("range, or many ranges separated by commas. Selected input is written");
|
||||
println!("in the same order that it is read, and is written exactly once.");
|
||||
println!("Each range is one of:");
|
||||
println!("");
|
||||
println!(" N N'th byte, character or field, counted from 1");
|
||||
println!(" N- from N'th byte, character or field, to end of line");
|
||||
println!(" N-M from N'th to M'th (included) byte, character or field");
|
||||
println!(" -M from first to M'th (included) byte, character or field");
|
||||
println!("");
|
||||
println!("With no FILE, or when FILE is -, read standard input.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if matches.opt_present("version") {
|
||||
println!("{} {}", NAME, VERSION);
|
||||
return 0;
|
||||
}
|
||||
|
||||
let complement = matches.opt_present("complement");
|
||||
let mut out_delim = matches.opt_str("output-delimiter");
|
||||
|
||||
let mode = match (matches.opt_str("bytes"), matches.opt_str("characters"),
|
||||
matches.opt_str("fields")) {
|
||||
(Some(byte_ranges), None, None) => {
|
||||
match list_to_ranges(byte_ranges.as_slice(), complement) {
|
||||
Ok(ranges) => Bytes(ranges, Options{ out_delim: out_delim }),
|
||||
Err(msg) => {
|
||||
show_error!(1, "{}", msg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
(None ,Some(char_ranges), None) => {
|
||||
match list_to_ranges(char_ranges.as_slice(), complement) {
|
||||
Ok(ranges) => Characters(ranges,
|
||||
Options{ out_delim: out_delim }),
|
||||
Err(msg) => {
|
||||
show_error!(1, "{}", msg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
(None, None ,Some(field_ranges)) => {
|
||||
match list_to_ranges(field_ranges.as_slice(), complement) {
|
||||
Ok(ranges) => {
|
||||
use std::str::from_char;
|
||||
|
||||
let only_delimited = matches.opt_present("only-delimited");
|
||||
let delim = matches.opt_str("delimiter")
|
||||
.filtered(|s| s.len() == 1)
|
||||
.map(|s| s.as_slice().char_at(0))
|
||||
.unwrap_or('\t');
|
||||
if out_delim.is_none() {
|
||||
out_delim = Some(from_char(delim));
|
||||
}
|
||||
|
||||
Fields(ranges,
|
||||
FieldOptions{ delimiter: delim,
|
||||
out_delimeter: out_delim.unwrap(),
|
||||
only_delimited: only_delimited })
|
||||
}
|
||||
Err(msg) => {
|
||||
show_error!(1, "{}", msg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
(ref b, ref c, ref f) if b.is_some() || c.is_some() || f.is_some() => {
|
||||
crash!(1, "only one type of list may be specified");
|
||||
}
|
||||
_ => crash!(1, "you must specify a list of bytes, characters, or fields")
|
||||
};
|
||||
|
||||
match mode {
|
||||
Bytes(..) | Characters(..) => {
|
||||
if matches.opt_present("delimiter") {
|
||||
show_error!(1, "an input delimiter may be specified only when operating on fields");
|
||||
return 1;
|
||||
}
|
||||
if matches.opt_present("only-delimited") {
|
||||
show_error!(1, "suppressing non-delimited lines makes sense only when operating on fields");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
|
||||
for filename in matches.free.iter() {
|
||||
if ! (filename.as_slice() == "-" ||
|
||||
Path::new(filename.as_slice()).exists()) {
|
||||
show_error!(1, "{}: No such file or directory", filename);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if matches.free.len() == 0 { matches.free.push("-".to_string()); }
|
||||
|
||||
match mode {
|
||||
Bytes(ranges, opts) => return cut_bytes(matches.free, ranges, opts),
|
||||
Characters(ranges, opts) => return cut_charachters(matches.free,
|
||||
ranges, opts),
|
||||
Fields(ranges, opts) => return cut_fields(matches.free, ranges, opts),
|
||||
}
|
||||
}
|
||||
|
||||
fn open(path: &String) -> Option<BufferedReader<Box<Reader>>> {
|
||||
if "-" == path.as_slice() {
|
||||
let reader = box stdin() as Box<Reader>;
|
||||
return Some(BufferedReader::new(reader));
|
||||
}
|
||||
|
||||
match File::open(&std::path::Path::new(path.as_slice())) {
|
||||
Ok(fd) => {
|
||||
let reader = box fd as Box<Reader>;
|
||||
return Some(BufferedReader::new(reader));
|
||||
},
|
||||
Err(e) => {
|
||||
show_error!(1, "{0:s}: {1:s}", *path, e.desc.to_str());
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
108
cut/ranges.rs
Normal file
108
cut/ranges.rs
Normal file
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* This file is part of the uutils coreutils package.
|
||||
*
|
||||
* (c) Rolf Morel <rolfmorel@gmail.com>
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
use std;
|
||||
|
||||
#[deriving(PartialEq,Eq,PartialOrd,Ord,Show)]
|
||||
pub struct Range {
|
||||
pub low: uint,
|
||||
pub high: uint,
|
||||
}
|
||||
|
||||
impl std::from_str::FromStr for Range {
|
||||
fn from_str(s: &str) -> Option<Range> {
|
||||
use std::uint::MAX;
|
||||
|
||||
let mut parts = s.splitn('-', 1);
|
||||
|
||||
match (parts.next(), parts.next()) {
|
||||
(Some(nm), None) => {
|
||||
from_str::<uint>(nm).filtered(|nm| *nm > 0)
|
||||
.map(|nm| Range{ low: nm, high: nm })
|
||||
}
|
||||
(Some(n), Some(m)) if m.len() == 0 => {
|
||||
from_str::<uint>(n).filtered(|low| *low > 0)
|
||||
.map(|low| Range{ low: low, high: MAX })
|
||||
}
|
||||
(Some(n), Some(m)) if n.len() == 0 => {
|
||||
from_str::<uint>(m).filtered(|high| *high >= 1)
|
||||
.map(|high| Range{ low: 1, high: high })
|
||||
}
|
||||
(Some(n), Some(m)) => {
|
||||
match (from_str::<uint>(n), from_str::<uint>(m)) {
|
||||
(Some(low), Some(high)) if low > 0 && low <= high => {
|
||||
Some(Range{ low: low, high: high })
|
||||
}
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
_ => unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Range {
|
||||
pub fn from_list(list: &str) -> Result<Vec<Range>, String> {
|
||||
use std::cmp::max;
|
||||
|
||||
let mut ranges = vec!();
|
||||
|
||||
for item in list.split(',') {
|
||||
match from_str::<Range>(item) {
|
||||
Some(range_item) => ranges.push(range_item),
|
||||
None => return Err(format!("range '{}' was invalid", item))
|
||||
}
|
||||
}
|
||||
|
||||
ranges.sort();
|
||||
|
||||
// merge overlapping ranges
|
||||
for i in range(0, ranges.len()) {
|
||||
let j = i + 1;
|
||||
|
||||
while j < ranges.len() && ranges.get(j).low <= ranges.get(i).high {
|
||||
let j_high = ranges.remove(j).unwrap().high;
|
||||
ranges.get_mut(i).high = max(ranges.get(i).high, j_high);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ranges)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn complement(ranges: &Vec<Range>) -> Vec<Range> {
|
||||
use std::uint;
|
||||
|
||||
let mut complements = Vec::with_capacity(ranges.len() + 1);
|
||||
|
||||
if ranges.len() > 0 && ranges.get(0).low > 1 {
|
||||
complements.push(Range{ low: 1, high: ranges.get(0).low - 1 });
|
||||
}
|
||||
|
||||
let mut ranges_iter = ranges.iter().peekable();
|
||||
loop {
|
||||
match (ranges_iter.next(), ranges_iter.peek()) {
|
||||
(Some(left), Some(right)) => {
|
||||
if left.high + 1 != right.low {
|
||||
complements.push(Range{ low: left.high + 1,
|
||||
high: right.low - 1 });
|
||||
}
|
||||
}
|
||||
(Some(last), None) => {
|
||||
if last.high < uint::MAX {
|
||||
complements.push(Range{ low: last.high + 1,
|
||||
high: uint::MAX });
|
||||
}
|
||||
}
|
||||
_ => break
|
||||
}
|
||||
}
|
||||
|
||||
complements
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue